Move C runtime source directories into runtime/src/iree. (#8950)
* Pries apart the global include directory situation on both the Bazel and CMake side. On Bazel, we use a new iree_runtime_cc_(library|binary) macro that adds an implicit dep for include propagation and (in the future) can set copts.
* On the CMake side, we use a path-based implicit dep to similar effect. I tried a couple of other ways and this was the least intrusive.
* Reworks bazel_to_cmake target rewriting to account for the new split root.
* Removes the CMake DATA include::this:file.png style of data includes (used in one place) in favor of a path, since package names are no longer reversible to a location. This seems to be the only place we made that assumption.
* Will do a couple more followups to completely retire the iree/iree directory (in favor of top-level compiler/ and tools/ directories).
Progress on #8955
diff --git a/runtime/BUILD.bazel b/runtime/BUILD.bazel
new file mode 100644
index 0000000..9da145a
--- /dev/null
+++ b/runtime/BUILD.bazel
@@ -0,0 +1,13 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+exports_files(["lit.cfg.py"])
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 8a3ce7a..de8151c 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -4,6 +4,8 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+add_subdirectory(src)
+
if(IREE_BUILD_PYTHON_BINDINGS)
# Copy Python packaging files to the build dir so that we can install from
# there.
diff --git a/runtime/lit.cfg.py b/runtime/lit.cfg.py
new file mode 100644
index 0000000..77a0498
--- /dev/null
+++ b/runtime/lit.cfg.py
@@ -0,0 +1,32 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Lit config for IREE."""
+
+# Lint for undefined variables is disabled as config is not defined inside this
+# file, instead config is injected by way of evaluating runlit.cfg.py from
+# runlit.site.cfg.py which in turn is evaluated by lit.py.
+# pylint: disable=undefined-variable
+
+import os
+import tempfile
+
+import lit.formats
+
+config.name = "IREE"
+config.suffixes = [".mlir", ".txt"]
+config.test_format = lit.formats.ShTest(execute_external=True)
+# Forward all IREE environment variables
+passthrough_env_vars = ["VK_ICD_FILENAMES"]
+config.environment.update({
+ k: v
+ for k, v in os.environ.items()
+ if k.startswith("IREE_") or k in passthrough_env_vars
+})
+
+# Use the most preferred temp directory.
+config.test_exec_root = (os.environ.get("TEST_UNDECLARED_OUTPUTS_DIR") or
+ os.environ.get("TEST_TMPDIR") or
+ os.path.join(tempfile.gettempdir(), "lit"))
diff --git a/runtime/src/BUILD b/runtime/src/BUILD
new file mode 100644
index 0000000..ad62c80
--- /dev/null
+++ b/runtime/src/BUILD
@@ -0,0 +1,18 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+cc_library(
+ name = "runtime_defines",
+ includes = [
+ ".",
+ ],
+)
diff --git a/runtime/src/CMakeLists.txt b/runtime/src/CMakeLists.txt
new file mode 100644
index 0000000..e19c964
--- /dev/null
+++ b/runtime/src/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Configures all iree_cc_* targets to take this implicit dep,
+# which provides common includes and copts for the tree.
+set(IREE_IMPLICIT_DEFS_CC_DEPS iree_defs_runtime)
+
+add_library(iree_defs_runtime INTERFACE)
+target_include_directories(
+ iree_defs_runtime INTERFACE
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+)
+
+add_subdirectory(iree)
diff --git a/runtime/src/iree/CMakeLists.txt b/runtime/src/iree/CMakeLists.txt
new file mode 100644
index 0000000..3b1f024
--- /dev/null
+++ b/runtime/src/iree/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+add_subdirectory(base)
+add_subdirectory(builtins)
+add_subdirectory(hal)
+add_subdirectory(modules)
+add_subdirectory(runtime)
+add_subdirectory(schemas)
+add_subdirectory(task)
+add_subdirectory(testing)
+add_subdirectory(vm)
diff --git a/runtime/src/iree/base/BUILD b/runtime/src/iree/base/BUILD
new file mode 100644
index 0000000..b85f964
--- /dev/null
+++ b/runtime/src/iree/base/BUILD
@@ -0,0 +1,211 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Common types and utilities used in the IREE codebase.
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+#===------------------------------------------------------------------------===#
+# Public API
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "base",
+ srcs = [
+ "allocator.c",
+ "allocator.h",
+ "api.c",
+ "assert.h",
+ "bitfield.c",
+ "bitfield.h",
+ "loop.c",
+ "loop.h",
+ "loop_inline.c",
+ "loop_inline.h",
+ "status.c",
+ "status.h",
+ "string_builder.c",
+ "string_builder.h",
+ "string_view.c",
+ "string_view.h",
+ "time.c",
+ "time.h",
+ "wait_source.c",
+ "wait_source.h",
+ ],
+ hdrs = ["api.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":core_headers",
+ ":tracing",
+ ],
+)
+
+# TODO(benvanik): make these srcs and only expose an api_cc.h.
+iree_runtime_cc_library(
+ name = "cc",
+ srcs = [
+ "status_cc.cc",
+ ],
+ hdrs = [
+ "status_cc.h",
+ ],
+ deps = [
+ ":base",
+ ":core_headers",
+ ":logging",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "bitfield_test",
+ srcs = ["bitfield_test.cc"],
+ deps = [
+ ":base",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "loop_inline_test",
+ srcs = [
+ "loop_inline_test.cc",
+ ],
+ deps = [
+ ":base",
+ ":cc",
+ ":loop_test_hdrs",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "loop_test_hdrs",
+ testonly = 1,
+ hdrs = [
+ "loop_test.h",
+ ],
+ deps = [
+ ":base",
+ ":tracing",
+ "//runtime/src/iree/base/internal:wait_handle",
+ "//runtime/src/iree/testing:gtest",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "status_test",
+ srcs = ["status_test.cc"],
+ deps = [
+ ":base",
+ ":cc",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "string_builder_test",
+ srcs = ["string_builder_test.cc"],
+ deps = [
+ ":base",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "string_view_test",
+ srcs = ["string_view_test.cc"],
+ deps = [
+ ":base",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+#===------------------------------------------------------------------------===#
+# Core headers (platform detection, compiler compat, etc)
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "core_headers",
+ hdrs = [
+ "alignment.h",
+ "attributes.h",
+ "config.h",
+ "target_platform.h",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "target_platform",
+ hdrs = ["target_platform.h"],
+)
+
+#===------------------------------------------------------------------------===#
+# Internal IREE C++ wrappers and utilities
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "logging",
+ srcs = ["logging.cc"],
+ hdrs = ["logging.h"],
+ linkopts = select({
+ "//iree:iree_is_android": [
+ "-llog",
+ ],
+ "//conditions:default": [],
+ }),
+ deps = [
+ ":core_headers",
+ ":tracing",
+ "//runtime/src/iree/base/internal:flags",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "loop_sync",
+ srcs = ["loop_sync.c"],
+ hdrs = ["loop_sync.h"],
+ deps = [
+ ":base",
+ ":tracing",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:wait_handle",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "loop_sync_test",
+ srcs = [
+ "loop_sync_test.cc",
+ ],
+ deps = [
+ ":base",
+ ":cc",
+ ":loop_sync",
+ ":loop_test_hdrs",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "tracing",
+ hdrs = ["tracing.h"],
+ deps = [
+ ":core_headers",
+ ],
+)
diff --git a/runtime/src/iree/base/CMakeLists.txt b/runtime/src/iree/base/CMakeLists.txt
new file mode 100644
index 0000000..1121f63
--- /dev/null
+++ b/runtime/src/iree/base/CMakeLists.txt
@@ -0,0 +1,223 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ base
+ HDRS
+ "api.h"
+ SRCS
+ "allocator.c"
+ "allocator.h"
+ "api.c"
+ "assert.h"
+ "bitfield.c"
+ "bitfield.h"
+ "loop.c"
+ "loop.h"
+ "loop_inline.c"
+ "loop_inline.h"
+ "status.c"
+ "status.h"
+ "string_builder.c"
+ "string_builder.h"
+ "string_view.c"
+ "string_view.h"
+ "time.c"
+ "time.h"
+ "wait_source.c"
+ "wait_source.h"
+ DEPS
+ ::core_headers
+ ::tracing
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ core_headers
+ HDRS
+ "alignment.h"
+ "attributes.h"
+ "config.h"
+ "target_platform.h"
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ cc
+ HDRS
+ "status_cc.h"
+ SRCS
+ "status_cc.cc"
+ DEPS
+ ::base
+ ::core_headers
+ ::logging
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ bitfield_test
+ SRCS
+ "bitfield_test.cc"
+ DEPS
+ ::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ loop_inline_test
+ SRCS
+ "loop_inline_test.cc"
+ DEPS
+ ::base
+ ::cc
+ ::loop_test_hdrs
+ ::tracing
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ loop_test_hdrs
+ HDRS
+ "loop_test.h"
+ DEPS
+ ::base
+ ::cc
+ ::tracing
+ iree::base::internal::wait_handle
+ iree::testing::gtest
+ TESTONLY
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ status_test
+ SRCS
+ "status_test.cc"
+ DEPS
+ ::base
+ ::cc
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ string_builder_test
+ SRCS
+ "string_builder_test.cc"
+ DEPS
+ ::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ string_view_test
+ SRCS
+ "string_view_test.cc"
+ DEPS
+ ::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ target_platform
+ HDRS
+ "target_platform.h"
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ logging
+ HDRS
+ "logging.h"
+ SRCS
+ "logging.cc"
+ DEPS
+ ::core_headers
+ ::tracing
+ iree::base::internal::flags
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ loop_sync
+ HDRS
+ "loop_sync.h"
+ SRCS
+ "loop_sync.c"
+ DEPS
+ ::base
+ ::tracing
+ iree::base::internal
+ iree::base::internal::wait_handle
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ loop_sync_test
+ SRCS
+ "loop_sync_test.cc"
+ DEPS
+ ::base
+ ::cc
+ ::loop_sync
+ ::loop_test_hdrs
+ ::tracing
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+# TODO(benvanik): evaluate if we want this as part of the API. Could restrict it
+# to exclusively static linkage scenarios and note that it's unstable. It's just
+# really really useful and the only way for applications to interleave with our
+# tracing (today).
+if(${IREE_ENABLE_RUNTIME_TRACING})
+ iree_cc_library(
+ NAME
+ tracing
+ HDRS
+ "tracing.h"
+ "${IREE_ROOT_DIR}/third_party/tracy/Tracy.hpp"
+ "${IREE_ROOT_DIR}/third_party/tracy/TracyC.h"
+ SRCS
+ "tracing.cc"
+ DEPS
+ ${CMAKE_DL_LIBS}
+ ::core_headers
+ DEFINES
+ "IREE_TRACING_MODE=2"
+ PUBLIC
+ )
+else()
+ iree_cc_library(
+ NAME
+ tracing
+ HDRS
+ "tracing.h"
+ DEPS
+ ::core_headers
+ PUBLIC
+ )
+endif()
diff --git a/runtime/src/iree/base/alignment.h b/runtime/src/iree/base/alignment.h
new file mode 100644
index 0000000..1fd0356
--- /dev/null
+++ b/runtime/src/iree/base/alignment.h
@@ -0,0 +1,249 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Implementation of the primitives from stdalign.h used for cross-target
+// value alignment specification and queries.
+
+#ifndef IREE_BASE_ALIGNMENT_H_
+#define IREE_BASE_ALIGNMENT_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/config.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//===----------------------------------------------------------------------===//
+// Alignment utilities
+//===----------------------------------------------------------------------===//
+
+// https://en.cppreference.com/w/c/types/max_align_t
+#if defined(IREE_PLATFORM_WINDOWS)
+// NOTE: 16 is a specified Microsoft API requirement for some functions.
+#define iree_max_align_t 16
+#else
+#define iree_max_align_t sizeof(long double)
+#endif // IREE_PLATFORM_*
+
+// https://en.cppreference.com/w/c/language/_Alignas
+// https://en.cppreference.com/w/c/language/_Alignof
+#if defined(IREE_COMPILER_MSVC)
+#define iree_alignas(x) __declspec(align(x))
+#define iree_alignof(x) __alignof(x)
+#else
+#define iree_alignas(x) __attribute__((__aligned__(x)))
+#define iree_alignof(x) __alignof__(x)
+#endif // IREE_COMPILER_*
+
+// Aligns |value| up to the given power-of-two |alignment| if required.
+// https://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
+static inline iree_host_size_t iree_host_align(iree_host_size_t value,
+ iree_host_size_t alignment) {
+ return (value + (alignment - 1)) & ~(alignment - 1);
+}
+
+// Returns true if |value| matches the given minimum |alignment|.
+static inline bool iree_host_size_has_alignment(iree_host_size_t value,
+ iree_host_size_t alignment) {
+ return iree_host_align(value, alignment) == value;
+}
+
+// Aligns |value| up to the given power-of-two |alignment| if required.
+// https://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
+static inline iree_device_size_t iree_device_align(
+ iree_device_size_t value, iree_device_size_t alignment) {
+ return (value + (alignment - 1)) & ~(alignment - 1);
+}
+
+// Returns true if |value| matches the given minimum |alignment|.
+static inline bool iree_device_size_has_alignment(
+ iree_device_size_t value, iree_device_size_t alignment) {
+ return iree_device_align(value, alignment) == value;
+}
+
+// Returns the size of a struct padded out to iree_max_align_t.
+// This must be used when performing manual trailing allocation packing to
+// ensure the alignment requirements of the trailing data are satisfied.
+//
+// NOTE: do not use this if using VLAs (`struct { int trailing[]; }`) - those
+// must precisely follow the normal sizeof(t) as the compiler does the padding
+// for you.
+//
+// Example:
+// some_buffer_ptr_t* p = NULL;
+// iree_host_size_t total_size = iree_sizeof_struct(*buffer) + extra_data_size;
+// IREE_CHECK_OK(iree_allocator_malloc(allocator, total_size, (void**)&p));
+#define iree_sizeof_struct(t) iree_host_align(sizeof(t), iree_max_align_t)
+
+//===----------------------------------------------------------------------===//
+// Alignment-safe memory accesses
+//===----------------------------------------------------------------------===//
+
+// Map little-endian byte indices in memory to the host memory order indices.
+#if defined(IREE_ENDIANNESS_LITTLE)
+#define IREE_LE_IDX_1(i) (i)
+#define IREE_LE_IDX_2(i) (i)
+#define IREE_LE_IDX_4(i) (i)
+#define IREE_LE_IDX_8(i) (i)
+#else
+#define IREE_LE_IDX_1(i) (i)
+#define IREE_LE_IDX_2(i) (1 - (i))
+#define IREE_LE_IDX_4(i) (3 - (i))
+#define IREE_LE_IDX_8(i) (7 - (i))
+#endif // IREE_ENDIANNESS_*
+
+#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+
+static inline uint8_t iree_unaligned_load_le_u8(const uint8_t* ptr) {
+ return *ptr;
+}
+static inline uint16_t iree_unaligned_load_le_u16(const uint16_t* ptr) {
+ const uint8_t* p = (const uint8_t*)ptr;
+ return ((uint16_t)p[IREE_LE_IDX_2(0)]) | ((uint16_t)p[IREE_LE_IDX_2(1)] << 8);
+}
+static inline uint32_t iree_unaligned_load_le_u32(const uint32_t* ptr) {
+ const uint8_t* p = (const uint8_t*)ptr;
+ return ((uint32_t)p[IREE_LE_IDX_4(0)]) |
+ ((uint32_t)p[IREE_LE_IDX_4(1)] << 8) |
+ ((uint32_t)p[IREE_LE_IDX_4(2)] << 16) |
+ ((uint32_t)p[IREE_LE_IDX_4(3)] << 24);
+}
+static inline uint64_t iree_unaligned_load_le_u64(const uint64_t* ptr) {
+ const uint8_t* p = (const uint8_t*)ptr;
+ return ((uint64_t)p[IREE_LE_IDX_8(0)]) |
+ ((uint64_t)p[IREE_LE_IDX_8(1)] << 8) |
+ ((uint64_t)p[IREE_LE_IDX_8(2)] << 16) |
+ ((uint64_t)p[IREE_LE_IDX_8(3)] << 24) |
+ ((uint64_t)p[IREE_LE_IDX_8(4)] << 32) |
+ ((uint64_t)p[IREE_LE_IDX_8(5)] << 40) |
+ ((uint64_t)p[IREE_LE_IDX_8(6)] << 48) |
+ ((uint64_t)p[IREE_LE_IDX_8(7)] << 56);
+}
+static inline float iree_unaligned_load_le_f32(const float* ptr) {
+ uint32_t uint_value = iree_unaligned_load_le_u32((const uint32_t*)ptr);
+ float value;
+ memcpy(&value, &uint_value, sizeof(value));
+ return value;
+}
+static inline double iree_unaligned_load_le_f64(const double* ptr) {
+ uint64_t uint_value = iree_unaligned_load_le_u64((const uint64_t*)ptr);
+ double value;
+ memcpy(&value, &uint_value, sizeof(value));
+ return value;
+}
+
+static inline void iree_unaligned_store_le_u8(uint8_t* ptr, uint8_t value) {
+ *ptr = value;
+}
+static inline void iree_unaligned_store_le_u16(uint16_t* ptr, uint16_t value) {
+ uint8_t* p = (uint8_t*)ptr;
+ p[IREE_LE_IDX_2(0)] = value;
+ p[IREE_LE_IDX_2(1)] = value >> 8;
+}
+static inline void iree_unaligned_store_le_u32(uint32_t* ptr, uint32_t value) {
+ uint8_t* p = (uint8_t*)ptr;
+ p[IREE_LE_IDX_4(0)] = value;
+ p[IREE_LE_IDX_4(1)] = value >> 8;
+ p[IREE_LE_IDX_4(2)] = value >> 16;
+ p[IREE_LE_IDX_4(3)] = value >> 24;
+}
+static inline void iree_unaligned_store_le_u64(uint64_t* ptr, uint64_t value) {
+ uint8_t* p = (uint8_t*)ptr;
+ p[IREE_LE_IDX_8(0)] = value;
+ p[IREE_LE_IDX_8(1)] = value >> 8;
+ p[IREE_LE_IDX_8(2)] = value >> 16;
+ p[IREE_LE_IDX_8(3)] = value >> 24;
+ p[IREE_LE_IDX_8(4)] = value >> 32;
+ p[IREE_LE_IDX_8(5)] = value >> 40;
+ p[IREE_LE_IDX_8(6)] = value >> 48;
+ p[IREE_LE_IDX_8(7)] = value >> 56;
+}
+static inline void iree_unaligned_store_le_f32(float* ptr, float value) {
+ uint32_t uint_value;
+ memcpy(&uint_value, &value, sizeof(value));
+ iree_unaligned_store_le_u32((uint32_t*)ptr, uint_value);
+}
+static inline void iree_unaligned_store_le_f64(double* ptr, double value) {
+ uint64_t uint_value;
+ memcpy(&uint_value, &value, sizeof(value));
+ iree_unaligned_store_le_u64((uint64_t*)ptr, uint_value);
+}
+
+#else
+
+#if defined(IREE_ENDIANNESS_LITTLE)
+
+#define iree_unaligned_load_le_u8(ptr) *(ptr)
+#define iree_unaligned_load_le_u16(ptr) *(ptr)
+#define iree_unaligned_load_le_u32(ptr) *(ptr)
+#define iree_unaligned_load_le_u64(ptr) *(ptr)
+#define iree_unaligned_load_le_f32(ptr) *(ptr)
+#define iree_unaligned_load_le_f64(ptr) *(ptr)
+
+#define iree_unaligned_store_le_u8(ptr, value) *(ptr) = (value)
+#define iree_unaligned_store_le_u16(ptr, value) *(ptr) = (value)
+#define iree_unaligned_store_le_u32(ptr, value) *(ptr) = (value)
+#define iree_unaligned_store_le_u64(ptr, value) *(ptr) = (value)
+#define iree_unaligned_store_le_f32(ptr, value) *(ptr) = (value)
+#define iree_unaligned_store_le_f64(ptr, value) *(ptr) = (value)
+
+#else
+
+#error "TODO(benvanik): little-endian load/store for big-endian archs"
+
+#endif // IREE_ENDIANNESS_*
+
+#endif // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+
+// clang-format off
+
+// Dereferences |ptr| and returns the value.
+// Automatically handles unaligned accesses on architectures that may not
+// support them natively (or efficiently). Memory is treated as little-endian.
+#define iree_unaligned_load_le(ptr) \
+ _Generic((ptr), \
+ int8_t*: iree_unaligned_load_le_u8((const uint8_t*)(ptr)), \
+ uint8_t*: iree_unaligned_load_le_u8((const uint8_t*)(ptr)), \
+ int16_t*: iree_unaligned_load_le_u16((const uint16_t*)(ptr)), \
+ uint16_t*: iree_unaligned_load_le_u16((const uint16_t*)(ptr)), \
+ int32_t*: iree_unaligned_load_le_u32((const uint32_t*)(ptr)), \
+ uint32_t*: iree_unaligned_load_le_u32((const uint32_t*)(ptr)), \
+ int64_t*: iree_unaligned_load_le_u64((const uint64_t*)(ptr)), \
+ uint64_t*: iree_unaligned_load_le_u64((const uint64_t*)(ptr)), \
+ float*: iree_unaligned_load_le_f32((const float*)(ptr)), \
+ double*: iree_unaligned_load_le_f64((const double*)(ptr)) \
+ )
+
+// Dereferences |ptr| and writes the given |value|.
+// Automatically handles unaligned accesses on architectures that may not
+// support them natively (or efficiently). Memory is treated as little-endian.
+#define iree_unaligned_store(ptr, value) \
+ _Generic((ptr), \
+ int8_t*: iree_unaligned_store_le_u8((uint8_t*)(ptr), value), \
+ uint8_t*: iree_unaligned_store_le_u8((uint8_t*)(ptr), value), \
+ int16_t*: iree_unaligned_store_le_u16((uint16_t*)(ptr), value), \
+ uint16_t*: iree_unaligned_store_le_u16((uint16_t*)(ptr), value), \
+ int32_t*: iree_unaligned_store_le_u32((uint32_t*)(ptr), value), \
+ uint32_t*: iree_unaligned_store_le_u32((uint32_t*)(ptr), value), \
+ int64_t*: iree_unaligned_store_le_u64((uint64_t*)(ptr), value), \
+ uint64_t*: iree_unaligned_store_le_u64((uint64_t*)(ptr), value), \
+ float*: iree_unaligned_store_le_f32((float*)(ptr), value), \
+ double*: iree_unaligned_store_le_f64((double*)(ptr), value) \
+ )
+
+// clang-format on
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_ALIGNMENT_H_
diff --git a/runtime/src/iree/base/allocator.c b/runtime/src/iree/base/allocator.c
new file mode 100644
index 0000000..d409370
--- /dev/null
+++ b/runtime/src/iree/base/allocator.c
@@ -0,0 +1,261 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_allocator_t (std::allocator-like interface)
+//===----------------------------------------------------------------------===//
+
+static iree_status_t iree_allocator_issue_alloc(
+ iree_allocator_t allocator, iree_allocator_command_t command,
+ iree_host_size_t byte_length, void** inout_ptr) {
+ if (IREE_UNLIKELY(!allocator.ctl)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "allocator has no control routine");
+ }
+ iree_allocator_alloc_params_t params = {
+ .byte_length = byte_length,
+ };
+ return allocator.ctl(allocator.self, command, &params, inout_ptr);
+}
+
+IREE_API_EXPORT iree_status_t iree_allocator_malloc(
+ iree_allocator_t allocator, iree_host_size_t byte_length, void** out_ptr) {
+ return iree_allocator_issue_alloc(allocator, IREE_ALLOCATOR_COMMAND_CALLOC,
+ byte_length, out_ptr);
+}
+
+IREE_API_EXPORT iree_status_t iree_allocator_malloc_uninitialized(
+ iree_allocator_t allocator, iree_host_size_t byte_length, void** out_ptr) {
+ return iree_allocator_issue_alloc(allocator, IREE_ALLOCATOR_COMMAND_MALLOC,
+ byte_length, out_ptr);
+}
+
+IREE_API_EXPORT iree_status_t
+iree_allocator_realloc(iree_allocator_t allocator, iree_host_size_t byte_length,
+ void** inout_ptr) {
+ return iree_allocator_issue_alloc(allocator, IREE_ALLOCATOR_COMMAND_REALLOC,
+ byte_length, inout_ptr);
+}
+
+IREE_API_EXPORT iree_status_t
+iree_allocator_clone(iree_allocator_t allocator,
+ iree_const_byte_span_t source_bytes, void** out_ptr) {
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc_uninitialized(
+ allocator, source_bytes.data_length, out_ptr));
+ memcpy(*out_ptr, source_bytes.data, source_bytes.data_length);
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_allocator_free(iree_allocator_t allocator,
+ void* ptr) {
+ if (ptr && allocator.ctl) {
+ iree_status_ignore(allocator.ctl(
+ allocator.self, IREE_ALLOCATOR_COMMAND_FREE, /*params=*/NULL, &ptr));
+ }
+}
+
+static iree_status_t iree_allocator_system_alloc(
+ iree_allocator_command_t command,
+ const iree_allocator_alloc_params_t* params, void** inout_ptr) {
+ IREE_ASSERT_ARGUMENT(params);
+ IREE_ASSERT_ARGUMENT(inout_ptr);
+ iree_host_size_t byte_length = params->byte_length;
+ if (IREE_UNLIKELY(byte_length == 0)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "allocations must be >0 bytes");
+ }
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ void* existing_ptr = *inout_ptr;
+ void* new_ptr = NULL;
+ if (existing_ptr && command == IREE_ALLOCATOR_COMMAND_REALLOC) {
+ new_ptr = realloc(existing_ptr, byte_length);
+ } else {
+ existing_ptr = NULL;
+ if (command == IREE_ALLOCATOR_COMMAND_CALLOC) {
+ new_ptr = calloc(1, byte_length);
+ } else {
+ new_ptr = malloc(byte_length);
+ }
+ }
+ if (!new_ptr) {
+ return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+ "system allocator failed the request");
+ }
+
+ if (existing_ptr) {
+ IREE_TRACE_FREE(existing_ptr);
+ }
+ IREE_TRACE_ALLOC(new_ptr, byte_length);
+
+ *inout_ptr = new_ptr;
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+static iree_status_t iree_allocator_system_free(void** inout_ptr) {
+ IREE_ASSERT_ARGUMENT(inout_ptr);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ void* ptr = *inout_ptr;
+ if (IREE_LIKELY(ptr != NULL)) {
+ IREE_TRACE_FREE(ptr);
+ free(ptr);
+ *inout_ptr = NULL;
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t
+iree_allocator_system_ctl(void* self, iree_allocator_command_t command,
+ const void* params, void** inout_ptr) {
+ switch (command) {
+ case IREE_ALLOCATOR_COMMAND_MALLOC:
+ case IREE_ALLOCATOR_COMMAND_CALLOC:
+ case IREE_ALLOCATOR_COMMAND_REALLOC:
+ return iree_allocator_system_alloc(
+ command, (const iree_allocator_alloc_params_t*)params, inout_ptr);
+ case IREE_ALLOCATOR_COMMAND_FREE:
+ return iree_allocator_system_free(inout_ptr);
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unsupported system allocator command");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Aligned allocations via iree_allocator_t
+//===----------------------------------------------------------------------===//
+
+// Returns true if |alignment| is a power of two (or 0).
+static inline iree_host_size_t iree_alignment_is_pot(
+ iree_host_size_t alignment) {
+ return (alignment & (alignment - 1)) == 0;
+}
+
+// Returns a pointer into |unaligned_ptr| where |offset| matches |alignment|.
+static inline void* iree_aligned_ptr(void* unaligned_ptr,
+ iree_host_size_t alignment,
+ iree_host_size_t offset) {
+ return (void*)((((uintptr_t)unaligned_ptr + (alignment + sizeof(void*)) +
+ offset) &
+ ~(uintptr_t)(alignment - 1)) -
+ offset);
+}
+
+// Returns the base unaligned pointer for |aligned_ptr|.
+static inline void* iree_aligned_ptr_get_base(void* aligned_ptr) {
+ void** ptr_ref =
+ (void**)((uintptr_t)aligned_ptr & ~(uintptr_t)(sizeof(void*) - 1));
+ return ptr_ref[-1];
+}
+
+// Sets the base unaligned pointer in |aligned_ptr|.
+static inline void iree_aligned_ptr_set_base(void* aligned_ptr,
+ void* base_ptr) {
+ void** ptr_ref =
+ (void**)((uintptr_t)aligned_ptr & ~(uintptr_t)(sizeof(void*) - 1));
+ ptr_ref[-1] = base_ptr;
+}
+
+IREE_API_EXPORT iree_status_t iree_allocator_malloc_aligned(
+ iree_allocator_t allocator, iree_host_size_t byte_length,
+ iree_host_size_t min_alignment, iree_host_size_t offset, void** out_ptr) {
+ IREE_ASSERT_ARGUMENT(out_ptr);
+ if (IREE_UNLIKELY(byte_length == 0)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "allocations must be >0 bytes");
+ }
+ const iree_host_size_t alignment = iree_max(min_alignment, iree_max_align_t);
+ if (IREE_UNLIKELY(!iree_alignment_is_pot(alignment))) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "alignments must be powers of two (got %" PRIhsz ")", min_alignment);
+ }
+
+ // [base ptr] [padding...] [aligned data] [padding...]
+ const iree_host_size_t total_length =
+ sizeof(uintptr_t) + byte_length + alignment;
+ void* unaligned_ptr = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_allocator_malloc(allocator, total_length, (void**)&unaligned_ptr));
+ void* aligned_ptr = iree_aligned_ptr(unaligned_ptr, alignment, offset);
+
+ iree_aligned_ptr_set_base(aligned_ptr, unaligned_ptr);
+ *out_ptr = aligned_ptr;
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t iree_allocator_realloc_aligned(
+ iree_allocator_t allocator, iree_host_size_t byte_length,
+ iree_host_size_t min_alignment, iree_host_size_t offset, void** inout_ptr) {
+ IREE_ASSERT_ARGUMENT(inout_ptr);
+ if (!*inout_ptr) {
+ return iree_allocator_malloc_aligned(allocator, byte_length, min_alignment,
+ offset, inout_ptr);
+ }
+ if (IREE_UNLIKELY(byte_length == 0)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "allocations must be >0 bytes");
+ }
+ const iree_host_size_t alignment = iree_max(min_alignment, iree_max_align_t);
+ if (IREE_UNLIKELY(!iree_alignment_is_pot(alignment))) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "alignments must be powers of two (got %" PRIhsz ")", min_alignment);
+ }
+ void* aligned_ptr = *inout_ptr;
+ void* unaligned_ptr = iree_aligned_ptr_get_base(aligned_ptr);
+ if (IREE_UNLIKELY(aligned_ptr !=
+ iree_aligned_ptr(unaligned_ptr, alignment, offset))) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "reallocation must have the same alignment as the "
+ "original allocation (got %" PRIhsz ")",
+ min_alignment);
+ }
+
+ // Since the reallocated memory block may have a different unaligned base to
+ // aligned offset we may need to move the data. Capture the original offset
+ // into the unaligned base where the valid data resides.
+ uintptr_t old_offset = (uintptr_t)aligned_ptr - (uintptr_t)unaligned_ptr;
+
+ // [base ptr] [padding...] [aligned data] [padding...]
+ const iree_host_size_t total_length =
+ sizeof(uintptr_t) + byte_length + alignment;
+ IREE_RETURN_IF_ERROR(
+ iree_allocator_realloc(allocator, total_length, (void**)&unaligned_ptr));
+ aligned_ptr = iree_aligned_ptr(unaligned_ptr, alignment, offset);
+
+ const uint8_t* old_data = (uint8_t*)unaligned_ptr + old_offset;
+ uint8_t* new_data = (uint8_t*)aligned_ptr;
+ if (old_data != new_data) {
+ // Alignment at offset changed; copy data to the new aligned offset.
+ // NOTE: this is copying up to the *new* byte length, as we don't store the
+ // old length and don't know how much to copy. Since we've already
+ // reallocated we know this will always be in-bounds, but it's inefficient.
+ // NOTE: memmove instead of memcpy as the regions may overlap.
+ memmove(new_data, old_data, byte_length);
+ }
+
+ iree_aligned_ptr_set_base(aligned_ptr, unaligned_ptr);
+ *inout_ptr = aligned_ptr;
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_allocator_free_aligned(iree_allocator_t allocator,
+ void* ptr) {
+ if (ptr) {
+ void* unaligned_ptr = iree_aligned_ptr_get_base(ptr);
+ iree_allocator_free(allocator, unaligned_ptr);
+ }
+}
diff --git a/runtime/src/iree/base/allocator.h b/runtime/src/iree/base/allocator.h
new file mode 100644
index 0000000..9ac26f4
--- /dev/null
+++ b/runtime/src/iree/base/allocator.h
@@ -0,0 +1,286 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_ALLOCATOR_H_
+#define IREE_BASE_ALLOCATOR_H_
+
+#include <memory.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/alignment.h"
+#include "iree/base/attributes.h"
+#include "iree/base/config.h"
+#include "iree/base/status.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// Returns the number of elements in an array as a compile-time constant, which
+// can be used in defining new arrays. Fails at compile-time if |arr| is not a
+// static array (such as if used on a pointer type). Similar to `countof()`.
+//
+// Example:
+// uint8_t kConstantArray[512];
+// assert(IREE_ARRAYSIZE(kConstantArray) == 512);
+#define IREE_ARRAYSIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+#define iree_min(lhs, rhs) ((lhs) <= (rhs) ? (lhs) : (rhs))
+#define iree_max(lhs, rhs) ((lhs) <= (rhs) ? (rhs) : (lhs))
+
+#if IREE_STATISTICS_ENABLE
+// Evaluates the expression code only if statistics are enabled.
+//
+// Example:
+// struct {
+// IREE_STATISTICS(uint32_t stats_only_value);
+// } my_object;
+// IREE_STATISTICS(my_object.stats_only_value = 5);
+// IREE_STATISTICS({
+// my_object.stats_only_value = 5;
+// });
+#define IREE_STATISTICS(expr) expr
+#else
+#define IREE_STATISTICS(expr)
+#endif // IREE_STATISTICS_ENABLE
+
+//===----------------------------------------------------------------------===//
+// Byte buffers and memory utilities
+//===----------------------------------------------------------------------===//
+
+// A span of mutable bytes (ala std::span of uint8_t).
+typedef struct iree_byte_span_t {
+ uint8_t* data;
+ iree_host_size_t data_length;
+} iree_byte_span_t;
+
+static inline iree_byte_span_t iree_make_byte_span(
+    void* data, iree_host_size_t data_length) {
+  // Named-struct-then-return keeps this valid in both C and C++ TUs.
+  iree_byte_span_t span = {(uint8_t*)data, data_length};
+  return span;
+}
+
+static inline iree_byte_span_t iree_byte_span_empty() {
+  // An empty span: NULL data with zero length.
+  return iree_make_byte_span(NULL, 0);
+}
+
+// Returns true if |span| references no bytes (NULL data or zero length).
+// NOTE: `inline` added to match the sibling `static inline` helpers in this
+// header; a plain `static` definition in a widely-included header emits
+// -Wunused-function warnings in translation units that never call it.
+static inline bool iree_byte_span_is_empty(iree_byte_span_t span) {
+  return span.data == NULL || span.data_length == 0;
+}
+
+// A span of constant bytes (ala std::span of const uint8_t).
+typedef struct iree_const_byte_span_t {
+ const uint8_t* data;
+ iree_host_size_t data_length;
+} iree_const_byte_span_t;
+
+static inline iree_const_byte_span_t iree_make_const_byte_span(
+    const void* data, iree_host_size_t data_length) {
+  // Named-struct-then-return keeps this valid in both C and C++ TUs.
+  iree_const_byte_span_t span = {(const uint8_t*)data, data_length};
+  return span;
+}
+
+static inline iree_const_byte_span_t iree_const_byte_span_empty() {
+  // An empty span: NULL data with zero length.
+  return iree_make_const_byte_span(NULL, 0);
+}
+
+// Returns true if |span| references no bytes (NULL data or zero length).
+// NOTE: `inline` added to match the sibling `static inline` helpers in this
+// header; a plain `static` definition in a widely-included header emits
+// -Wunused-function warnings in translation units that never call it.
+static inline bool iree_const_byte_span_is_empty(iree_const_byte_span_t span) {
+  return span.data == NULL || span.data_length == 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Totally shady stack allocation
+//===----------------------------------------------------------------------===//
+// TODO(benvanik): remove our uses of this or make them more explicit.
+
+#if defined(IREE_COMPILER_MSVC)
+// The safe malloca that may fall back to heap in the case of stack overflows:
+// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/malloca?view=vs-2019
+// Because that gets really annoying to deal with during error handling we just
+// go for _alloca which may generate SEH exceptions if we blow the stack.
+#include <malloc.h>
+#define iree_alloca(sz) _alloca(sz)
+#else
+#include <alloca.h>
+#define iree_alloca(sz) alloca(sz)
+#endif // IREE_COMPILER_MSVC
+
+//===----------------------------------------------------------------------===//
+// iree_allocator_t (std::allocator-like interface)
+//===----------------------------------------------------------------------===//
+
+// Controls the behavior of an iree_allocator_ctl_fn_t callback function.
+typedef enum iree_allocator_command_e {
+ // Allocates |byte_length| of memory and stores the pointer in |inout_ptr|.
+ // Systems should align to 16 byte boundaries (or otherwise their natural
+ // SIMD alignment). The runtime pools internally and small allocations
+ // (usually) won't be made through this interface.
+ //
+ // iree_allocator_ctl_fn_t:
+ // params: iree_allocator_alloc_params_t
+ // inout_ptr: set to allocated pointer
+ IREE_ALLOCATOR_COMMAND_MALLOC = 0,
+
+ // As with IREE_ALLOCATOR_COMMAND_MALLOC but zeros the memory.
+ //
+ // The contents of the allocation *must* be zeroed by the allocator prior to
+ // returning. Allocators may be able to elide the zeroing if they allocate
+ // fresh pages from the system. It is always safe to zero contents if the
+ // behavior of the allocator is not under our control.
+ //
+ // iree_allocator_ctl_fn_t:
+ // params: iree_allocator_alloc_params_t
+ // inout_ptr: set to allocated pointer
+ IREE_ALLOCATOR_COMMAND_CALLOC,
+
+ // Tries to resize an allocation provided via |inout_ptr|, if possible.
+ // If the existing allocation is not reused then it is freed as if a call to
+ // iree_allocator_free had been called on it. If the allocation fails then
+ // the provided existing allocation is unmodified. Only pointers previously
+ // received from the iree_allocator_t are valid.
+ //
+ // iree_allocator_ctl_fn_t:
+ // params: iree_allocator_alloc_params_t
+ // inout_ptr: pointer of existing allocation; updated to realloced pointer
+ IREE_ALLOCATOR_COMMAND_REALLOC,
+
+ // Frees the memory pointed to by |inout_ptr|.
+ //
+ // iree_allocator_ctl_fn_t:
+ // params: unused
+ // inout_ptr: pointer to free
+ IREE_ALLOCATOR_COMMAND_FREE,
+} iree_allocator_command_t;
+
+// Parameters for various allocation commands.
+typedef struct iree_allocator_alloc_params_t {
+ // Minimum size, in bytes, of the allocation. The underlying allocator may
+ // pad the length out if needed.
+ iree_host_size_t byte_length;
+} iree_allocator_alloc_params_t;
+
+// Function pointer for an iree_allocator_t control function.
+// |command| provides the operation to perform. Optionally some commands may use
+// |params| to pass additional operation-specific parameters. |inout_ptr| usage
+// is defined by each operation but is generally a pointer to the pointer to
+// set to the newly allocated memory or a pointer to the pointer to free.
+typedef iree_status_t(IREE_API_PTR* iree_allocator_ctl_fn_t)(
+ void* self, iree_allocator_command_t command, const void* params,
+ void** inout_ptr);
+
+// An allocator for host-memory allocations.
+// IREE will attempt to use this in place of the system malloc and free.
+// Pass the iree_allocator_system() macro to use the system allocator.
+typedef struct iree_allocator_t {
+ // Control function data.
+ void* self;
+ // ioctl-style control function servicing all allocator-related commands.
+ // See iree_allocator_command_t for more information.
+ iree_allocator_ctl_fn_t ctl;
+} iree_allocator_t;
+
+// Allocates a block of |byte_length| bytes from the given allocator.
+// The contents of the returned memory is guaranteed to be zeroed.
+IREE_API_EXPORT iree_status_t iree_allocator_malloc(
+ iree_allocator_t allocator, iree_host_size_t byte_length, void** out_ptr);
+
+// Allocates a block of |byte_length| bytes from the given allocator.
+// The content of the buffer returned is undefined: it may be zeros, a
+// debug-fill pattern, or random memory from elsewhere in the process.
+// Only use this when immediately overwriting all memory.
+IREE_API_EXPORT iree_status_t iree_allocator_malloc_uninitialized(
+ iree_allocator_t allocator, iree_host_size_t byte_length, void** out_ptr);
+
+// Reallocates |inout_ptr| to |byte_length| bytes with the given allocator.
+// If the reallocation fails then the original |inout_ptr| is unmodified.
+//
+// WARNING: when extending the newly allocated bytes are undefined.
+// TODO(benvanik): make them zeros; we should have an _uninitialized if needed.
+IREE_API_EXPORT iree_status_t iree_allocator_realloc(
+ iree_allocator_t allocator, iree_host_size_t byte_length, void** inout_ptr);
+
+// Duplicates the given byte block by allocating memory and copying it in.
+IREE_API_EXPORT iree_status_t
+iree_allocator_clone(iree_allocator_t allocator,
+ iree_const_byte_span_t source_bytes, void** out_ptr);
+
+// Frees a previously-allocated block of memory to the given allocator.
+IREE_API_EXPORT void iree_allocator_free(iree_allocator_t allocator, void* ptr);
+
+// Default C allocator controller using malloc/free.
+IREE_API_EXPORT iree_status_t
+iree_allocator_system_ctl(void* self, iree_allocator_command_t command,
+ const void* params, void** inout_ptr);
+
+// Allocates using the iree_allocator_malloc and iree_allocator_free methods.
+// These will usually be backed by malloc and free.
+static inline iree_allocator_t iree_allocator_system(void) {
+  // self is unused by the system controller; all commands route through ctl.
+  iree_allocator_t allocator = {NULL, iree_allocator_system_ctl};
+  return allocator;
+}
+
+// Does not perform any allocation or deallocation; used to wrap objects that
+// are owned by external code/live in read-only memory/etc.
+static inline iree_allocator_t iree_allocator_null(void) {
+  // No controller: used for externally-owned/read-only memory wrappers.
+  iree_allocator_t allocator = {NULL, NULL};
+  return allocator;
+}
+
+// Returns true if the allocator is `iree_allocator_null()`.
+static inline bool iree_allocator_is_null(iree_allocator_t allocator) {
+  // Only the control function matters; self may legitimately be NULL.
+  return !allocator.ctl;
+}
+
+//===----------------------------------------------------------------------===//
+// Aligned allocations via iree_allocator_t
+//===----------------------------------------------------------------------===//
+
+// Allocates memory of size |byte_length| where the byte starting at |offset|
+// has a minimum alignment of |min_alignment|. In many cases |offset| can be 0.
+//
+// The |offset| can be used to ensure the alignment-sensitive portion of a
+// combined allocation is aligned while any prefix metadata has system
+// alignment. For example:
+// typedef struct {
+// uint32_t some_metadata;
+// uint8_t data[];
+// } buffer_t;
+// buffer_t* buffer = NULL;
+// iree_allocator_malloc_aligned(allocator, sizeof(buffer_t) + length,
+// 4096, offsetof(buffer_t, data), &buffer);
+// // `buffer` has system alignment, but the `data` will be aligned on at
+// // least a 4096 boundary.
+//
+// The contents of the returned memory is guaranteed to be zeroed.
+IREE_API_EXPORT iree_status_t iree_allocator_malloc_aligned(
+ iree_allocator_t allocator, iree_host_size_t byte_length,
+ iree_host_size_t min_alignment, iree_host_size_t offset, void** out_ptr);
+
+// Reallocates memory to |byte_length|, growing or shrinking as needed.
+// Only valid on memory allocated with iree_allocator_malloc_aligned.
+// The newly reallocated memory will have the byte at |offset| aligned to at
+// least |min_alignment|.
+IREE_API_EXPORT iree_status_t iree_allocator_realloc_aligned(
+ iree_allocator_t allocator, iree_host_size_t byte_length,
+ iree_host_size_t min_alignment, iree_host_size_t offset, void** inout_ptr);
+
+// Frees a |ptr| previously returned from iree_allocator_malloc_aligned.
+IREE_API_EXPORT void iree_allocator_free_aligned(iree_allocator_t allocator,
+ void* ptr);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_ALLOCATOR_H_
diff --git a/runtime/src/iree/base/api.c b/runtime/src/iree/base/api.c
new file mode 100644
index 0000000..6ba51a3
--- /dev/null
+++ b/runtime/src/iree/base/api.c
@@ -0,0 +1,27 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/api.h"
+
+//===----------------------------------------------------------------------===//
+// IREE Core API
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT iree_status_t
+iree_api_version_check(iree_api_version_t expected_version,
+                       iree_api_version_t* out_actual_version) {
+  // The out parameter is required even on failure so callers can report it.
+  if (!out_actual_version) {
+    return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+  }
+  const iree_api_version_t actual_version = IREE_API_VERSION_0;
+  *out_actual_version = actual_version;
+  if (expected_version == actual_version) return iree_ok_status();
+  return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                          "IREE version mismatch; application expected "
+                          "%d but IREE is compiled as %d",
+                          expected_version, actual_version);
+}
diff --git a/runtime/src/iree/base/api.h b/runtime/src/iree/base/api.h
new file mode 100644
index 0000000..682c063
--- /dev/null
+++ b/runtime/src/iree/base/api.h
@@ -0,0 +1,143 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// API Versioning
+// -----------------------------------------------------------------------------
+//
+// The C API is designed to be versioned such that breaking changes either in
+// ABI (data types, struct sizes, etc) or signatures (function arguments change)
+// will result in a bump of the IREE_API_VERSION_LATEST value.
+//
+// When linked in statically the runtime should never have a version conflict,
+// however dynamic linking where the runtime is a shared object loaded at
+// runtime (via dlopen/etc) must always verify the version is as expected.
+//
+// In the current experimental state of the runtime the API may break frequently
+// and the version is pinned at 0.
+//
+// Example:
+// void* library = dlopen("iree_rt.so", RTLD_LAZY | RTLD_LOCAL);
+// iree_api_version_t actual_version;
+// iree_status_t status = \
+// ((PFN_iree_api_version_check)dlsym(library, "iree_api_version_check"))(
+// IREE_API_VERSION_LATEST, &actual_version);
+// IREE_CHECK_OK(status);
+// dlclose(library);
+//
+// Object Ownership and Lifetime
+// -----------------------------------------------------------------------------
+//
+// The API follows the CoreFoundation ownership policies:
+// https://developer.apple.com/library/archive/documentation/CoreFoundation/Conceptual/CFMemoryMgmt/Concepts/Ownership.html
+//
+// These boil down to:
+// * Objects returned from *_create or *_copy functions are owned by the caller
+// and must be released when the caller no longer needs them.
+// * Objects returned from accessors are not owned by the caller and must be
+// retained by the caller if the object lifetime needs to be extended.
+// * Objects passed to functions by argument may be retained by the callee if
+// required.
+//
+// Example:
+// iree_file_mapping_t* file_mapping;
+// s = iree_file_mapping_open_read(..., &file_mapping);
+// // file_mapping is now owned by this function.
+// s = iree_file_mapping_some_call(file_mapping, ...);
+// // Must release ownership when no longer required.
+// s = iree_file_mapping_release(file_mapping);
+//
+// String Formatting
+// -----------------------------------------------------------------------------
+//
+// Functions that produce variable-length strings follow a standard usage
+// pattern with the arguments:
+// `iree_host_size_t buffer_capacity`: total bytes including \0 available.
+// `char* buffer`: optional buffer to write into.
+// `iree_host_size_t* out_buffer_length`: required/actual length excluding \0.
+//
+// To query the size required for the output and allocate storage:
+// iree_host_size_t required_length = 0;
+// iree_format_xyz(/*buffer_capacity=*/0, /*buffer=*/NULL, &required_length);
+// iree_host_size_t buffer_capacity = required_length + 1;
+// char* buffer = iree_allocator_malloc(buffer_capacity);
+// iree_host_size_t actual_length = 0;
+// iree_format_xyz(buffer_capacity, buffer, &actual_length);
+// ASSERT(required_length == actual_length);
+//
+// To handle fixed-length maximum strings (common):
+// // Fails if the string is longer than 127 characters (127 + \0 >= 128).
+// char buffer[128];
+// IREE_RETURN_IF_ERROR(iree_format_xyz(sizeof(buffer), buffer, NULL));
+//
+// Try fixed-length and fallback to a dynamic allocation:
+// char inline_buffer[128];
+// iree_host_size_t required_length = 0;
+// iree_status_t inline_status = iree_format_xyz(sizeof(inline_buffer),
+// inline_buffer,
+// &required_length);
+// if (iree_status_is_out_of_range(inline_status)) {
+// // Spilled inline_buffer, need to allocate required_length bytes and
+// // try again.
+// // ... see above for example ...
+// } else if (iree_status_is_ok(inline_status)) {
+// // Fit inside inline_buffer, required_length contains actual length.
+// } else {
+// return inline_status;
+// }
+
+#ifndef IREE_BASE_API_H_
+#define IREE_BASE_API_H_
+
+#include "iree/base/alignment.h" // IWYU pragma: export
+#include "iree/base/allocator.h" // IWYU pragma: export
+#include "iree/base/assert.h" // IWYU pragma: export
+#include "iree/base/attributes.h" // IWYU pragma: export
+#include "iree/base/bitfield.h" // IWYU pragma: export
+#include "iree/base/config.h" // IWYU pragma: export
+#include "iree/base/loop.h" // IWYU pragma: export
+#include "iree/base/loop_inline.h" // IWYU pragma: export
+#include "iree/base/status.h" // IWYU pragma: export
+#include "iree/base/string_builder.h" // IWYU pragma: export
+#include "iree/base/string_view.h" // IWYU pragma: export
+#include "iree/base/time.h" // IWYU pragma: export
+#include "iree/base/wait_source.h" // IWYU pragma: export
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// IREE Core API
+//===----------------------------------------------------------------------===//
+
+// Sprinkle this wherever to make it easier to find structs/functions that are
+// not yet stable.
+#define IREE_API_UNSTABLE
+
+// Known versions of the API that can be referenced in code.
+// Out-of-bounds values are possible in forward-versioned changes.
+typedef enum iree_api_version_e {
+ IREE_API_VERSION_0 = 0,
+ // Always set to the latest version of the library from source.
+ IREE_API_VERSION_LATEST = IREE_API_VERSION_0,
+} iree_api_version_t;
+
+// Checks whether the |expected_version| of the caller matches the implemented
+// version of |out_actual_version|. Forward compatibility of the API is
+// supported but backward compatibility is not: newer binaries using older
+// shared libraries of the runtime will fail.
+//
+// Returns IREE_STATUS_OUT_OF_RANGE if the actual version is not compatible with
+// the expected version.
+IREE_API_EXPORT iree_status_t
+iree_api_version_check(iree_api_version_t expected_version,
+ iree_api_version_t* out_actual_version);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_API_H_
diff --git a/runtime/src/iree/base/assert.h b/runtime/src/iree/base/assert.h
new file mode 100644
index 0000000..930baab
--- /dev/null
+++ b/runtime/src/iree/base/assert.h
@@ -0,0 +1,77 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_ASSERT_H_
+#define IREE_BASE_ASSERT_H_
+
+#include <assert.h>
+
+#include "iree/base/config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// IREE_ASSERT macros
+//===----------------------------------------------------------------------===//
+// These are no-oped in builds with NDEBUG defined (by default anything but
+// `-c dbg`/`-DCMAKE_BUILD_TYPE=Debug`). They differ from assert in that
+// they avoid unused variable warnings when NDEBUG is defined. As with normal
+// assert() ensure that side-effecting behavior is avoided as the expression
+// will not be evaluated when the asserts are removed!
+
+#if defined(NDEBUG) // N(o) DEBUG
+
+// Assertions disabled:
+
+#define IREE_ASSERT(condition, ...) \
+ while (false && (condition)) { \
+ }
+
+// TODO(benvanik): replace the status_matchers version with a test macro.
+// #define IREE_ASSERT_OK(status) IREE_ASSERT(iree_status_is_ok(status))
+
+// However, we still want the compiler to parse x and y because
+// we don't want to lose potentially useful errors and warnings
+// (and want to hide unused variable warnings when asserts are disabled).
+// _IREE_ASSERT_CMP is a helper and should not be used outside of this file.
+#define _IREE_ASSERT_CMP(x, op, y, ...) \
+ while (false && ((void)(x), (void)(y), 0)) { \
+ }
+
+#else
+
+// Assertions enabled:
+
+#define IREE_ASSERT(condition, ...) assert(condition)
+
+// TODO(#2843): better logging of status assertions.
+// #define IREE_ASSERT_OK(status) IREE_ASSERT(iree_status_is_ok(status))
+
+#define _IREE_ASSERT_CMP(x, op, y, ...) IREE_ASSERT(((x)op(y)), __VA_ARGS__)
+
+#endif // NDEBUG
+
+#define IREE_ASSERT_ARGUMENT(name) IREE_ASSERT(name)
+
+#define IREE_ASSERT_TRUE(expr, ...) IREE_ASSERT(!!(expr), __VA_ARGS__)
+#define IREE_ASSERT_FALSE(expr, ...) IREE_ASSERT(!(expr), __VA_ARGS__)
+
+#define IREE_ASSERT_UNREACHABLE(...) IREE_ASSERT(false, __VA_ARGS__)
+
+#define IREE_ASSERT_EQ(x, y, ...) _IREE_ASSERT_CMP(x, ==, y, __VA_ARGS__)
+#define IREE_ASSERT_NE(x, y, ...) _IREE_ASSERT_CMP(x, !=, y, __VA_ARGS__)
+#define IREE_ASSERT_LE(x, y, ...) _IREE_ASSERT_CMP(x, <=, y, __VA_ARGS__)
+#define IREE_ASSERT_LT(x, y, ...) _IREE_ASSERT_CMP(x, <, y, __VA_ARGS__)
+#define IREE_ASSERT_GE(x, y, ...) _IREE_ASSERT_CMP(x, >=, y, __VA_ARGS__)
+#define IREE_ASSERT_GT(x, y, ...) _IREE_ASSERT_CMP(x, >, y, __VA_ARGS__)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_ASSERT_H_
diff --git a/runtime/src/iree/base/attributes.h b/runtime/src/iree/base/attributes.h
new file mode 100644
index 0000000..bd396a9
--- /dev/null
+++ b/runtime/src/iree/base/attributes.h
@@ -0,0 +1,194 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_ATTRIBUTES_H_
+#define IREE_BASE_ATTRIBUTES_H_
+
+#include "iree/base/target_platform.h"
+
+//===----------------------------------------------------------------------===//
+// API/ABI interop
+//===----------------------------------------------------------------------===//
+
+// Denotes a method exported by the IREE API.
+// Any call annotated with this will be relatively stable.
+// Calls without this are considered private to the IREE implementation and
+// should not be relied upon.
+#ifdef __cplusplus
+#define IREE_API_EXPORT extern "C"
+#else
+#define IREE_API_EXPORT
+#endif // __cplusplus
+
+// Denotes a function pointer that is exposed as part of the IREE API.
+// Example:
+// iree_status_t(IREE_API_PTR* some_callback)(int value);
+#define IREE_API_PTR
+
+//===----------------------------------------------------------------------===//
+// IREE_HAVE_ATTRIBUTE
+//===----------------------------------------------------------------------===//
+
+// Queries for [[attribute]] identifiers in modern compilers.
+#ifdef __has_attribute
+#define IREE_HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define IREE_HAVE_ATTRIBUTE(x) 0
+#endif // __has_attribute
+
+//===----------------------------------------------------------------------===//
+// IREE_PRINTF_ATTRIBUTE
+//===----------------------------------------------------------------------===//
+
+// Tells the compiler to perform `printf` format string checking if the
+// compiler supports it; see the 'format' attribute in
+// <https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Function-Attributes.html>.
+#if IREE_HAVE_ATTRIBUTE(format) || (defined(__GNUC__) && !defined(__clang__))
+#define IREE_PRINTF_ATTRIBUTE(string_index, first_to_check) \
+ __attribute__((__format__(__printf__, string_index, first_to_check)))
+#else
+// TODO(benvanik): use _Printf_format_string_ in SAL for MSVC.
+#define IREE_PRINTF_ATTRIBUTE(string_index, first_to_check)
+#endif // IREE_HAVE_ATTRIBUTE
+
+//===----------------------------------------------------------------------===//
+// IREE_ATTRIBUTE_NORETURN
+//===----------------------------------------------------------------------===//
+
+// Tells the compiler that a given function never returns.
+#if IREE_HAVE_ATTRIBUTE(noreturn) || (defined(__GNUC__) && !defined(__clang__))
+#define IREE_ATTRIBUTE_NORETURN __attribute__((noreturn))
+#elif defined(_MSC_VER)
+#define IREE_ATTRIBUTE_NORETURN __declspec(noreturn)
+#else
+#define IREE_ATTRIBUTE_NORETURN
+#endif // IREE_HAVE_ATTRIBUTE(noreturn)
+
+//===----------------------------------------------------------------------===//
+// IREE_MUST_USE_RESULT
+//===----------------------------------------------------------------------===//
+
+// Annotation for function return values that ensures that they are used by the
+// caller.
+#if IREE_HAVE_ATTRIBUTE(nodiscard)
+#define IREE_MUST_USE_RESULT [[nodiscard]]
+#elif (defined(__clang__) && IREE_HAVE_ATTRIBUTE(warn_unused_result)) || \
+ (defined(__GNUC__) && (__GNUC__ >= 4))
+#define IREE_MUST_USE_RESULT __attribute__((warn_unused_result))
+#elif defined(_MSC_VER) && (_MSC_VER >= 1700)
+#define IREE_MUST_USE_RESULT _Check_return_
+#else
+#define IREE_MUST_USE_RESULT
+#endif // IREE_HAVE_ATTRIBUTE(nodiscard)
+
+//===----------------------------------------------------------------------===//
+// IREE_RESTRICT
+//===----------------------------------------------------------------------===//
+
+// `restrict` keyword, not supported by some older compilers.
+// We define our own macro in case dependencies use `restrict` differently.
+#if defined(_MSC_VER) && _MSC_VER >= 1900
+#define IREE_RESTRICT __restrict
+#elif defined(_MSC_VER)
+#define IREE_RESTRICT
+#elif defined(__cplusplus)
+#define IREE_RESTRICT __restrict__
+#else
+#define IREE_RESTRICT restrict
+#endif // _MSC_VER
+
+//===----------------------------------------------------------------------===//
+// IREE_ATTRIBUTE_ALWAYS_INLINE / IREE_ATTRIBUTE_NOINLINE
+//===----------------------------------------------------------------------===//
+
+// Forces functions to either inline or not inline. Introduced in gcc 3.1.
+#if IREE_HAVE_ATTRIBUTE(always_inline) || \
+ (defined(__GNUC__) && !defined(__clang__))
+#define IREE_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
+#else
+#define IREE_ATTRIBUTE_ALWAYS_INLINE
+#endif // IREE_HAVE_ATTRIBUTE(always_inline)
+
+#if IREE_HAVE_ATTRIBUTE(noinline) || (defined(__GNUC__) && !defined(__clang__))
+#define IREE_ATTRIBUTE_NOINLINE __attribute__((noinline))
+#else
+#define IREE_ATTRIBUTE_NOINLINE
+#endif // IREE_HAVE_ATTRIBUTE(noinline)
+
+//===----------------------------------------------------------------------===//
+// IREE_ATTRIBUTE_HOT / IREE_ATTRIBUTE_COLD
+//===----------------------------------------------------------------------===//
+
+// Tells GCC that a function is hot or cold. GCC can use this information to
+// improve static analysis, i.e. a conditional branch to a cold function
+// is likely to be not-taken.
+// This annotation is used for function declarations.
+//
+// Example:
+// int foo() IREE_ATTRIBUTE_HOT;
+#if IREE_HAVE_ATTRIBUTE(hot) || (defined(__GNUC__) && !defined(__clang__))
+#define IREE_ATTRIBUTE_HOT __attribute__((hot))
+#else
+#define IREE_ATTRIBUTE_HOT
+#endif // IREE_HAVE_ATTRIBUTE(hot)
+
+#if IREE_HAVE_ATTRIBUTE(cold) || (defined(__GNUC__) && !defined(__clang__))
+#define IREE_ATTRIBUTE_COLD __attribute__((cold))
+#else
+#define IREE_ATTRIBUTE_COLD
+#endif // IREE_HAVE_ATTRIBUTE(cold)
+
+//===----------------------------------------------------------------------===//
+// IREE_LIKELY / IREE_UNLIKELY
+//===----------------------------------------------------------------------===//
+
+// Compiler hint that can be used to indicate conditions that are very very very
+// likely or unlikely. This is most useful for ensuring that unlikely cases such
+// as error handling are moved off the mainline code path such that the code is
+// only paged in when an error occurs.
+//
+// Example:
+// if (IREE_UNLIKELY(something_failed)) {
+// return do_expensive_error_logging();
+// }
+#if defined(__GNUC__) || defined(__clang__)
+#define IREE_LIKELY(x) (__builtin_expect(!!(x), 1))
+#define IREE_UNLIKELY(x) (__builtin_expect(!!(x), 0))
+#else
+#define IREE_LIKELY(x) (x)
+#define IREE_UNLIKELY(x) (x)
+#endif  // __GNUC__ || __clang__
+
+//===----------------------------------------------------------------------===//
+// IREE_ATTRIBUTE_PACKED
+//===----------------------------------------------------------------------===//
+
+#if IREE_HAVE_ATTRIBUTE(packed) || (defined(__GNUC__) && !defined(__clang__))
+#define IREE_ATTRIBUTE_PACKED __attribute__((__packed__))
+#else
+#define IREE_ATTRIBUTE_PACKED
+#endif // IREE_HAVE_ATTRIBUTE(packed)
+
+//===----------------------------------------------------------------------===//
+// IREE_ATTRIBUTE_UNUSED
+//===----------------------------------------------------------------------===//
+
+// Hints that a variable is _maybe_ unused. This is primarily to quiet
+// diagnostic messages about unused variables that crop up around variables
+// passed to assert/logging/etc that gets stripped in certain configurations.
+//
+// Example:
+// int some_info IREE_ATTRIBUTE_UNUSED = compute_debug_info();
+// assert(some_info > 0); // stripped in NDEBUG
+#if IREE_HAVE_ATTRIBUTE(maybe_unused) && defined(__clang__)
+#define IREE_ATTRIBUTE_UNUSED __attribute__((maybe_unused))
+#elif IREE_HAVE_ATTRIBUTE(unused) || (defined(__GNUC__) && !defined(__clang__))
+#define IREE_ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define IREE_ATTRIBUTE_UNUSED
+#endif // IREE_HAVE_ATTRIBUTE(maybe_unused / unused)
+
+#endif // IREE_BASE_ATTRIBUTES_H_
diff --git a/runtime/src/iree/base/bitfield.c b/runtime/src/iree/base/bitfield.c
new file mode 100644
index 0000000..15a46b0
--- /dev/null
+++ b/runtime/src/iree/base/bitfield.c
@@ -0,0 +1,55 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/bitfield.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+IREE_API_EXPORT iree_status_t iree_bitfield_format(
+    uint32_t value, const iree_bitfield_string_mapping_t* mappings,
+    iree_host_size_t mapping_count, iree_string_builder_t* string_builder) {
+  // Bits not yet claimed by a mapping; mappings are consumed greedily in
+  // table order so combined flags listed first win over split ones.
+  uint32_t remaining_bits = value;
+  int match_count = 0;
+  for (iree_host_size_t j = 0; j < mapping_count; ++j) {
+    const iree_bitfield_string_mapping_t mapping = mappings[j];
+    if ((remaining_bits & mapping.bits) != mapping.bits) continue;
+    if (match_count++ > 0) {
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_string(string_builder, IREE_SV("|")));
+    }
+    IREE_RETURN_IF_ERROR(
+        iree_string_builder_append_string(string_builder, mapping.string));
+    remaining_bits &= ~mapping.bits;
+  }
+  // Any bits with no mapping are emitted as a trailing hex literal.
+  if (remaining_bits != 0u) {
+    if (match_count > 0) {
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_string(string_builder, IREE_SV("|")));
+    }
+    IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+        string_builder, "%Xh", remaining_bits));
+  }
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_string_view_t iree_bitfield_format_inline(
+    uint32_t value, const iree_bitfield_string_mapping_t* mappings,
+    iree_host_size_t mapping_count, iree_bitfield_string_temp_t* out_temp) {
+  // Formats into the caller-provided fixed storage; no heap allocation.
+  iree_string_builder_t builder;
+  iree_string_builder_initialize_with_storage(
+      out_temp->buffer, IREE_ARRAYSIZE(out_temp->buffer), &builder);
+  iree_status_t status =
+      iree_bitfield_format(value, mappings, mapping_count, &builder);
+  if (!iree_status_is_ok(status)) {
+    // Failure (e.g. overflowed the temp storage): drop the status and
+    // return a placeholder rather than propagating an error.
+    iree_status_ignore(status);
+    return IREE_SV("(error)");
+  }
+  return iree_string_builder_view(&builder);
+}
diff --git a/runtime/src/iree/base/bitfield.h b/runtime/src/iree/base/bitfield.h
new file mode 100644
index 0000000..e67fce5
--- /dev/null
+++ b/runtime/src/iree/base/bitfield.h
@@ -0,0 +1,85 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_BITFIELD_H_
+#define IREE_BASE_BITFIELD_H_
+
+#include "iree/base/attributes.h"
+#include "iree/base/string_builder.h"
+#include "iree/base/string_view.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Bitfield utilities
+//===----------------------------------------------------------------------===//
+
+// Returns true if any bit from |rhs| is set in |lhs|.
+#define iree_any_bit_set(lhs, rhs) (((lhs) & (rhs)) != 0)
+// Returns true iff all bits from |rhs| are set in |lhs|.
+#define iree_all_bits_set(lhs, rhs) (((lhs) & (rhs)) == (rhs))
+
+// Maps bits within a bitfield to a string literal.
+typedef struct iree_bitfield_string_mapping_t {
+ uint32_t bits;
+ iree_string_view_t string;
+} iree_bitfield_string_mapping_t;
+
+// Appends the formatted contents of the given bitfield value.
+// Processes values in the order of the mapping table provided and will only
+// use each bit once. Use this to prioritize combined flags over split ones.
+//
+// Usage:
+// // Static mapping table:
+// static const iree_bitfield_string_mapping_t my_bitfield_mappings[] = {
+// {MY_BITFIELD_ALL, IREE_SVL("ALL")}, // combined flags first
+// {MY_BITFIELD_A, IREE_SVL("A")},
+// {MY_BITFIELD_B, IREE_SVL("B")},
+// {MY_BITFIELD_C, IREE_SVL("C")},
+// };
+//
+// // Produces the string "A|B":
+// IREE_RETURN_IF_ERROR(iree_bitfield_format(
+// MY_BITFIELD_A | MY_BITFIELD_B,
+// my_bitfield_mappings, IREE_ARRAYSIZE(my_bitfield_mappings),
+// &string_builder));
+//
+// // Produces the string "ALL":
+// IREE_RETURN_IF_ERROR(iree_bitfield_format(
+// MY_BITFIELD_A | MY_BITFIELD_B | MY_BITFIELD_C,
+// my_bitfield_mappings, IREE_ARRAYSIZE(my_bitfield_mappings),
+// &string_builder));
+IREE_API_EXPORT iree_status_t iree_bitfield_format(
+ uint32_t value, const iree_bitfield_string_mapping_t* mappings,
+ iree_host_size_t mapping_count, iree_string_builder_t* string_builder);
+
+// Stack storage for iree_bitfield_format_inline temporary strings.
+typedef struct iree_bitfield_string_temp_t {
+ char buffer[128];
+} iree_bitfield_string_temp_t;
+
+// Appends the formatted contents of the given bitfield value.
+// As with iree_bitfield_format, but the storage for the formatted string is
+// allocated inline on the stack.
+//
+// Usage:
+// // Produces the string "A|B":
+// iree_bitfield_string_temp_t temp;
+// iree_string_view_t my_str = iree_bitfield_format_inline(
+// MY_BITFIELD_A | MY_BITFIELD_B,
+// my_bitfield_mappings, IREE_ARRAYSIZE(my_bitfield_mappings),
+// &temp);
+IREE_API_EXPORT iree_string_view_t iree_bitfield_format_inline(
+ uint32_t value, const iree_bitfield_string_mapping_t* mappings,
+ iree_host_size_t mapping_count, iree_bitfield_string_temp_t* out_temp);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_BITFIELD_H_
diff --git a/runtime/src/iree/base/bitfield_test.cc b/runtime/src/iree/base/bitfield_test.cc
new file mode 100644
index 0000000..c6e9356
--- /dev/null
+++ b/runtime/src/iree/base/bitfield_test.cc
@@ -0,0 +1,83 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <string>
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace {
+
+enum my_bitfield_e {
+ MY_BITFIELD_NONE = 0,
+ MY_BITFIELD_A = 1 << 0,
+ MY_BITFIELD_B = 1 << 1,
+ MY_BITFIELD_ALL = MY_BITFIELD_A | MY_BITFIELD_B,
+};
+typedef uint32_t my_bitfield_t;
+
+template <size_t mapping_count>
+std::string FormatBitfieldValue(
+ uint32_t value,
+ const iree_bitfield_string_mapping_t (&mappings)[mapping_count]) {
+ iree_bitfield_string_temp_t temp;
+ auto sv = iree_bitfield_format_inline(value, mappings, mapping_count, &temp);
+ return std::string(sv.data, sv.size);
+}
+
+// Tests general usage.
+TEST(BitfieldTest, FormatBitfieldValue) {
+ static const iree_bitfield_string_mapping_t mappings[] = {
+ {MY_BITFIELD_A, IREE_SV("A")},
+ {MY_BITFIELD_B, IREE_SV("B")},
+ };
+ EXPECT_EQ("", FormatBitfieldValue(MY_BITFIELD_NONE, mappings));
+ EXPECT_EQ("A", FormatBitfieldValue(MY_BITFIELD_A, mappings));
+ EXPECT_EQ("A|B",
+ FormatBitfieldValue(MY_BITFIELD_A | MY_BITFIELD_B, mappings));
+}
+
+// Tests that empty mapping tables are fine.
+TEST(BitfieldTest, FormatBitfieldValueEmpty) {
+ static const iree_bitfield_string_mapping_t mappings[1] = {
+ {0, IREE_SV("UNUSED")},
+ };
+ iree_bitfield_string_temp_t temp;
+ auto sv = iree_bitfield_format_inline(MY_BITFIELD_NONE, mappings, 0, &temp);
+ EXPECT_TRUE(iree_string_view_is_empty(sv));
+}
+
+// Tests that values not found in the mappings are still displayed.
+TEST(BitfieldTest, FormatBitfieldValueUnhandledValues) {
+ EXPECT_EQ("A|2h", FormatBitfieldValue(MY_BITFIELD_A | MY_BITFIELD_B,
+ {
+ {MY_BITFIELD_A, IREE_SV("A")},
+ }));
+}
+
+// Tests priority order in the mapping table.
+TEST(BitfieldTest, FormatBitfieldValuePriority) {
+ // No priority, will do separate.
+ EXPECT_EQ("A|B", FormatBitfieldValue(MY_BITFIELD_A | MY_BITFIELD_B,
+ {
+ {MY_BITFIELD_A, IREE_SV("A")},
+ {MY_BITFIELD_B, IREE_SV("B")},
+ {MY_BITFIELD_ALL, IREE_SV("ALL")},
+ }));
+
+ // Priority on the combined flag, use that instead.
+ EXPECT_EQ("ALL", FormatBitfieldValue(MY_BITFIELD_A | MY_BITFIELD_B,
+ {
+ {MY_BITFIELD_ALL, IREE_SV("ALL")},
+ {MY_BITFIELD_A, IREE_SV("A")},
+ {MY_BITFIELD_B, IREE_SV("B")},
+ }));
+}
+
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/base/config.h b/runtime/src/iree/base/config.h
new file mode 100644
index 0000000..49a7a93
--- /dev/null
+++ b/runtime/src/iree/base/config.h
@@ -0,0 +1,254 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===----------------------------------------------------------------------===//
+//
+// ██ ██ █████ ██████ ███ ██ ██ ███ ██ ██████
+// ██ ██ ██ ██ ██ ██ ████ ██ ██ ████ ██ ██
+// ██ █ ██ ███████ ██████ ██ ██ ██ ██ ██ ██ ██ ██ ███
+// ██ ███ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
+// ███ ███ ██ ██ ██ ██ ██ ████ ██ ██ ████ ██████
+//
+//===----------------------------------------------------------------------===//
+//
+// This file controls global configuration parameters used throughout IREE.
+// Each option added here should be considered something worth enabling an
+// entirely new testing configuration to test and may involve fanning out many
+// configurations depending on which flags are mutually non-exclusive.
+// Err on the side of using runtime flags for options that have minimal impact
+// to code size or toolchain requirements of our more constrained targets.
+//
+// Examples of good configuration settings:
+// - remote HAL device pointer size (cannot be inferred from local config)
+// - no-op override on synchronization primitives (unsafe, untested)
+//
+// Examples of bad configuration settings:
+// - which HAL backend to use (better as build configuration; link what you use)
+
+#ifndef IREE_BASE_CONFIG_H_
+#define IREE_BASE_CONFIG_H_
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "iree/base/target_platform.h"
+
+//===----------------------------------------------------------------------===//
+// User configuration overrides
+//===----------------------------------------------------------------------===//
+// A user include file always included prior to any IREE configuration. This is
+// used to override the default configuration in this file without needing to
+// modify the IREE code.
+//
+// Specify a custom file with `-DIREE_USER_CONFIG_H="my_config.h"`.
+
+#if defined(IREE_USER_CONFIG_H)
+#include IREE_USER_CONFIG_H
+#endif // IREE_USER_CONFIG_H
+
+//===----------------------------------------------------------------------===//
+// Pointer size specification
+//===----------------------------------------------------------------------===//
+// IREE uses two pointer classes throughout its code:
+//
+// `iree_host_size_t`:
+// The native pointer size of the local "host" code. This is always C's
+// size_t but is aliased to make it easier to differentiate from
+// "unspecified" size_t and iree_device_size_t. Always prefer using this for
+// sizes of pointers that never leave the host.
+//
+// `iree_device_size_t`:
+// The pointer size - possibly larger than needed - for remote "device" code.
+// As the host and device may be running on entirely different machines it is
+// often best to use a conservative value for this: a 32-bit host may be
+// submitting work for a 64-bit device, and using a 32-bit size_t for device
+// pointers would truncate bits and prevent round-tripping.
+//
+// The specific values for these can be overridden with configuration settings:
+
+#if !defined(IREE_HOST_SIZE_T)
+#define IREE_HOST_SIZE_T size_t
+#define PRIhsz "zu"
+#endif // !IREE_HOST_SIZE_T
+
+// Size, in bytes, of a buffer on the local host.
+typedef IREE_HOST_SIZE_T iree_host_size_t;
+
+// Maximum representable value in iree_host_size_t.
+#define IREE_HOST_SIZE_MAX \
+ (sizeof(iree_host_size_t) == 4 ? UINT32_MAX : UINT64_MAX)
+
+#if !defined(IREE_DEVICE_SIZE_T)
+#define IREE_DEVICE_SIZE_T uint64_t
+#define PRIdsz PRIu64
+#endif // !IREE_DEVICE_SIZE_T
+
+// Size, in bytes, of a buffer on remote devices.
+typedef IREE_DEVICE_SIZE_T iree_device_size_t;
+
+// Maximum representable value in iree_device_size_t.
+#define IREE_DEVICE_SIZE_MAX \
+ (sizeof(iree_device_size_t) == 4 ? UINT32_MAX : UINT64_MAX)
+
+//===----------------------------------------------------------------------===//
+// iree_status_t configuration
+//===----------------------------------------------------------------------===//
+// Controls how much information an iree_status_t carries. When set to 0 all of
+// iree_status_t will be turned into just integer results that will never
+// allocate and all string messages will be stripped. Of course, this isn't
+// very useful and the higher modes should be preferred unless binary size is
+// a major concern.
+//
+// IREE_STATUS_MODE = 0: statuses are just integers
+// IREE_STATUS_MODE = 1: statuses have source location of error
+// IREE_STATUS_MODE = 2: statuses also have custom annotations
+// IREE_STATUS_MODE = 3: statuses also have stack traces of the error site
+
+// If no status mode override is provided we'll change the behavior based on
+// build configuration.
+#if !defined(IREE_STATUS_MODE)
+#ifdef NDEBUG
+// Release mode: source location and annotations (IREE_STATUS_MODE 2).
+#define IREE_STATUS_MODE 2
+#else
+// Debug mode: annotations and stack traces.
+#define IREE_STATUS_MODE 3
+#endif // NDEBUG
+#endif // !IREE_STATUS_MODE
+
+//===----------------------------------------------------------------------===//
+// Synchronization and threading
+//===----------------------------------------------------------------------===//
+// On ultra-tiny systems where there may only be a single core - or a single
+// core that is guaranteed to ever call an IREE API - all synchronization
+// primitives used throughout IREE can be turned into no-ops. Note that behavior
+// is undefined if there is use of any `iree_*` API call or memory that is
+// owned by IREE from multiple threads concurrently or across threads without
+// proper barriers in place. Unless your target system is in a similar class to
+// an Arduino this is definitely not what you want.
+
+#if !defined(IREE_SYNCHRONIZATION_DISABLE_UNSAFE)
+#define IREE_SYNCHRONIZATION_DISABLE_UNSAFE 0
+#endif // !IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+//===----------------------------------------------------------------------===//
+// File I/O
+//===----------------------------------------------------------------------===//
+// On platforms without file systems or in applications where no file I/O
+// utilities are used, all file I/O operations can be stripped out. Functions
+// relying on file I/O will still be defined, but they will return errors.
+
+#if !defined(IREE_FILE_IO_ENABLE)
+#define IREE_FILE_IO_ENABLE 1
+#endif // !IREE_FILE_IO_ENABLE
+
+//===----------------------------------------------------------------------===//
+// Statistics/reporting
+//===----------------------------------------------------------------------===//
+// Conditionally enables programmatic access to aggregate statistics. When
+// enabled statistics requires additional per-operation logic and per-resource
+// state that can bloat otherwise minimal structures. Shared resources may also
+// require synchronization where there otherwise would not be any.
+
+#if !defined(IREE_STATISTICS_ENABLE)
+#define IREE_STATISTICS_ENABLE 1
+#endif // !IREE_STATISTICS_ENABLE
+
+//===----------------------------------------------------------------------===//
+// IREE HAL configuration
+//===----------------------------------------------------------------------===//
+// Enables optional HAL features. Each of these may add several KB to the final
+// binary when linked dynamically.
+
+#if !defined(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
+// Power of two byte alignment required on all host heap buffers.
+// Executables are compiled with alignment expectations and the runtime
+// alignment must be greater than or equal to the alignment set in the compiler.
+// External buffers wrapped by HAL buffers must meet this alignment requirement.
+#define IREE_HAL_HEAP_BUFFER_ALIGNMENT 64
+#endif // IREE_HAL_HEAP_BUFFER_ALIGNMENT
+
+#if !defined(IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE)
+// Enables additional validation of commands issued against command buffers.
+// This adds small amounts of per-command overhead but in all but the most
+// constrained environments it's recommended to keep it enabled in order to get
+// the really nice error messages.
+#define IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE 1
+#endif // IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE
+
+#if !defined(IREE_HAL_MODULE_STRING_UTIL_ENABLE)
+// Enables HAL module methods that perform string printing/parsing.
+// This functionality pulls in a large amount of string manipulation code that
+// can be elided if these ops will not be used at runtime. When disabled
+// applications can still call the parse/print routines directly but compiled
+// modules can not.
+#define IREE_HAL_MODULE_STRING_UTIL_ENABLE 1
+#endif // IREE_HAL_MODULE_STRING_UTIL_ENABLE
+
+//===----------------------------------------------------------------------===//
+// IREE VM configuration
+//===----------------------------------------------------------------------===//
+// Enables optional VM features. Each of these adds a few KB to the final binary
+// when using the IREE VM. The compiler must be configured to the same set of
+// available extensions in order to ensure that the compiled modules only use
+// features available on the target they are to run on.
+//
+// See the `-iree-vm-target-extension-*` compiler options for more information.
+
+#if !defined(IREE_VM_BACKTRACE_ENABLE)
+// Enables backtraces in VM failures when debugging information is available.
+#define IREE_VM_BACKTRACE_ENABLE 1
+#endif // !IREE_VM_BACKTRACE_ENABLE
+
+#if !defined(IREE_VM_EXECUTION_TRACING_ENABLE)
+// Enables disassembly of vm bytecode functions and stderr dumping of execution.
+// Increases code size quite a bit, lowers VM performance, and is generally
+// include only when debugging or running on trusted inputs.
+#ifdef NDEBUG
+#define IREE_VM_EXECUTION_TRACING_ENABLE 0
+#else
+#define IREE_VM_EXECUTION_TRACING_ENABLE 1
+#endif // NDEBUG
+#endif // !IREE_VM_EXECUTION_TRACING_ENABLE
+
+#if !defined(IREE_VM_EXECUTION_TRACING_FORCE_ENABLE)
+// Forces tracing of VM execution by default ignoring runtime flags that may
+// otherwise control the behavior. This can be used to enable tracing in tools
+// that do not have flag parsing or plumbing for per-invocation flags.
+#define IREE_VM_EXECUTION_TRACING_FORCE_ENABLE 0
+#endif // !IREE_VM_EXECUTION_TRACING_FORCE_ENABLE
+#if IREE_VM_EXECUTION_TRACING_FORCE_ENABLE
+#define IREE_VM_EXECUTION_TRACING_ENABLE 1
+#endif // IREE_VM_EXECUTION_TRACING_FORCE_ENABLE
+
+#if !defined(IREE_VM_EXECUTION_TRACING_SRC_LOC_ENABLE)
+// Enables printing of the source location of an op when tracing its execution.
+// This may be messy depending on the origin of the locations in the program;
+// for example today the python locs are entire stack traces. Improvements to
+// printing of more complex source locations (or a way to prune them in the
+// compiler) would let this be turned on by default.
+#define IREE_VM_EXECUTION_TRACING_SRC_LOC_ENABLE 0
+#endif // !IREE_VM_EXECUTION_TRACING_SRC_LOC_ENABLE
+
+#if !defined(IREE_VM_EXT_I64_ENABLE)
+// Enables the 64-bit integer instruction extension.
+// Targeted from the compiler with `-iree-vm-target-extension-i64`.
+#define IREE_VM_EXT_I64_ENABLE 1
+#endif // !IREE_VM_EXT_I64_ENABLE
+
+#if !defined(IREE_VM_EXT_F32_ENABLE)
+// Enables the 32-bit floating-point instruction extension.
+// Targeted from the compiler with `-iree-vm-target-extension-f32`.
+#define IREE_VM_EXT_F32_ENABLE 1
+#endif // !IREE_VM_EXT_F32_ENABLE
+
+#if !defined(IREE_VM_EXT_F64_ENABLE)
+// Enables the 64-bit floating-point instruction extension.
+// Targeted from the compiler with `-iree-vm-target-extension-f64`.
+#define IREE_VM_EXT_F64_ENABLE 0
+#endif // !IREE_VM_EXT_F64_ENABLE
+
+#endif // IREE_BASE_CONFIG_H_
diff --git a/runtime/src/iree/base/internal/BUILD b/runtime/src/iree/base/internal/BUILD
new file mode 100644
index 0000000..725bc0d
--- /dev/null
+++ b/runtime/src/iree/base/internal/BUILD
@@ -0,0 +1,414 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Implementations for iree/base/.
+# These are not part of the IREE API. Though they may be used by external
+# projects their API may change at any time.
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library", "iree_runtime_cc_test")
+load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
+load("//build_tools/bazel:cc_binary_benchmark.bzl", "cc_binary_benchmark")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+#===------------------------------------------------------------------------===#
+# Core headers (internal)
+#===------------------------------------------------------------------------===#
+# Put files here that large percentages of the code include only; adding
+# infrequently used files here will cause extraneous large rebuilds.
+
+iree_runtime_cc_library(
+ name = "internal",
+ srcs = [
+ "atomics_clang.h",
+ "atomics_disabled.h",
+ "atomics_gcc.h",
+ "atomics_msvc.h",
+ ],
+ hdrs = [
+ "atomics.h",
+ "debugging.h",
+ "inline_array.h",
+ "math.h",
+ ],
+ deps = [
+ "//runtime/src/iree/base:core_headers",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "atomics_test",
+ srcs = ["atomics_test.cc"],
+ deps = [
+ ":internal",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "math_test",
+ srcs = ["math_test.cc"],
+ deps = [
+ ":internal",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+#===------------------------------------------------------------------------===#
+# Utilities
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "arena",
+ srcs = ["arena.c"],
+ hdrs = ["arena.h"],
+ deps = [
+ ":atomic_slist",
+ ":synchronization",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "atomic_slist",
+ srcs = ["atomic_slist.c"],
+ hdrs = ["atomic_slist.h"],
+ deps = [
+ ":internal",
+ ":synchronization",
+ "//runtime/src/iree/base:core_headers",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "atomic_slist_test",
+ srcs = ["atomic_slist_test.cc"],
+ deps = [
+ ":atomic_slist",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "cpu",
+ srcs = ["cpu.c"],
+ hdrs = ["cpu.h"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "dynamic_library",
+ srcs = [
+ "dynamic_library_posix.c",
+ "dynamic_library_win32.c",
+ ],
+ hdrs = ["dynamic_library.h"],
+ deps = [
+ ":file_path",
+ ":internal",
+ ":synchronization",
+ "//build_tools:default_linkopts",
+ "//build_tools:dl",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "file_io",
+ srcs = ["file_io.c"],
+ hdrs = ["file_io.h"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "file_io_test",
+ srcs = ["file_io_test.cc"],
+ deps = [
+ ":file_io",
+ "//runtime/src/iree/base:cc",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:logging",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "file_path",
+ srcs = ["file_path.c"],
+ hdrs = ["file_path.h"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "file_path_test",
+ srcs = [
+ "file_path_test.cc",
+ ],
+ deps = [
+ ":file_path",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "flags",
+ srcs = ["flags.c"],
+ hdrs = ["flags.h"],
+ deps = [
+ ":file_io",
+ ":internal",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+cc_binary(
+ name = "flags_demo",
+ srcs = ["flags_demo.c"],
+ deps = [
+ ":flags",
+ "//runtime/src/iree/base",
+ ],
+)
+
+iree_lit_test_suite(
+ name = "flags_test",
+ srcs = ["flags_test.txt"],
+ cfg = "//runtime:lit.cfg.py",
+ tags = ["hostonly"],
+ tools = [
+ ":flags_demo",
+ "@llvm-project//llvm:FileCheck",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "fpu_state",
+ srcs = ["fpu_state.c"],
+ hdrs = ["fpu_state.h"],
+ deps = [
+ ":internal",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ ],
+)
+
+cc_binary_benchmark(
+ name = "fpu_state_benchmark",
+ srcs = ["fpu_state_benchmark.cc"],
+ deps = [
+ ":fpu_state",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/testing:benchmark_main",
+ "@com_google_benchmark//:benchmark",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "fpu_state_test",
+ srcs = ["fpu_state_test.cc"],
+ deps = [
+ ":fpu_state",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "main",
+ srcs = [
+ "main_posix.c",
+ "main_win32.c",
+ ],
+ hdrs = ["main.h"],
+ deps = [
+ "//runtime/src/iree/base:core_headers",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "prng",
+ hdrs = ["prng.h"],
+ deps = [
+ ":internal",
+ "//runtime/src/iree/base:core_headers",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "prng_test",
+ srcs = ["prng_test.cc"],
+ deps = [
+ ":prng",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "span",
+ hdrs = ["span.h"],
+)
+
+iree_runtime_cc_library(
+ name = "synchronization",
+ srcs = [
+ "synchronization.c",
+ ],
+ hdrs = [
+ "call_once.h",
+ "synchronization.h",
+ ],
+ deps = [
+ ":internal",
+ "//build_tools:default_linkopts",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+cc_binary_benchmark(
+ name = "synchronization_benchmark",
+ testonly = True,
+ srcs = ["synchronization_benchmark.cc"],
+ deps = [
+ ":synchronization",
+ "//runtime/src/iree/testing:benchmark_main",
+ "@com_google_benchmark//:benchmark",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "synchronization_test",
+ srcs = ["synchronization_test.cc"],
+ deps = [
+ ":synchronization",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "wait_handle",
+ srcs = [
+ "wait_handle.c",
+ "wait_handle_epoll.c",
+ "wait_handle_impl.h",
+ "wait_handle_inproc.c",
+ "wait_handle_kqueue.c",
+ "wait_handle_null.c",
+ "wait_handle_poll.c",
+ "wait_handle_posix.c",
+ "wait_handle_posix.h",
+ "wait_handle_win32.c",
+ ],
+ hdrs = ["wait_handle.h"],
+ deps = [
+ ":synchronization",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "wait_handle_test",
+ srcs = ["wait_handle_test.cc"],
+ deps = [
+ ":wait_handle",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+#===------------------------------------------------------------------------===#
+# Utilities with thread dependencies
+#===------------------------------------------------------------------------===#
+
+iree_cmake_extra_content(
+ content = """
+if(NOT ${IREE_ENABLE_THREADING})
+ return()
+endif()
+""",
+ inline = True,
+)
+
+iree_runtime_cc_library(
+ name = "event_pool",
+ srcs = ["event_pool.c"],
+ hdrs = ["event_pool.h"],
+ deps = [
+ ":internal",
+ ":synchronization",
+ ":wait_handle",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "threading",
+ srcs = [
+ "threading.c",
+ "threading_darwin.c",
+ "threading_impl.h",
+ "threading_pthreads.c",
+ "threading_win32.c",
+ ],
+ hdrs = ["threading.h"],
+ deps = [
+ ":internal",
+ ":synchronization",
+ "//build_tools:default_linkopts",
+ "//build_tools:dl",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "threading_test",
+ srcs = [
+ "threading_impl.h",
+ "threading_test.cc",
+ ],
+ deps = [
+ ":internal",
+ ":synchronization",
+ ":threading",
+ "//runtime/src/iree/base:cc",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
diff --git a/runtime/src/iree/base/internal/CMakeLists.txt b/runtime/src/iree/base/internal/CMakeLists.txt
new file mode 100644
index 0000000..d0e59ca
--- /dev/null
+++ b/runtime/src/iree/base/internal/CMakeLists.txt
@@ -0,0 +1,430 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/base/internal/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ internal
+ HDRS
+ "atomics.h"
+ "debugging.h"
+ "inline_array.h"
+ "math.h"
+ SRCS
+ "atomics_clang.h"
+ "atomics_disabled.h"
+ "atomics_gcc.h"
+ "atomics_msvc.h"
+ DEPS
+ iree::base::core_headers
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ atomics_test
+ SRCS
+ "atomics_test.cc"
+ DEPS
+ ::internal
+ iree::base::core_headers
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ math_test
+ SRCS
+ "math_test.cc"
+ DEPS
+ ::internal
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ arena
+ HDRS
+ "arena.h"
+ SRCS
+ "arena.c"
+ DEPS
+ ::atomic_slist
+ ::synchronization
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ atomic_slist
+ HDRS
+ "atomic_slist.h"
+ SRCS
+ "atomic_slist.c"
+ DEPS
+ ::internal
+ ::synchronization
+ iree::base::core_headers
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ atomic_slist_test
+ SRCS
+ "atomic_slist_test.cc"
+ DEPS
+ ::atomic_slist
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ cpu
+ HDRS
+ "cpu.h"
+ SRCS
+ "cpu.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ dynamic_library
+ HDRS
+ "dynamic_library.h"
+ SRCS
+ "dynamic_library_posix.c"
+ "dynamic_library_win32.c"
+ DEPS
+ ${CMAKE_DL_LIBS}
+ ::file_path
+ ::internal
+ ::synchronization
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ file_io
+ HDRS
+ "file_io.h"
+ SRCS
+ "file_io.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ file_io_test
+ SRCS
+ "file_io_test.cc"
+ DEPS
+ ::file_io
+ iree::base::cc
+ iree::base::core_headers
+ iree::base::logging
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ file_path
+ HDRS
+ "file_path.h"
+ SRCS
+ "file_path.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ file_path_test
+ SRCS
+ "file_path_test.cc"
+ DEPS
+ ::file_path
+ iree::base::core_headers
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ flags
+ HDRS
+ "flags.h"
+ SRCS
+ "flags.c"
+ DEPS
+ ::file_io
+ ::internal
+ iree::base
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_binary(
+ NAME
+ flags_demo
+ SRCS
+ "flags_demo.c"
+ DEPS
+ ::flags
+ iree::base
+)
+
+iree_lit_test_suite(
+ NAME
+ flags_test
+ SRCS
+ "flags_test.txt"
+ TOOLS
+ ::flags_demo
+ FileCheck
+ LABELS
+ "hostonly"
+)
+
+iree_cc_library(
+ NAME
+ fpu_state
+ HDRS
+ "fpu_state.h"
+ SRCS
+ "fpu_state.c"
+ DEPS
+ ::internal
+ iree::base
+ iree::base::core_headers
+ PUBLIC
+)
+
+iree_cc_binary_benchmark(
+ NAME
+ fpu_state_benchmark
+ SRCS
+ "fpu_state_benchmark.cc"
+ DEPS
+ ::fpu_state
+ benchmark
+ iree::base
+ iree::testing::benchmark_main
+ TESTONLY
+)
+
+iree_cc_test(
+ NAME
+ fpu_state_test
+ SRCS
+ "fpu_state_test.cc"
+ DEPS
+ ::fpu_state
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ main
+ HDRS
+ "main.h"
+ SRCS
+ "main_posix.c"
+ "main_win32.c"
+ DEPS
+ iree::base::core_headers
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ prng
+ HDRS
+ "prng.h"
+ DEPS
+ ::internal
+ iree::base::core_headers
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ prng_test
+ SRCS
+ "prng_test.cc"
+ DEPS
+ ::prng
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ span
+ HDRS
+ "span.h"
+ DEPS
+
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ synchronization
+ HDRS
+ "call_once.h"
+ "synchronization.h"
+ SRCS
+ "synchronization.c"
+ DEPS
+ ::internal
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_binary_benchmark(
+ NAME
+ synchronization_benchmark
+ SRCS
+ "synchronization_benchmark.cc"
+ DEPS
+ ::synchronization
+ benchmark
+ iree::testing::benchmark_main
+ TESTONLY
+)
+
+iree_cc_test(
+ NAME
+ synchronization_test
+ SRCS
+ "synchronization_test.cc"
+ DEPS
+ ::synchronization
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ wait_handle
+ HDRS
+ "wait_handle.h"
+ SRCS
+ "wait_handle.c"
+ "wait_handle_epoll.c"
+ "wait_handle_impl.h"
+ "wait_handle_inproc.c"
+ "wait_handle_kqueue.c"
+ "wait_handle_null.c"
+ "wait_handle_poll.c"
+ "wait_handle_posix.c"
+ "wait_handle_posix.h"
+ "wait_handle_win32.c"
+ DEPS
+ ::synchronization
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ wait_handle_test
+ SRCS
+ "wait_handle_test.cc"
+ DEPS
+ ::wait_handle
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+if(NOT ${IREE_ENABLE_THREADING})
+ return()
+endif()
+
+iree_cc_library(
+ NAME
+ event_pool
+ HDRS
+ "event_pool.h"
+ SRCS
+ "event_pool.c"
+ DEPS
+ ::internal
+ ::synchronization
+ ::wait_handle
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ threading
+ HDRS
+ "threading.h"
+ SRCS
+ "threading.c"
+ "threading_darwin.c"
+ "threading_impl.h"
+ "threading_pthreads.c"
+ "threading_win32.c"
+ DEPS
+ ${CMAKE_DL_LIBS}
+ ::internal
+ ::synchronization
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ threading_test
+ SRCS
+ "threading_impl.h"
+ "threading_test.cc"
+ DEPS
+ ::internal
+ ::synchronization
+ ::threading
+ iree::base::cc
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/base/internal/arena.c b/runtime/src/iree/base/internal/arena.c
new file mode 100644
index 0000000..81853d4
--- /dev/null
+++ b/runtime/src/iree/base/internal/arena.c
@@ -0,0 +1,227 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/arena.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_arena_block_pool_t
+//===----------------------------------------------------------------------===//
+
+// Initializes |out_block_pool| so that blocks of |total_block_size| bytes are
+// served from |block_allocator|. No blocks are allocated up front; the free
+// list starts empty and grows on demand in iree_arena_block_pool_acquire.
+void iree_arena_block_pool_initialize(iree_host_size_t total_block_size,
+ iree_allocator_t block_allocator,
+ iree_arena_block_pool_t* out_block_pool) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ memset(out_block_pool, 0, sizeof(*out_block_pool));
+ out_block_pool->total_block_size = total_block_size;
+ // The iree_arena_block_t footer lives at the tail of each block, so the
+ // usable payload is the total size minus that footer.
+ out_block_pool->usable_block_size =
+ total_block_size - sizeof(iree_arena_block_t);
+ out_block_pool->block_allocator = block_allocator;
+ iree_atomic_arena_block_slist_initialize(&out_block_pool->available_slist);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Tears down |block_pool|: frees any blocks still sitting in the free list and
+// deinitializes the slist. Callers must have released all acquired blocks
+// back to the pool first.
+void iree_arena_block_pool_deinitialize(iree_arena_block_pool_t* block_pool) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Since all blocks must have been released we can just reuse trim (today) as
+ // it doesn't retain any blocks.
+ iree_arena_block_pool_trim(block_pool);
+ iree_atomic_arena_block_slist_deinitialize(&block_pool->available_slist);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Frees every block currently in the pool's free list back to the underlying
+// allocator. Blocks handed out via acquire are untouched and remain valid.
+void iree_arena_block_pool_trim(iree_arena_block_pool_t* block_pool) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Steal the entire free list in one flush; no tail needed as we walk it all.
+ iree_arena_block_t* head = NULL;
+ iree_atomic_arena_block_slist_flush(
+ &block_pool->available_slist,
+ IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO, &head, NULL);
+ while (head) {
+ // The iree_arena_block_t footer sits at base + usable_block_size; rewind
+ // to the base pointer originally returned by the allocator before freeing.
+ void* ptr = (uint8_t*)head - block_pool->usable_block_size;
+ head = head->next;
+ iree_allocator_free(block_pool->block_allocator, ptr);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Acquires one block from |block_pool|, reusing a pooled block when available
+// and otherwise allocating a fresh one. Contents are undefined either way;
+// the returned block's |next| is cleared.
+iree_status_t iree_arena_block_pool_acquire(iree_arena_block_pool_t* block_pool,
+ iree_arena_block_t** out_block) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_arena_block_t* block =
+ iree_atomic_arena_block_slist_pop(&block_pool->available_slist);
+
+ if (!block) {
+ // No blocks available; allocate one now.
+ // Note that it's possible for there to be a race here where one thread
+ // releases a block to the pool while we are trying to acquire one - in that
+ // case we may end up allocating a block when perhaps we didn't need to but
+ // that's fine - it's just one block and the contention means there's likely
+ // to be a need for more anyway.
+ uint8_t* block_base = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc_uninitialized(block_pool->block_allocator,
+ block_pool->total_block_size,
+ (void**)&block_base));
+ // Footer is placed at the tail of the allocation (see iree_arena_block_t).
+ block = (iree_arena_block_t*)(block_base + block_pool->usable_block_size);
+ }
+
+ block->next = NULL;
+ *out_block = block;
+
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+// Returns the chain of blocks [block_head, block_tail] to the pool's free
+// list in a single concat; O(1) regardless of chain length.
+void iree_arena_block_pool_release(iree_arena_block_pool_t* block_pool,
+ iree_arena_block_t* block_head,
+ iree_arena_block_t* block_tail) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_atomic_arena_block_slist_concat(&block_pool->available_slist, block_head,
+ block_tail);
+ IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_arena_allocator_t
+//===----------------------------------------------------------------------===//
+
+// Prepares |out_arena| to serve allocations from |block_pool|. Nothing is
+// taken from the pool until the first allocation request arrives.
+void iree_arena_initialize(iree_arena_block_pool_t* block_pool,
+ iree_arena_allocator_t* out_arena) {
+ iree_arena_allocator_t* arena = out_arena;
+ memset(arena, 0, sizeof(*arena));
+ arena->block_pool = block_pool;
+}
+
+// Releases all arena storage; equivalent to a final reset. The arena struct
+// itself is caller-owned and is not freed here.
+void iree_arena_deinitialize(iree_arena_allocator_t* arena) {
+ iree_arena_reset(arena);
+}
+
+// Returns all arena memory: oversized allocations go straight back to the
+// system allocator while pooled blocks are handed back to the shared block
+// pool in one O(1) concat.
+// NOTE(review): total_allocation_size/used_allocation_size are not cleared
+// here — presumably lifetime statistics rather than per-reset; confirm.
+void iree_arena_reset(iree_arena_allocator_t* arena) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ if (arena->allocation_head != NULL) {
+ // Walk and free each oversized allocation; the links are embedded in the
+ // allocations themselves so grab |next| before freeing.
+ iree_arena_oversized_allocation_t* head = arena->allocation_head;
+ do {
+ void* ptr = (void*)head;
+ head = head->next;
+ iree_allocator_free(arena->block_pool->block_allocator, ptr);
+ } while (head);
+ arena->allocation_head = NULL;
+ }
+ if (arena->block_head != NULL) {
+ iree_arena_block_pool_release(arena->block_pool, arena->block_head,
+ arena->block_tail);
+ arena->block_head = NULL;
+ arena->block_tail = NULL;
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Allocates |byte_length| bytes from |arena| by bumping a pointer within the
+// current block, acquiring a new block from the pool when the current one is
+// exhausted. Requests larger than a block's usable size bypass the pool and
+// go straight to the system allocator (freed on reset). Contents undefined.
+iree_status_t iree_arena_allocate(iree_arena_allocator_t* arena,
+ iree_host_size_t byte_length,
+ void** out_ptr) {
+ *out_ptr = NULL;
+
+ iree_arena_block_pool_t* block_pool = arena->block_pool;
+
+ if (byte_length > block_pool->usable_block_size) {
+ // Oversized allocation that can't be handled by the block pool. We'll
+ // allocate directly from the system allocator and track it ourselves for
+ // freeing during reset.
+ IREE_TRACE_ZONE_BEGIN(z0);
+ // The tracking header is prepended; the caller sees the bytes after it.
+ iree_host_size_t allocation_size =
+ sizeof(iree_arena_oversized_allocation_t) + byte_length;
+ iree_arena_oversized_allocation_t* allocation = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
+ iree_allocator_malloc_uninitialized(
+ block_pool->block_allocator, allocation_size, (void**)&allocation));
+ allocation->next = arena->allocation_head;
+ arena->allocation_head = allocation;
+ arena->total_allocation_size += allocation_size;
+ arena->used_allocation_size += byte_length;
+ *out_ptr = (uint8_t*)allocation + sizeof(iree_arena_oversized_allocation_t);
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+ }
+
+ // Pad length allocated so that each pointer bump is always ending at an
+ // aligned address and the next allocation will start aligned.
+ iree_host_size_t aligned_length =
+ iree_host_align(byte_length, iree_max_align_t);
+
+ // Check to see if the current block (if any) has space - if not, get another.
+ if (arena->block_head == NULL ||
+ arena->block_bytes_remaining < aligned_length) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_arena_block_t* block = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_arena_block_pool_acquire(arena->block_pool, &block));
+ // LIFO-push onto the arena's block list; the tail is remembered so the
+ // whole chain can be released back to the pool in O(1) on reset.
+ block->next = arena->block_head;
+ arena->block_head = block;
+ if (!arena->block_tail) arena->block_tail = block;
+ arena->total_allocation_size += block_pool->total_block_size;
+ arena->block_bytes_remaining = block_pool->usable_block_size;
+ IREE_TRACE_ZONE_END(z0);
+ }
+
+ // Slice out the allocation from the current block.
+ // |block_head| points at the footer that terminates the usable region, so
+ // stepping back by the (pre-decrement) remaining byte count lands exactly at
+ // the next free offset; the counter is reduced only after |ptr| is taken.
+ void* ptr = (uint8_t*)arena->block_head - arena->block_bytes_remaining;
+ arena->block_bytes_remaining -= aligned_length;
+ arena->used_allocation_size += aligned_length;
+ *out_ptr = ptr;
+ return iree_ok_status();
+}
+
+// iree_allocator_t control callback adapting allocator commands onto an
+// iree_arena_allocator_t (|self|). MALLOC/CALLOC bump-allocate from the
+// arena; FREE is a no-op (arenas only release as a whole via reset).
+static iree_status_t iree_arena_allocator_ctl(void* self,
+ iree_allocator_command_t command,
+ const void* params,
+ void** inout_ptr) {
+ iree_arena_allocator_t* arena = (iree_arena_allocator_t*)self;
+ switch (command) {
+ case IREE_ALLOCATOR_COMMAND_MALLOC:
+ case IREE_ALLOCATOR_COMMAND_CALLOC: {
+ const iree_allocator_alloc_params_t* alloc_params =
+ (const iree_allocator_alloc_params_t*)params;
+ IREE_RETURN_IF_ERROR(
+ iree_arena_allocate(arena, alloc_params->byte_length, inout_ptr));
+ // Arena memory is uninitialized by default; only CALLOC zeroes it.
+ if (command == IREE_ALLOCATOR_COMMAND_CALLOC) {
+ memset(*inout_ptr, 0, alloc_params->byte_length);
+ }
+ return iree_ok_status();
+ }
+ case IREE_ALLOCATOR_COMMAND_FREE: {
+ // Do nothing: can't free from an arena.
+ return iree_ok_status();
+ }
+ default:
+ // NOTE: we could try to support IREE_ALLOCATOR_COMMAND_REALLOC, but
+ // it requires the original size to be able to do properly (without
+ // copying memory we shouldn't have access to). For this and other reasons
+ // we very rarely realloc in IREE so having this limitation isn't too bad.
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unsupported iree_arena_t allocator command");
+ }
+}
+
+// Wraps |arena| in the generic iree_allocator_t interface. Frees issued
+// through the returned allocator are ignored; reset the arena instead.
+iree_allocator_t iree_arena_allocator(iree_arena_allocator_t* arena) {
+ iree_allocator_t allocator;
+ allocator.self = arena;
+ allocator.ctl = iree_arena_allocator_ctl;
+ return allocator;
+}
diff --git a/runtime/src/iree/base/internal/arena.h b/runtime/src/iree/base/internal/arena.h
new file mode 100644
index 0000000..1d0afae
--- /dev/null
+++ b/runtime/src/iree/base/internal/arena.h
@@ -0,0 +1,153 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_ARENA_H_
+#define IREE_BASE_INTERNAL_ARENA_H_
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomic_slist.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_arena_block_pool_t
+//===----------------------------------------------------------------------===//
+
+struct iree_arena_block_t;
+
+// NOTE: this struct is at the *end* of allocated blocks such that we don't mess
+// with alignment - byte 0 of a block is always byte 0 of the allocation from
+// the system. We can do this as all blocks have the same size so computing the
+// footer offset from a pointer is easy.
+typedef struct iree_arena_block_t {
+ struct iree_arena_block_t* next;
+} iree_arena_block_t;
+
+// An atomic approximately LIFO singly-linked list.
+IREE_TYPED_ATOMIC_SLIST_WRAPPER(iree_atomic_arena_block, iree_arena_block_t,
+ offsetof(iree_arena_block_t, next));
+
+// A simple atomic fixed-size block pool.
+// Blocks are allocated from the system as required and kept in the pool to
+// satisfy future requests. Blocks are all of a uniform size specified when the
+// pool is created. It's recommended that power-of-two sizes are used for the
+// blocks so that the underlying allocator is more likely to bucket them
+// appropriately.
+//
+// Thread-safe; multiple threads may acquire and release blocks from the pool.
+// The underlying allocator must also be thread-safe.
+typedef struct iree_arena_block_pool_t {
+ // Block size, in bytes. All blocks in the available_slist will have this
+ // byte size which includes the iree_arena_block_t footer.
+ iree_host_size_t total_block_size;
+ // Block size, in bytes, of the usable bytes within a block.
+ iree_host_size_t usable_block_size;
+ // Allocator used for allocating/freeing each allocation block.
+ iree_allocator_t block_allocator;
+ // Linked list of free blocks (LIFO).
+ iree_atomic_arena_block_slist_t available_slist;
+} iree_arena_block_pool_t;
+
+// Initializes a new block pool in |out_block_pool|.
+// |block_allocator| will be used to allocate and free blocks for the pool.
+// Each block allocated will be |total_block_size| but have a slightly smaller
+// usable size due to the tracking overhead. Prefer powers of two.
+void iree_arena_block_pool_initialize(iree_host_size_t total_block_size,
+ iree_allocator_t block_allocator,
+ iree_arena_block_pool_t* out_block_pool);
+
+// Deinitializes a block pool and frees all allocations.
+// All blocks that were acquired from the pool must have already been released
+// back to it.
+void iree_arena_block_pool_deinitialize(iree_arena_block_pool_t* block_pool);
+
+// Trims the pool by freeing unused blocks back to the allocator.
+// Acquired blocks are not freed and remain valid.
+void iree_arena_block_pool_trim(iree_arena_block_pool_t* block_pool);
+
+// Acquires a single block from the pool and returns it in |out_block|.
+// The block may be either a new allocation with undefined contents or a reused
+// prior allocation with undefined contents.
+iree_status_t iree_arena_block_pool_acquire(iree_arena_block_pool_t* block_pool,
+ iree_arena_block_t** out_block);
+
+// Releases one or more blocks back to the block pool.
+// Any blocks chained in |block_head| will also be released allowing for
+// low-overhead resets when the blocks are already tracked in linked lists.
+void iree_arena_block_pool_release(iree_arena_block_pool_t* block_pool,
+ iree_arena_block_t* block_head,
+ iree_arena_block_t* block_tail);
+
+//===----------------------------------------------------------------------===//
+// iree_arena_allocator_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_arena_oversized_allocation_t {
+ struct iree_arena_oversized_allocation_t* next;
+} iree_arena_oversized_allocation_t;
+
+// A lightweight bump-pointer arena allocator using a shared block pool.
+// As allocations are made from the arena and block capacity is exhausted new
+// blocks will be acquired from the pool. Upon being reset all blocks will be
+// released back to the pool for reuse by either the same arena in the future or
+// other arenas sharing the same pool.
+//
+// The size of each allocated block used by the arena is inherited from the
+// block pool. Allocations from the arena may exceed the block size but will
+// incur additional allocation overhead as the block pool is bypassed and the
+// system allocator is directly used to service the request.
+//
+// Thread-compatible; the shared block pool is thread-safe and may be used by
+// arenas on multiple threads but each arena must only be used by a single
+// thread.
+typedef struct iree_arena_allocator_t {
+ // Fixed-size block pool used to acquire new blocks for the arena.
+ iree_arena_block_pool_t* block_pool;
+ // Total bytes allocated to the arena from the block pool or system allocator.
+ iree_host_size_t total_allocation_size;
+ // Total bytes allocated from the arena; the utilization of the arena can be
+ // checked with `used_allocation_size / total_allocation_size`.
+ iree_host_size_t used_allocation_size;
+ // Linked list of oversized allocations made directly from the system
+ // allocator used by the block pool.
+ iree_arena_oversized_allocation_t* allocation_head;
+ // Linked list of allocated blocks maintained so that reset can release them.
+ iree_arena_block_t* block_head;
+ iree_arena_block_t* block_tail;
+ // The number of bytes remaining in the block pointed to by block_head.
+ iree_host_size_t block_bytes_remaining;
+} iree_arena_allocator_t;
+
+// Initializes an arena that will use |block_pool| for allocating blocks as
+// needed.
+void iree_arena_initialize(iree_arena_block_pool_t* block_pool,
+ iree_arena_allocator_t* out_arena);
+
+// Deinitializes the arena and returns allocated blocks to the parent pool.
+void iree_arena_deinitialize(iree_arena_allocator_t* arena);
+
+// Resets the entire arena and returns allocated blocks to the parent pool.
+void iree_arena_reset(iree_arena_allocator_t* arena);
+
+// Allocates |byte_length| contiguous bytes from the arena.
+// The returned bytes will have undefined contents and must be initialized by
+// the caller.
+iree_status_t iree_arena_allocate(iree_arena_allocator_t* arena,
+ iree_host_size_t byte_length, void** out_ptr);
+
+// Returns an iree_allocator_t that allocates from the given |arena|.
+// Frees are ignored as arenas can only be reset as a whole.
+iree_allocator_t iree_arena_allocator(iree_arena_allocator_t* arena);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_INTERNAL_ARENA_H_
diff --git a/runtime/src/iree/base/internal/atomic_slist.c b/runtime/src/iree/base/internal/atomic_slist.c
new file mode 100644
index 0000000..3f4a27b
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomic_slist.c
@@ -0,0 +1,111 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/atomic_slist.h"
+
+#include <string.h>
+
+#include "iree/base/attributes.h"
+
+// TODO(benvanik): add TSAN annotations when switched to atomics:
+// https://github.com/gcc-mirror/gcc/blob/master/libsanitizer/include/sanitizer/tsan_interface_atomic.h
+// https://reviews.llvm.org/D18500
+
+// Initializes |out_list| to an empty list; any existing contents of the
+// struct are discarded (not thread-safe).
+void iree_atomic_slist_initialize(iree_atomic_slist_t* out_list) {
+ memset(out_list, 0, sizeof(*out_list));
+ iree_slim_mutex_initialize(&out_list->mutex);
+}
+
+// Deinitializes |list|; the list must already be empty and no other thread
+// may touch it during/after this call. Entries are caller-owned and not freed.
+void iree_atomic_slist_deinitialize(iree_atomic_slist_t* list) {
+ // TODO(benvanik): assert empty.
+ iree_slim_mutex_deinitialize(&list->mutex);
+ memset(list, 0, sizeof(*list));
+}
+
+// Splices the caller-provided chain [head, tail] onto the front of |list|.
+// A NULL |head| (empty span) is a no-op; |tail| must be the last entry of the
+// chain starting at |head|.
+void iree_atomic_slist_concat(iree_atomic_slist_t* list,
+ iree_atomic_slist_entry_t* head,
+ iree_atomic_slist_entry_t* tail) {
+ if (IREE_UNLIKELY(!head)) return;
+ iree_slim_mutex_lock(&list->mutex);
+ // Link the span's tail to the current head so the new span comes first.
+ tail->next = list->head;
+ list->head = head;
+ iree_slim_mutex_unlock(&list->mutex);
+}
+
+// Pushes |entry| onto the front of |list| (LIFO) while holding the list
+// mutex; safe against concurrent pushers/poppers.
+void iree_atomic_slist_push(iree_atomic_slist_t* list,
+ iree_atomic_slist_entry_t* entry) {
+ iree_slim_mutex_lock(&list->mutex);
+ entry->next = list->head;
+ list->head = entry;
+ iree_slim_mutex_unlock(&list->mutex);
+}
+
+// Pushes |entry| without taking the mutex; the caller must guarantee no other
+// thread can access |list| (e.g. a list still local to this thread).
+void iree_atomic_slist_push_unsafe(iree_atomic_slist_t* list,
+ iree_atomic_slist_entry_t* entry) {
+ // NOTE: no lock is held here and no atomic operation will be used when this
+ // is actually made atomic.
+ entry->next = list->head;
+ list->head = entry;
+}
+
+// Pops and returns the most recently pushed entry, or NULL if the list was
+// empty at the time of the query. The returned entry's |next| is cleared so
+// callers cannot accidentally walk into the remaining list.
+iree_atomic_slist_entry_t* iree_atomic_slist_pop(iree_atomic_slist_t* list) {
+ iree_slim_mutex_lock(&list->mutex);
+ iree_atomic_slist_entry_t* entry = list->head;
+ if (entry != NULL) {
+ list->head = entry->next;
+ entry->next = NULL;
+ }
+ iree_slim_mutex_unlock(&list->mutex);
+ return entry;
+}
+
+// Steals the entire list and hands it to the caller as a [head, tail] span in
+// the requested approximate order. Returns false if the list was empty.
+// |out_tail| may be NULL to skip tail computation in the LIFO case.
+bool iree_atomic_slist_flush(iree_atomic_slist_t* list,
+ iree_atomic_slist_flush_order_t flush_order,
+ iree_atomic_slist_entry_t** out_head,
+ iree_atomic_slist_entry_t** out_tail) {
+ // Exchange list head with NULL to steal the entire list. The list will be in
+ // the native LIFO order of the slist.
+ iree_slim_mutex_lock(&list->mutex);
+ iree_atomic_slist_entry_t* head = list->head;
+ list->head = NULL;
+ iree_slim_mutex_unlock(&list->mutex);
+ if (!head) return false;
+
+ switch (flush_order) {
+ case IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO: {
+ // List is already in native LIFO order. If the user wants a tail we have
+ // to scan for it, though, which we really only want to do when required
+ // as it's a linked list pointer walk.
+ *out_head = head;
+ if (out_tail) {
+ iree_atomic_slist_entry_t* p = head;
+ while (p->next) p = p->next;
+ *out_tail = p;
+ }
+ break;
+ }
+ case IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_FIFO: {
+ // Reverse the list in a single scan. list_head is our tail, so scan
+ // forward to find our head. Since we have to walk the whole list anyway
+ // we can cheaply give both the head and tail to the caller.
+ // NOTE: the first iteration transiently self-links the old head; that is
+ // undone by the final tail->next = NULL below.
+ iree_atomic_slist_entry_t* tail = head;
+ if (out_tail) *out_tail = tail;
+ iree_atomic_slist_entry_t* p = head;
+ do {
+ iree_atomic_slist_entry_t* next = p->next;
+ p->next = head;
+ head = p;
+ p = next;
+ } while (p != NULL);
+ tail->next = NULL;
+ *out_head = head;
+ break;
+ }
+ default:
+ return false;
+ }
+
+ return true;
+}
diff --git a/runtime/src/iree/base/internal/atomic_slist.h b/runtime/src/iree/base/internal/atomic_slist.h
new file mode 100644
index 0000000..eaf852c
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomic_slist.h
@@ -0,0 +1,257 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: the best kind of synchronization is no synchronization; always try to
+// design your algorithm so that you don't need anything from this file :)
+// See https://travisdowns.github.io/blog/2020/07/06/concurrency-costs.html
+
+#ifndef IREE_BASE_INTERNAL_ATOMIC_SLIST_H_
+#define IREE_BASE_INTERNAL_ATOMIC_SLIST_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/alignment.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/synchronization.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The embedded pointer to the next entry in the slist. This points to the
+// internal iree_atomic_slist_entry_t, *not* the user-provided pointer.
+typedef void* iree_atomic_slist_intrusive_ptr_t;
+
+// DO NOT USE: implementation detail.
+typedef struct iree_atomic_slist_entry_t {
+ struct iree_atomic_slist_entry_t* next;
+} iree_atomic_slist_entry_t;
+
+// Lightweight contention-avoiding singly linked list.
+// This models optimistically-ordered LIFO behavior (stack push/pop) using
+// atomic primitives.
+//
+// ***************************************************
+// ******** ONLY APPROXIMATE ORDER GUARANTEES ********
+// ***************************************************
+//
+// This makes it extremely efficient for when only eventual consistency across
+// producers and consumers is required. The most common example is free lists
+// where all that matters is that entries make it into the list and not that
+// they have any particular order between them. Work queues where all tasks
+// within the queue are able to execute in any order like with wavefront-style
+// scheduling can also benefit from this relaxed behavior.
+//
+// If a strict ordering is required this can be used as a primitive to construct
+// a flat-combining data structure where data structure change requests are
+// published to this list and a combiner is chosen to land the published data in
+// an appropriate order:
+// http://people.csail.mit.edu/shanir/publications/Flat%20Combining%20SPAA%2010.pdf
+//
+// There's often still a benefit in unordered scenarios of having LIFO behavior
+// as it promotes cache-friendly small linked lists when there is a small number
+// of producers and consumers (1:1 is the best case), though as the producer and
+// consumer count increases the LIFO behavior can pessimize performance as there
+// is more contention for the list head pointer. Prefer to shard across multiple
+// per-core/thread lists and use techniques like flat-combining for the
+// cross-core/thread aggregation/sequencing.
+//
+// This API modeled roughly on the Windows SList type:
+// https://docs.microsoft.com/en-us/windows/win32/sync/interlocked-singly-linked-lists
+// which is roughly compatible with the Apple OSAtomic queue:
+// https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/OSAtomicEnqueue.3.html
+// https://opensource.apple.com/source/libplatform/libplatform-125/include/libkern/OSAtomicQueue.h.auto.html
+//
+// Usage:
+// https://docs.microsoft.com/en-us/windows/win32/sync/using-singly-linked-lists
+//
+// WARNING: this is an extremely sharp pufferfish-esque API. Don't use it. 🐡
+//
+// TODO(benvanik): verify behavior (and worthwhileness) of supporting platform
+// primitives. The benefit of something like OSAtomicEnqueue/Dequeue is that it
+// may have better tooling (TSAN), special intrinsic handling in the compiler,
+// etc. That said, the Windows Interlocked* variants don't seem to. Having a
+// single heavily tested implementation seems more worthwhile than several.
+typedef iree_alignas(iree_max_align_t) struct {
+ // TODO(benvanik): spend some time golfing this. Unblocking myself for now :)
+ iree_slim_mutex_t mutex;
+ iree_atomic_slist_entry_t* head;
+} iree_atomic_slist_t;
+
+// Initializes an slist handle to an empty list.
+// Lists must be flushed to empty and deinitialized when no longer needed with
+// iree_atomic_slist_deinitialize.
+//
+// NOTE: not thread-safe; existing |out_list| contents are discarded.
+void iree_atomic_slist_initialize(iree_atomic_slist_t* out_list);
+
+// Deinitializes an slist.
+// The list must be empty; callers are expected to flush the list from the same
+// thread making this call when it is guaranteed no other thread may be trying
+// to use the list.
+//
+// NOTE: not thread-safe; |list| must not be used by any other thread.
+void iree_atomic_slist_deinitialize(iree_atomic_slist_t* list);
+
+// Concatenates a span of entries into the list in the order they are provided.
+//
+// Example:
+// existing slist: C B A
+// provided span: 1 2 3
+// resulting slist: 1 2 3 C B A
+void iree_atomic_slist_concat(iree_atomic_slist_t* list,
+ iree_atomic_slist_entry_t* head,
+ iree_atomic_slist_entry_t* tail);
+
+// Pushes an entry into the list.
+//
+// existing slist: C B A
+// provided entry: 1
+// resulting slist: 1 C B A
+void iree_atomic_slist_push(iree_atomic_slist_t* list,
+ iree_atomic_slist_entry_t* entry);
+
+// Pushes an entry into the list without using an atomic update.
+// This is useful for when |list| is known to be inaccessible to any other
+// thread, such as when populating a stack-local list prior to sharing it.
+void iree_atomic_slist_push_unsafe(iree_atomic_slist_t* list,
+ iree_atomic_slist_entry_t* entry);
+
+// Pops the most recently pushed entry from the list and returns it.
+// Returns NULL if the list was empty at the time it was queried.
+//
+// existing slist: C B A
+// resulting slist: B A
+// returned entry: C
+iree_atomic_slist_entry_t* iree_atomic_slist_pop(iree_atomic_slist_t* list);
+
+// Defines the approximate order in which a span of flushed entries is returned.
+typedef enum iree_atomic_slist_flush_order_e {
+ // |out_head| and |out_tail| will be set to a span of the entries roughly in
+ // the order they were pushed to the list in LIFO (stack) order.
+ //
+ // Example:
+ // slist: C B A
+ // result: C B A (or when contended possibly C A B)
+ IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO = 0,
+ // |out_head| and |out_tail| will be set to the first and last entries
+ // pushed respectively, turning this LIFO slist into a FIFO queue.
+ //
+ // Example:
+ // slist: C B A
+ // result: A B C (or when contended possibly B A C)
+ IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_FIFO,
+} iree_atomic_slist_flush_order_t;
+
+// Removes all items from the list and returns them in **APPROXIMATELY** the
+// |flush_order| requested. As there are no order guarantees there may be slight
+// transpositions of entries that were pushed from multiple processors or even
+// interleaved entries within spans of entries pushed with
+// iree_atomic_slist_concat.
+//
+// If |out_tail| is not required it can be omitted and this may avoid the
+// need for the flush to walk the list and touch each entry.
+//
+// Returns true if any items were present and false if the output list is empty.
+// Note that because atomic data structures can race it's possible for there to
+// both be something in the list prior to this call and something in the list
+// after the call and yet the return can still be false.
+bool iree_atomic_slist_flush(iree_atomic_slist_t* list,
+ iree_atomic_slist_flush_order_t flush_order,
+ iree_atomic_slist_entry_t** out_head,
+ iree_atomic_slist_entry_t** out_tail);
+
+//==============================================================================
+// Typed wrapper generator for iree_atomic_slist_t
+//==============================================================================
+
+// Typed and named wrappers for making atomic slists easier to work with.
+//
+// Usage:
+// typedef struct {
+// int some_fields;
+// iree_atomic_slist_intrusive_ptr_t slist_next;
+// int more_fields;
+// } my_type_t;
+// IREE_TYPED_ATOMIC_SLIST_WRAPPER(my_type, my_type_t,
+// offsetof(my_type_t, slist_next));
+//
+// my_type_slist_t list;
+// my_type_slist_initialize(&list);
+// my_type_t* entry = allocate_my_type(123);
+// my_type_slist_push(&list, entry);
+// entry = my_type_slist_pop(&list);
+// Implementation notes:
+// - |next_offset| must locate an iree_atomic_slist_intrusive_ptr_t field
+//   inside |type|; the *_entry_from_ptr/_to_ptr helpers translate between the
+//   user struct and the embedded link by that byte offset.
+// - NULL maps to NULL in both directions, so e.g. name##_slist_pop on an
+//   empty list yields NULL without special-casing.
+#define IREE_TYPED_ATOMIC_SLIST_WRAPPER(name, type, next_offset) \
+ static inline iree_atomic_slist_entry_t* name##_slist_entry_from_ptr( \
+ type* entry) { \
+ return entry \
+ ? ((iree_atomic_slist_entry_t*)((uint8_t*)entry + next_offset)) \
+ : NULL; \
+ } \
+ static inline type* name##_slist_entry_to_ptr( \
+ iree_atomic_slist_entry_t* entry) { \
+ return entry ? (type*)(((uint8_t*)entry) - next_offset) : NULL; \
+ } \
+ \
+ static inline type* name##_slist_get_next(type* entry) { \
+ if (!entry) return NULL; \
+ return name##_slist_entry_to_ptr( \
+ ((iree_atomic_slist_entry_t*)((uint8_t*)entry + next_offset))->next); \
+ } \
+ static inline void name##_slist_set_next(type* entry, type* next) { \
+ name##_slist_entry_from_ptr(entry)->next = \
+ name##_slist_entry_from_ptr(next); \
+ } \
+ \
+ typedef iree_alignas(iree_max_align_t) struct { \
+ iree_atomic_slist_t impl; \
+ } name##_slist_t; \
+ \
+ static inline void name##_slist_initialize(name##_slist_t* out_list) { \
+ iree_atomic_slist_initialize(&out_list->impl); \
+ } \
+ static inline void name##_slist_deinitialize(name##_slist_t* list) { \
+ iree_atomic_slist_deinitialize(&list->impl); \
+ } \
+ \
+ static inline void name##_slist_push(name##_slist_t* list, type* entry) { \
+ iree_atomic_slist_push(&list->impl, name##_slist_entry_from_ptr(entry)); \
+ } \
+ static inline void name##_slist_push_unsafe(name##_slist_t* list, \
+ type* entry) { \
+ iree_atomic_slist_push_unsafe(&list->impl, \
+ name##_slist_entry_from_ptr(entry)); \
+ } \
+ static inline void name##_slist_concat(name##_slist_t* list, type* head, \
+ type* tail) { \
+ iree_atomic_slist_concat(&list->impl, name##_slist_entry_from_ptr(head), \
+ name##_slist_entry_from_ptr(tail)); \
+ } \
+ static inline type* name##_slist_pop(name##_slist_t* list) { \
+ return name##_slist_entry_to_ptr(iree_atomic_slist_pop(&list->impl)); \
+ } \
+ \
+ static inline bool name##_slist_flush( \
+ name##_slist_t* list, iree_atomic_slist_flush_order_t flush_order, \
+ type** out_head, type** out_tail) { \
+ iree_atomic_slist_entry_t* head = NULL; \
+ iree_atomic_slist_entry_t* tail = NULL; \
+ if (!iree_atomic_slist_flush(&list->impl, flush_order, &head, \
+ out_tail ? &tail : NULL)) { \
+ return false; /* empty list */ \
+ } \
+ *out_head = name##_slist_entry_to_ptr(head); \
+ if (out_tail) *out_tail = name##_slist_entry_to_ptr(tail); \
+ return true; \
+ }
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_ATOMIC_SLIST_H_
diff --git a/runtime/src/iree/base/internal/atomic_slist_test.cc b/runtime/src/iree/base/internal/atomic_slist_test.cc
new file mode 100644
index 0000000..120838c
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomic_slist_test.cc
@@ -0,0 +1,185 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/atomic_slist.h"
+
+#include <vector>
+
+#include "iree/testing/gtest.h"
+
+namespace {
+
+struct dummy_entry_t {
+  // NOTE: value first so slist_next sits at a nonzero offset (exercises the
+  size_t value = 0;
+  iree_atomic_slist_intrusive_ptr_t slist_next = NULL;
+};
+IREE_TYPED_ATOMIC_SLIST_WRAPPER(dummy, dummy_entry_t,
+                                offsetof(dummy_entry_t, slist_next));
+
+std::vector<dummy_entry_t> MakeDummySListItems(size_t base_index,
+                                               size_t count) {
+  std::vector<dummy_entry_t> items(count);
+  for (size_t i = 0; i < count; ++i) {
+    items[i].value = base_index + i;  // values: base_index..base_index+count-1
+  }
+  return items;
+}
+
+TEST(AtomicSList, Lifetime) {
+  iree_atomic_slist_t list;  // NOTE: intentionally uninitialized; initialize() must not require zeroed memory.
+  iree_atomic_slist_initialize(&list);
+  iree_atomic_slist_deinitialize(&list);
+}
+
+TEST(AtomicSList, BasicUsage) {
+  dummy_slist_t list;
+  dummy_slist_initialize(&list);
+
+  // List starts empty.
+  EXPECT_EQ(NULL, dummy_slist_pop(&list));
+
+  // Push some items into the list (LIFO order).
+  // New contents: 5 4 3 2 1 0
+  auto item_storage = MakeDummySListItems(0, 6);
+  for (size_t i = 0; i < item_storage.size(); ++i) {
+    dummy_slist_push(&list, &item_storage[i]);
+  }
+
+  // Now pop them out - they should be in reverse order.
+  // New contents: (empty)
+  for (size_t i = 0; i < item_storage.size(); ++i) {
+    dummy_entry_t* p = dummy_slist_pop(&list);
+    ASSERT_TRUE(p);
+    EXPECT_EQ(item_storage.size() - i - 1, p->value);
+  }
+
+  // List ends empty.
+  EXPECT_EQ(NULL, dummy_slist_pop(&list));
+
+  dummy_slist_deinitialize(&list);
+}
+
+TEST(AtomicSList, Concat) {
+  dummy_slist_t list;
+  dummy_slist_initialize(&list);
+
+  // Push some initial items into the list (LIFO order).
+  // New contents: 1 0
+  auto initial_item_storage = MakeDummySListItems(0, 2);
+  for (size_t i = 0; i < initial_item_storage.size(); ++i) {
+    dummy_slist_push(&list, &initial_item_storage[i]);
+  }
+
+  // Stitch items together modeling what a user may do when building the list
+  // themselves.
+  // Items (pre-linked head->tail): 2 -> 3 -> 4
+  auto span_item_storage = MakeDummySListItems(2, 3);
+  for (size_t i = 0; i < span_item_storage.size() - 1; ++i) {
+    dummy_slist_set_next(&span_item_storage[i], &span_item_storage[i + 1]);
+  }
+
+  // Push all of the items to the list at once.
+  // New contents: 2 3 4 1 0
+  dummy_slist_concat(&list, &span_item_storage.front(),
+                     &span_item_storage.back());
+
+  // Pop the span items and verify they are in the correct order: we effectively
+  // pushed them such that popping is FIFO (2->4).
+  // New contents: 1 0
+  for (size_t i = 0; i < span_item_storage.size(); ++i) {
+    dummy_entry_t* p = dummy_slist_pop(&list);
+    ASSERT_TRUE(p);
+    EXPECT_EQ(/*base_index=*/2 + i, p->value);
+  }
+
+  // Pop the initial items and ensure they survived.
+  // New contents: (empty)
+  for (size_t i = 0; i < initial_item_storage.size(); ++i) {
+    dummy_entry_t* p = dummy_slist_pop(&list);
+    ASSERT_TRUE(p);
+    EXPECT_EQ(initial_item_storage.size() - i - 1, p->value);
+  }
+
+  dummy_slist_deinitialize(&list);
+}
+
+TEST(AtomicSList, FlushLIFO) {
+  dummy_slist_t list;
+  dummy_slist_initialize(&list);
+
+  // Flushing when empty is ok.
+  dummy_entry_t* head = NULL;
+  dummy_entry_t* tail = NULL;
+  EXPECT_FALSE(dummy_slist_flush(
+      &list, IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO, &head, &tail));
+
+  // Push items into the list (LIFO order).
+  // New contents: 3 2 1 0
+  auto item_storage = MakeDummySListItems(0, 4);
+  for (size_t i = 0; i < item_storage.size(); ++i) {
+    dummy_slist_push(&list, &item_storage[i]);
+  }
+
+  // Flush in LIFO order and verify empty.
+  // New contents: (empty)
+  EXPECT_TRUE(dummy_slist_flush(
+      &list, IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO, &head, &tail));
+  EXPECT_EQ(NULL, dummy_slist_pop(&list));
+
+  // Verify LIFO order and list pointer walking.
+  // Note that head and tail are reverse of item storage!
+  EXPECT_EQ(&item_storage.back(), head);
+  EXPECT_EQ(&item_storage.front(), tail);
+  dummy_entry_t* p = head;
+  for (size_t i = 0; i < item_storage.size(); ++i) {
+    ASSERT_TRUE(p);
+    EXPECT_EQ(item_storage.size() - i - 1, p->value);
+    p = dummy_slist_get_next(p);
+  }
+  EXPECT_EQ(NULL, p);
+
+  dummy_slist_deinitialize(&list);
+}
+
+TEST(AtomicSList, FlushFIFO) {
+  dummy_slist_t list;
+  dummy_slist_initialize(&list);
+
+  // Flushing when empty is ok.
+  dummy_entry_t* head = NULL;
+  dummy_entry_t* tail = NULL;
+  EXPECT_FALSE(dummy_slist_flush(
+      &list, IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_FIFO, &head, &tail));
+
+  // Push items into the list (LIFO order).
+  // New contents: 3 2 1 0
+  auto item_storage = MakeDummySListItems(0, 4);
+  for (size_t i = 0; i < item_storage.size(); ++i) {
+    dummy_slist_push(&list, &item_storage[i]);
+  }
+
+  // Flush in FIFO order and verify empty.
+  // New contents: (empty)
+  EXPECT_TRUE(dummy_slist_flush(
+      &list, IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_FIFO, &head, &tail));
+  EXPECT_EQ(NULL, dummy_slist_pop(&list));
+
+  // Verify FIFO order and list pointer walking.
+  EXPECT_EQ(&item_storage.front(), head);
+  EXPECT_EQ(&item_storage.back(), tail);
+  dummy_entry_t* p = head;
+  for (size_t i = 0; i < item_storage.size(); ++i) {
+    ASSERT_TRUE(p);
+    EXPECT_EQ(i, p->value);
+    p = dummy_slist_get_next(p);
+  }
+  EXPECT_EQ(NULL, p);
+
+  dummy_slist_deinitialize(&list);
+}
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/atomics.h b/runtime/src/iree/base/internal/atomics.h
new file mode 100644
index 0000000..31eb64c
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomics.h
@@ -0,0 +1,171 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// An implementation of the C11 stdatomics.h utilities we use (which is limited
+// to a subset of types for now). We need this for non-C11-compliant platforms
+// (MSVC), but it has the added benefit of not conflicting with <atomic>
+// (stdatomic.h and atomic cannot be included in the same compilation unit...
+// great design). There shouldn't be any difference between what we do here and
+// what any implementation would do with the platform atomic functions so it's
+// used everywhere.
+//
+// https://en.cppreference.com/w/c/atomic
+
+#ifndef IREE_BASE_INTERNAL_ATOMICS_H_
+#define IREE_BASE_INTERNAL_ATOMICS_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "iree/base/assert.h"
+#include "iree/base/config.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//==============================================================================
+// Hardware concurrency information
+//==============================================================================
+
+// https://en.cppreference.com/w/cpp/thread/hardware_destructive_interference_size
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html
+// https://norrischiu.github.io/2018/09/08/Cpp-jargon-1.html
+
+// TODO(benvanik): test 128 on x64 (to thwart hardware prefetcher).
+
+// Minimum offset between two objects to avoid false sharing.
+// If two members are aligned to this value they will (likely) not share the
+// same L1 cache line.
+#define iree_hardware_destructive_interference_size 64
+
+// Maximum size of contiguous memory to promote true sharing.
+// If two members are within a span of this value they will (likely) share the
+// same L1 cache line.
+#define iree_hardware_constructive_interference_size 64
+
+//==============================================================================
+// C11-compatible atomic operations
+//==============================================================================
+// We expose support for int32_t, int64_t, and intptr_t (which aliases one of
+// int32_t or int64_t). This limits what we need to port and it's really all
+// that's needed anyway.
+
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+// Atomics are disabled as we've forced ourselves into a fully thread-hostile
+// configuration. Used on bare-metal systems with single cores.
+#include "iree/base/internal/atomics_disabled.h" // IWYU pragma: export
+
+#elif defined(IREE_COMPILER_MSVC)
+
+// Atomics using the Win32 Interlocked* APIs.
+#include "iree/base/internal/atomics_msvc.h" // IWYU pragma: export
+
+#elif defined(IREE_COMPILER_CLANG)
+
+// C11 atomics using Clang builtins.
+#include "iree/base/internal/atomics_clang.h" // IWYU pragma: export
+
+#elif defined(IREE_COMPILER_GCC)
+
+// Atomics for GCC (compatible with both C and C++).
+#include "iree/base/internal/atomics_gcc.h" // IWYU pragma: export
+
+#else
+
+// Unsupported architecture.
+#error Compiler does not have supported C11-style atomics
+
+#endif // IREE_COMPILER_*
+
+// If the compiler can automatically determine the types:
+#ifdef iree_atomic_load_auto
+
+#define iree_atomic_load_int32 iree_atomic_load_auto
+#define iree_atomic_store_int32 iree_atomic_store_auto
+#define iree_atomic_fetch_add_int32 iree_atomic_fetch_add_auto
+#define iree_atomic_fetch_sub_int32 iree_atomic_fetch_sub_auto
+#define iree_atomic_fetch_and_int32 iree_atomic_fetch_and_auto
+#define iree_atomic_fetch_or_int32 iree_atomic_fetch_or_auto
+#define iree_atomic_fetch_xor_int32 iree_atomic_fetch_xor_auto
+#define iree_atomic_exchange_int32 iree_atomic_exchange_auto
+#define iree_atomic_compare_exchange_strong_int32 \
+ iree_atomic_compare_exchange_strong_auto
+#define iree_atomic_compare_exchange_weak_int32 \
+ iree_atomic_compare_exchange_weak_auto
+
+#define iree_atomic_load_int64 iree_atomic_load_auto
+#define iree_atomic_store_int64 iree_atomic_store_auto
+#define iree_atomic_fetch_add_int64 iree_atomic_fetch_add_auto
+#define iree_atomic_fetch_sub_int64 iree_atomic_fetch_sub_auto
+#define iree_atomic_fetch_and_int64 iree_atomic_fetch_and_auto
+#define iree_atomic_fetch_or_int64 iree_atomic_fetch_or_auto
+#define iree_atomic_fetch_xor_int64 iree_atomic_fetch_xor_auto
+#define iree_atomic_exchange_int64 iree_atomic_exchange_auto
+#define iree_atomic_compare_exchange_strong_int64 \
+ iree_atomic_compare_exchange_strong_auto
+#define iree_atomic_compare_exchange_weak_int64 \
+ iree_atomic_compare_exchange_weak_auto
+
+#define iree_atomic_load_intptr iree_atomic_load_auto
+#define iree_atomic_store_intptr iree_atomic_store_auto
+#define iree_atomic_fetch_add_intptr iree_atomic_fetch_add_auto
+#define iree_atomic_fetch_sub_intptr iree_atomic_fetch_sub_auto
+#define iree_atomic_exchange_intptr iree_atomic_exchange_auto
+#define iree_atomic_compare_exchange_strong_intptr \
+ iree_atomic_compare_exchange_strong_auto
+#define iree_atomic_compare_exchange_weak_intptr \
+ iree_atomic_compare_exchange_weak_auto
+
+#endif // iree_atomic_load_auto
+
+//==============================================================================
+// Reference count atomics
+//==============================================================================
+// These are just aliases that allow use to have nicely readable ref counting
+// operands without caring about the exact bit sizes at each site.
+
+typedef iree_atomic_int32_t iree_atomic_ref_count_t;
+#define iree_atomic_ref_count_init(count_ptr) \
+ iree_atomic_store_int32(count_ptr, 1, iree_memory_order_relaxed)
+// Callers of iree_atomic_ref_count_inc typically don't need it to return a
+// value (unlike iree_atomic_ref_count_dec), so we make sure that it does not,
+// which allows the implementation to use faster atomic instructions where
+// available, e.g. STADD on ARMv8.1-a.
+#define iree_atomic_ref_count_inc(count_ptr) \
+ do { \
+ iree_atomic_fetch_add_int32(count_ptr, 1, iree_memory_order_relaxed); \
+ } while (0)
+#define iree_atomic_ref_count_dec(count_ptr) \
+ iree_atomic_fetch_sub_int32(count_ptr, 1, iree_memory_order_acq_rel)
+
+// Aborts the program if the given reference count value is not 1.
+// This should be avoided in all situations but those where continuing execution
+// would be invalid. If a reference object is allocated on the stack and the
+// parent function is about to return it *must* have a ref count of 1: anything
+// else that may be retaining the object will hold a pointer to (effectively)
+// uninitialized stack memory.
+#define iree_atomic_ref_count_abort_if_uses(count_ptr) \
+ if (IREE_UNLIKELY(iree_atomic_load_int32(count_ptr, \
+ iree_memory_order_seq_cst) != 1)) { \
+ abort(); \
+ }
+
+// Asserts that the given reference count value is zero.
+#define IREE_ASSERT_REF_COUNT_ZERO(count_ptr) \
+ IREE_ASSERT_EQ(iree_atomic_load_int32(count_ptr, iree_memory_order_seq_cst), \
+ 0, "ref counted object still has uses")
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_ATOMICS_H_
diff --git a/runtime/src/iree/base/internal/atomics_clang.h b/runtime/src/iree/base/internal/atomics_clang.h
new file mode 100644
index 0000000..44514e0
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomics_clang.h
@@ -0,0 +1,73 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_ATOMICS_CLANG_H_
+#define IREE_BASE_INTERNAL_ATOMICS_CLANG_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/target_platform.h"
+
+#if defined(IREE_COMPILER_CLANG)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum iree_memory_order_e {
+ iree_memory_order_relaxed = __ATOMIC_RELAXED,
+ iree_memory_order_consume = __ATOMIC_CONSUME,
+ iree_memory_order_acquire = __ATOMIC_ACQUIRE,
+ iree_memory_order_release = __ATOMIC_RELEASE,
+ iree_memory_order_acq_rel = __ATOMIC_ACQ_REL,
+ iree_memory_order_seq_cst = __ATOMIC_SEQ_CST,
+} iree_memory_order_t;
+
+#define IREE_ATOMIC_VAR_INIT(value) (value)
+
+typedef _Atomic int32_t iree_atomic_int32_t;
+typedef _Atomic int64_t iree_atomic_int64_t;
+// TODO(#3453): check for __int128 support before using
+// typedef _Atomic __int128 iree_atomic_int128_t;
+typedef _Atomic intptr_t iree_atomic_intptr_t;
+
+#define iree_atomic_load_auto(object, order) \
+ __c11_atomic_load((object), (order))
+#define iree_atomic_store_auto(object, desired, order) \
+ __c11_atomic_store((object), (desired), (order))
+#define iree_atomic_fetch_add_auto(object, operand, order) \
+ __c11_atomic_fetch_add((object), (operand), (order))
+#define iree_atomic_fetch_sub_auto(object, operand, order) \
+ __c11_atomic_fetch_sub((object), (operand), (order))
+#define iree_atomic_fetch_and_auto(object, operand, order) \
+ __c11_atomic_fetch_and((object), (operand), (order))
+#define iree_atomic_fetch_or_auto(object, operand, order) \
+ __c11_atomic_fetch_or((object), (operand), (order))
+#define iree_atomic_fetch_xor_auto(object, operand, order) \
+ __c11_atomic_fetch_xor((object), (operand), (order))
+#define iree_atomic_exchange_auto(object, operand, order) \
+ __c11_atomic_exchange((object), (operand), (order))
+#define iree_atomic_compare_exchange_strong_auto(object, expected, desired, \
+ order_succ, order_fail) \
+ __c11_atomic_compare_exchange_strong((object), (expected), (desired), \
+ (order_succ), (order_fail))
+#define iree_atomic_compare_exchange_weak_auto(object, expected, desired, \
+ order_succ, order_fail) \
+ __c11_atomic_compare_exchange_weak((object), (expected), (desired), \
+ (order_succ), (order_fail))
+
+#define iree_atomic_thread_fence(order) __c11_atomic_thread_fence(order)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_COMPILER_CLANG
+
+#endif // IREE_BASE_INTERNAL_ATOMICS_CLANG_H_
diff --git a/runtime/src/iree/base/internal/atomics_disabled.h b/runtime/src/iree/base/internal/atomics_disabled.h
new file mode 100644
index 0000000..ce9e17e
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomics_disabled.h
@@ -0,0 +1,244 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_ATOMICS_DISABLED_H_
+#define IREE_BASE_INTERNAL_ATOMICS_DISABLED_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/config.h"
+#include "iree/base/target_platform.h"
+
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum iree_memory_order_e {
+ iree_memory_order_relaxed,
+ iree_memory_order_consume,
+ iree_memory_order_acquire,
+ iree_memory_order_release,
+ iree_memory_order_acq_rel,
+ iree_memory_order_seq_cst,
+} iree_memory_order_t;
+
+#define IREE_ATOMIC_VAR_INIT(value) (value)
+
+typedef int32_t iree_atomic_int32_t;
+typedef int64_t iree_atomic_int64_t;
+// TODO(#3453): check for __int128 support before using
+// typedef __int128 iree_atomic_int128_t;
+typedef intptr_t iree_atomic_intptr_t;
+
+#define iree_atomic_load_int32(object, order) *(object)
+#define iree_atomic_store_int32(object, desired, order) *(object) = (desired)
+#define iree_atomic_fetch_add_int32(object, operand, order) \
+ iree_atomic_fetch_add_int32_impl((volatile iree_atomic_int32_t*)(object), \
+ (int32_t)(operand))
+#define iree_atomic_fetch_sub_int32(object, operand, order) \
+ iree_atomic_fetch_add_int32_impl((volatile iree_atomic_int32_t*)(object), \
+ -(int32_t)(operand))
+#define iree_atomic_fetch_and_int32(object, operand, order) \
+ iree_atomic_fetch_and_int32_impl((volatile iree_atomic_int32_t*)(object), \
+ (int32_t)(operand))
+#define iree_atomic_fetch_or_int32(object, operand, order) \
+ iree_atomic_fetch_or_int32_impl((volatile iree_atomic_int32_t*)(object), \
+ (int32_t)(operand))
+#define iree_atomic_fetch_xor_int32(object, operand, order) \
+ iree_atomic_fetch_xor_int32_impl((volatile iree_atomic_int32_t*)(object), \
+ (int32_t)(operand))
+#define iree_atomic_exchange_int32(object, desired, order) \
+ iree_atomic_fetch_exchange_int32_impl( \
+ (volatile iree_atomic_int32_t*)(object), (int32_t)(desired))
+#define iree_atomic_compare_exchange_strong_int32(object, expected, desired, \
+ order_succ, order_fail) \
+ iree_atomic_compare_exchange_int32_impl( \
+ (volatile iree_atomic_int32_t*)(object), (int32_t*)(expected), \
+ (int32_t)(desired))
+#define iree_atomic_compare_exchange_weak_int32 \
+ iree_atomic_compare_exchange_strong_int32
+
+#define iree_atomic_load_int64(object, order) *(object)
+#define iree_atomic_store_int64(object, desired, order) *(object) = (desired)
+#define iree_atomic_fetch_add_int64(object, operand, order) \
+ iree_atomic_fetch_add_int64_impl((volatile iree_atomic_int64_t*)(object), \
+ (int64_t)(operand))
+#define iree_atomic_fetch_sub_int64(object, operand, order) \
+ iree_atomic_fetch_add_int64_impl((volatile iree_atomic_int64_t*)(object), \
+ -(int64_t)(operand))
+#define iree_atomic_fetch_and_int64(object, operand, order) \
+ iree_atomic_fetch_and_int64_impl((volatile iree_atomic_int64_t*)(object), \
+ (int64_t)(operand))
+#define iree_atomic_fetch_or_int64(object, operand, order) \
+ iree_atomic_fetch_or_int64_impl((volatile iree_atomic_int64_t*)(object), \
+ (int64_t)(operand))
+#define iree_atomic_fetch_xor_int64(object, operand, order) \
+ iree_atomic_fetch_xor_int64_impl((volatile iree_atomic_int64_t*)(object), \
+ (int64_t)(operand))
+#define iree_atomic_exchange_int64(object, desired, order) \
+ iree_atomic_fetch_exchange_int64_impl( \
+ (volatile iree_atomic_int64_t*)(object), (int64_t)(desired))
+#define iree_atomic_compare_exchange_strong_int64(object, expected, desired, \
+ order_succ, order_fail) \
+ iree_atomic_compare_exchange_int64_impl( \
+ (volatile iree_atomic_int64_t*)(object), (int64_t*)(expected), \
+ (int64_t)(desired))
+#define iree_atomic_compare_exchange_weak_int64 \
+ iree_atomic_compare_exchange_strong_int64
+
+static inline int32_t iree_atomic_fetch_add_int32_impl(
+ volatile iree_atomic_int32_t* object, int32_t operand) {
+ int32_t original = *object;
+ *object += operand;
+ return original;
+}
+
+static inline int32_t iree_atomic_fetch_and_int32_impl(
+ volatile iree_atomic_int32_t* object, int32_t operand) {
+ int32_t original = *object;
+ *object &= operand;
+ return original;
+}
+
+static inline int32_t iree_atomic_fetch_or_int32_impl(
+ volatile iree_atomic_int32_t* object, int32_t operand) {
+ int32_t original = *object;
+ *object |= operand;
+ return original;
+}
+
+static inline int32_t iree_atomic_fetch_xor_int32_impl(
+ volatile iree_atomic_int32_t* object, int32_t operand) {
+ int32_t original = *object;
+ *object ^= operand;
+ return original;
+}
+
+static inline int32_t iree_atomic_fetch_exchange_int32_impl(
+ volatile iree_atomic_int32_t* object, int32_t desired) {
+ int32_t original = *object;
+ *object = desired;
+ return original;
+}
+
+static inline bool iree_atomic_compare_exchange_int32_impl(
+    volatile iree_atomic_int32_t* object, int32_t* expected, int32_t desired) {
+  if (*object == *expected) {
+    *object = desired;
+    return true;  // exchanged
+  } else {
+    *expected = *object;  // C11 semantics: report observed value on failure
+    return false;
+  }
+}
+
+static inline int64_t iree_atomic_fetch_add_int64_impl(
+ volatile iree_atomic_int64_t* object, int64_t operand) {
+ int64_t original = *object;
+ *object += operand;
+ return original;
+}
+
+static inline int64_t iree_atomic_fetch_and_int64_impl(
+ volatile iree_atomic_int64_t* object, int64_t operand) {
+ int64_t original = *object;
+ *object &= operand;
+ return original;
+}
+
+static inline int64_t iree_atomic_fetch_or_int64_impl(
+ volatile iree_atomic_int64_t* object, int64_t operand) {
+ int64_t original = *object;
+ *object |= operand;
+ return original;
+}
+
+static inline int64_t iree_atomic_fetch_xor_int64_impl(
+ volatile iree_atomic_int64_t* object, int64_t operand) {
+ int64_t original = *object;
+ *object ^= operand;
+ return original;
+}
+
+static inline int64_t iree_atomic_fetch_exchange_int64_impl(
+ volatile iree_atomic_int64_t* object, int64_t desired) {
+ int64_t original = *object;
+ *object = desired;
+ return original;
+}
+
+static inline bool iree_atomic_compare_exchange_int64_impl(
+    volatile iree_atomic_int64_t* object, int64_t* expected, int64_t desired) {
+  if (*object == *expected) {
+    *object = desired;
+    return true;  // exchanged
+  } else {
+    *expected = *object;  // C11 semantics: report observed value on failure
+    return false;
+  }
+}
+
+// No pointer-width variants are defined above, so specialize the intptr_t
+// operations based on the pointer size (mirrors the MSVC header's approach).
+#if defined(IREE_PTR_SIZE_32)
+#define iree_atomic_load_intptr(object, order) \
+  (intptr_t) iree_atomic_load_int32((iree_atomic_int32_t*)(object), (order))
+#define iree_atomic_store_intptr(object, desired, order) \
+  (intptr_t) iree_atomic_store_int32((iree_atomic_int32_t*)(object), \
+                                     (int32_t)(desired), (order))
+#define iree_atomic_fetch_add_intptr(object, operand, order) \
+  (intptr_t) iree_atomic_fetch_add_int32((iree_atomic_int32_t*)(object), \
+                                         (int32_t)(operand), (order))
+#define iree_atomic_fetch_sub_intptr(object, operand, order) \
+  (intptr_t) iree_atomic_fetch_sub_int32((iree_atomic_int32_t*)(object), \
+                                         (int32_t)(operand), (order))
+#define iree_atomic_exchange_intptr(object, desired, order) \
+  (intptr_t) iree_atomic_exchange_int32((iree_atomic_int32_t*)(object), \
+                                        (int32_t)(desired), (order))
+#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \
+                                                   order_succ, order_fail) \
+  iree_atomic_compare_exchange_strong_int32( \
+      (iree_atomic_int32_t*)(object), (int32_t*)(expected), \
+      (int32_t)(desired), (order_succ), (order_fail))
+#define iree_atomic_compare_exchange_weak_intptr \
+  iree_atomic_compare_exchange_strong_intptr
+#else
+#define iree_atomic_load_intptr(object, order) \
+  (intptr_t) iree_atomic_load_int64((iree_atomic_int64_t*)(object), (order))
+#define iree_atomic_store_intptr(object, desired, order) \
+  (intptr_t) iree_atomic_store_int64((iree_atomic_int64_t*)(object), \
+                                     (int64_t)(desired), (order))
+#define iree_atomic_fetch_add_intptr(object, operand, order) \
+  (intptr_t) iree_atomic_fetch_add_int64((iree_atomic_int64_t*)(object), \
+                                         (int64_t)(operand), (order))
+#define iree_atomic_fetch_sub_intptr(object, operand, order) \
+  (intptr_t) iree_atomic_fetch_sub_int64((iree_atomic_int64_t*)(object), \
+                                         (int64_t)(operand), (order))
+#define iree_atomic_exchange_intptr(object, desired, order) \
+  (intptr_t) iree_atomic_exchange_int64((iree_atomic_int64_t*)(object), \
+                                        (int64_t)(desired), (order))
+#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \
+                                                   order_succ, order_fail) \
+  iree_atomic_compare_exchange_strong_int64( \
+      (iree_atomic_int64_t*)(object), (int64_t*)(expected), \
+      (int64_t)(desired), (order_succ), (order_fail))
+#define iree_atomic_compare_exchange_weak_intptr \
+  iree_atomic_compare_exchange_strong_intptr
+#endif  // IREE_PTR_SIZE_32
+
+#define iree_atomic_thread_fence(order)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+#endif // IREE_BASE_INTERNAL_ATOMICS_DISABLED_H_
diff --git a/runtime/src/iree/base/internal/atomics_gcc.h b/runtime/src/iree/base/internal/atomics_gcc.h
new file mode 100644
index 0000000..1eb7170
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomics_gcc.h
@@ -0,0 +1,89 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_ATOMICS_GCC_H_
+#define IREE_BASE_INTERNAL_ATOMICS_GCC_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/target_platform.h"
+
+#if defined(IREE_COMPILER_GCC)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum iree_memory_order_e {
+ iree_memory_order_relaxed = __ATOMIC_RELAXED,
+ iree_memory_order_consume = __ATOMIC_CONSUME,
+ iree_memory_order_acquire = __ATOMIC_ACQUIRE,
+ iree_memory_order_release = __ATOMIC_RELEASE,
+ iree_memory_order_acq_rel = __ATOMIC_ACQ_REL,
+ iree_memory_order_seq_cst = __ATOMIC_SEQ_CST,
+} iree_memory_order_t;
+
+#define IREE_ATOMIC_VAR_INIT(value) (value)
+
+typedef int32_t iree_atomic_int32_t;
+typedef int64_t iree_atomic_int64_t;
+// typedef __int128 iree_atomic_int128_t;
+typedef intptr_t iree_atomic_intptr_t;
+
+#ifdef __cplusplus
+// Equiv to C++ auto keyword in C++ mode.
+#define __iree_auto_type auto
+#else
+// Only defined in C mode.
+#define __iree_auto_type __auto_type
+#endif
+
+#define iree_atomic_load_auto(object, order) \
+ __extension__({ \
+ __iree_auto_type __atomic_load_ptr = (object); \
+ __typeof__(*__atomic_load_ptr) __atomic_load_tmp; \
+ __atomic_load(__atomic_load_ptr, &__atomic_load_tmp, (order)); \
+ __atomic_load_tmp; \
+ })
+#define iree_atomic_store_auto(object, desired, order) \
+ __extension__({ \
+ __iree_auto_type __atomic_store_ptr = (object); \
+ __typeof__(*__atomic_store_ptr) __atomic_store_tmp = (desired); \
+ __atomic_store(__atomic_store_ptr, &__atomic_store_tmp, (order)); \
+ })
+#define iree_atomic_fetch_add_auto(object, operand, order) \
+ __atomic_fetch_add((object), (operand), (order))
+#define iree_atomic_fetch_sub_auto(object, operand, order) \
+ __atomic_fetch_sub((object), (operand), (order))
+#define iree_atomic_fetch_and_auto(object, operand, order) \
+ __atomic_fetch_and((object), (operand), (order))
+#define iree_atomic_fetch_or_auto(object, operand, order) \
+ __atomic_fetch_or((object), (operand), (order))
+#define iree_atomic_fetch_xor_auto(object, operand, order) \
+ __atomic_fetch_xor((object), (operand), (order))
+#define iree_atomic_exchange_auto(object, operand, order) \
+ __atomic_exchange_n((object), (operand), (order))
+#define iree_atomic_compare_exchange_strong_auto(object, expected, desired, \
+ order_succ, order_fail) \
+ __atomic_compare_exchange_n(object, expected, desired, /*weak=*/false, \
+ (order_succ), (order_fail))
+#define iree_atomic_compare_exchange_weak_auto(object, expected, desired, \
+ order_succ, order_fail) \
+ __atomic_compare_exchange_n(object, expected, desired, /*weak=*/true, \
+ (order_succ), (order_fail))
+
+#define iree_atomic_thread_fence(order) __atomic_thread_fence(order)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_COMPILER_GCC
+
+#endif // IREE_BASE_INTERNAL_ATOMICS_GCC_H_
diff --git a/runtime/src/iree/base/internal/atomics_msvc.h b/runtime/src/iree/base/internal/atomics_msvc.h
new file mode 100644
index 0000000..5cfbf43
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomics_msvc.h
@@ -0,0 +1,182 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_ATOMICS_MSVC_H_
+#define IREE_BASE_INTERNAL_ATOMICS_MSVC_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/target_platform.h"
+
+#if defined(IREE_COMPILER_MSVC)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Mirrors the C11 memory_order enumeration so call sites can express intent
+// portably. NOTE: the Interlocked-based implementations below do not consult
+// these values (they always act as full barriers).
+typedef enum iree_memory_order_e {
+ iree_memory_order_relaxed,
+ iree_memory_order_consume,
+ iree_memory_order_acquire,
+ iree_memory_order_release,
+ iree_memory_order_acq_rel,
+ iree_memory_order_seq_cst,
+} iree_memory_order_t;
+
+// Static initializer for the iree_atomic_* wrapper structs below.
+#define IREE_ATOMIC_VAR_INIT(value) \
+ { (value) }
+
+// Atomic value wrappers; distinct struct types discourage accidental direct
+// (non-atomic) access to the underlying integer.
+typedef struct {
+ int32_t __val;
+} iree_atomic_int32_t;
+typedef struct {
+ int64_t __val;
+} iree_atomic_int64_t;
+// 128-bit atomics are not currently provided on MSVC:
+// typedef __declspec(align(16)) struct {
+// uint64_t __val[2];
+// } iree_atomic_int128_t;
+typedef struct {
+ intptr_t __val;
+} iree_atomic_intptr_t;
+
+// 32-bit atomics implemented with the Win32 Interlocked* intrinsics.
+// The |order| argument is ignored: Interlocked operations act as full
+// barriers, which is at least as strong as any requested ordering.
+// NOTE: macro arguments are parenthesized before casting so that expression
+// arguments (e.g. `p + 1`) bind as intended; the original `(LONG*)object`
+// form would cast only the first operand of such an expression.
+#define iree_atomic_load_int32(object, order) \
+  InterlockedExchangeAdd((volatile LONG*)(object), 0)
+#define iree_atomic_store_int32(object, desired, order) \
+  InterlockedExchange((volatile LONG*)(object), (desired))
+#define iree_atomic_fetch_add_int32(object, operand, order) \
+  InterlockedExchangeAdd((volatile LONG*)(object), (operand))
+#define iree_atomic_fetch_sub_int32(object, operand, order) \
+  InterlockedExchangeAdd((volatile LONG*)(object), -((int32_t)(operand)))
+#define iree_atomic_fetch_and_int32(object, operand, order) \
+  InterlockedAnd((volatile LONG*)(object), (operand))
+#define iree_atomic_fetch_or_int32(object, operand, order) \
+  InterlockedOr((volatile LONG*)(object), (operand))
+#define iree_atomic_fetch_xor_int32(object, operand, order) \
+  InterlockedXor((volatile LONG*)(object), (operand))
+#define iree_atomic_exchange_int32(object, desired, order) \
+  InterlockedExchange((volatile LONG*)(object), (desired))
+// Compare-and-swap; on failure the observed value is stored to |expected|.
+#define iree_atomic_compare_exchange_strong_int32(object, expected, desired, \
+                                                  order_succ, order_fail) \
+  iree_atomic_compare_exchange_strong_int32_impl( \
+      (volatile iree_atomic_int32_t*)(object), (int32_t*)(expected), \
+      (int32_t)(desired), (order_succ), (order_fail))
+// MSVC has no weak CAS intrinsic; the strong form is a conforming (if
+// conservative) implementation of the weak contract.
+#define iree_atomic_compare_exchange_weak_int32 \
+  iree_atomic_compare_exchange_strong_int32
+
+// 64-bit atomics implemented with the Win32 Interlocked*64 intrinsics.
+// As with the 32-bit set, |order| is ignored (Interlocked ops are full
+// barriers) and macro arguments are parenthesized before casting/negating so
+// expression arguments bind correctly.
+#define iree_atomic_load_int64(object, order) \
+  InterlockedExchangeAdd64((volatile LONG64*)(object), 0)
+#define iree_atomic_store_int64(object, desired, order) \
+  InterlockedExchange64((volatile LONG64*)(object), (LONG64)(desired))
+#define iree_atomic_fetch_add_int64(object, operand, order) \
+  InterlockedExchangeAdd64((volatile LONG64*)(object), (LONG64)(operand))
+#define iree_atomic_fetch_sub_int64(object, operand, order) \
+  InterlockedExchangeAdd64((volatile LONG64*)(object), -((LONG64)(operand)))
+#define iree_atomic_fetch_and_int64(object, operand, order) \
+  InterlockedAnd64((volatile LONG64*)(object), (operand))
+#define iree_atomic_fetch_or_int64(object, operand, order) \
+  InterlockedOr64((volatile LONG64*)(object), (operand))
+#define iree_atomic_fetch_xor_int64(object, operand, order) \
+  InterlockedXor64((volatile LONG64*)(object), (operand))
+#define iree_atomic_exchange_int64(object, desired, order) \
+  InterlockedExchange64((volatile LONG64*)(object), (desired))
+// Compare-and-swap; on failure the observed value is stored to |expected|.
+#define iree_atomic_compare_exchange_strong_int64(object, expected, desired, \
+                                                  order_succ, order_fail) \
+  iree_atomic_compare_exchange_strong_int64_impl( \
+      (volatile iree_atomic_int64_t*)(object), (int64_t*)(expected), \
+      (int64_t)(desired), (order_succ), (order_fail))
+// MSVC has no weak CAS intrinsic; strong is a conforming substitute.
+#define iree_atomic_compare_exchange_weak_int64 \
+  iree_atomic_compare_exchange_strong_int64
+
+// Full sequentially-consistent fence; |order| is ignored.
+#define iree_atomic_thread_fence(order) MemoryBarrier()
+
+// Strong CAS on a 32-bit atomic: publishes |desired| only if *|object| still
+// holds *|expected|. Returns true on success; on failure writes the value
+// actually observed back into |expected| (C11 CAS semantics). The memory
+// order arguments are accepted for interface parity but unused here —
+// InterlockedCompareExchange is a full barrier.
+static inline bool iree_atomic_compare_exchange_strong_int32_impl(
+    volatile iree_atomic_int32_t* object, int32_t* expected, int32_t desired,
+    iree_memory_order_t order_succ, iree_memory_order_t order_fail) {
+  const int32_t comparand = *expected;
+  const int32_t previous =
+      InterlockedCompareExchange((volatile LONG*)object, desired, comparand);
+  if (previous != comparand) {
+    // Lost the race: report what was actually resident.
+    *expected = previous;
+    return false;
+  }
+  return true;
+}
+
+// 64-bit counterpart of the strong CAS helper above: swaps in |desired| only
+// when *|object| matches *|expected|; on failure stores the observed value
+// into |expected| and returns false. Memory order arguments are unused
+// (InterlockedCompareExchange64 is a full barrier).
+static inline bool iree_atomic_compare_exchange_strong_int64_impl(
+    volatile iree_atomic_int64_t* object, int64_t* expected, int64_t desired,
+    iree_memory_order_t order_succ, iree_memory_order_t order_fail) {
+  const int64_t comparand = *expected;
+  const int64_t previous = InterlockedCompareExchange64(
+      (volatile LONG64*)object, desired, comparand);
+  if (previous != comparand) {
+    // Lost the race: report what was actually resident.
+    *expected = previous;
+    return false;
+  }
+  return true;
+}
+
+// NOTE: a second, identical definition of iree_atomic_thread_fence previously
+// sat here; it was redundant with the definition above and has been removed.
+
+// There are no pointer-width atomic ops in MSVC so we need to specialize based
+// on the pointer size.
+// Each intptr_t operation routes through the matching fixed-width variant
+// above, casting arguments/results between intptr_t and int32_t/int64_t.
+#if defined(IREE_PTR_SIZE_32)
+#define iree_atomic_load_intptr(object, order) \
+ (intptr_t) iree_atomic_load_int32((iree_atomic_int32_t*)(object), (order))
+#define iree_atomic_store_intptr(object, desired, order) \
+ (intptr_t) iree_atomic_store_int32((iree_atomic_int32_t*)(object), \
+ (int32_t)(desired), (order))
+#define iree_atomic_fetch_add_intptr(object, operand, order) \
+ (intptr_t) iree_atomic_fetch_add_int32((iree_atomic_int32_t*)(object), \
+ (int32_t)(operand), (order))
+#define iree_atomic_fetch_sub_intptr(object, operand, order) \
+ (intptr_t) iree_atomic_fetch_sub_int32((iree_atomic_int32_t*)(object), \
+ (int32_t)(operand), (order))
+#define iree_atomic_exchange_intptr(object, desired, order) \
+ (intptr_t) iree_atomic_exchange_int32((iree_atomic_int32_t*)(object), \
+ (int32_t)(desired), (order))
+#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \
+ order_succ, order_fail) \
+ iree_atomic_compare_exchange_strong_int32( \
+ (iree_atomic_int32_t*)(object), (int32_t*)(expected), \
+ (int32_t)(desired), (order_succ), (order_fail))
+// Weak CAS aliases strong (see the int32/int64 notes above).
+#define iree_atomic_compare_exchange_weak_intptr \
+ iree_atomic_compare_exchange_strong_intptr
+#else
+#define iree_atomic_load_intptr(object, order) \
+ (intptr_t) iree_atomic_load_int64((iree_atomic_int64_t*)(object), (order))
+#define iree_atomic_store_intptr(object, desired, order) \
+ (intptr_t) iree_atomic_store_int64((iree_atomic_int64_t*)(object), \
+ (int64_t)(desired), (order))
+#define iree_atomic_fetch_add_intptr(object, operand, order) \
+ (intptr_t) iree_atomic_fetch_add_int64((iree_atomic_int64_t*)(object), \
+ (int64_t)(operand), (order))
+#define iree_atomic_fetch_sub_intptr(object, operand, order) \
+ (intptr_t) iree_atomic_fetch_sub_int64((iree_atomic_int64_t*)(object), \
+ (int64_t)(operand), (order))
+#define iree_atomic_exchange_intptr(object, desired, order) \
+ (intptr_t) iree_atomic_exchange_int64((iree_atomic_int64_t*)(object), \
+ (int64_t)(desired), (order))
+#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \
+ order_succ, order_fail) \
+ iree_atomic_compare_exchange_strong_int64( \
+ (iree_atomic_int64_t*)(object), (int64_t*)(expected), \
+ (int64_t)(desired), (order_succ), (order_fail))
+// Weak CAS aliases strong (see the int32/int64 notes above).
+#define iree_atomic_compare_exchange_weak_intptr \
+ iree_atomic_compare_exchange_strong_intptr
+#endif // IREE_PTR_SIZE_32
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_COMPILER_MSVC
+
+#endif // IREE_BASE_INTERNAL_ATOMICS_MSVC_H_
diff --git a/runtime/src/iree/base/internal/atomics_test.cc b/runtime/src/iree/base/internal/atomics_test.cc
new file mode 100644
index 0000000..a9fce2f
--- /dev/null
+++ b/runtime/src/iree/base/internal/atomics_test.cc
@@ -0,0 +1,102 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/atomics.h"
+
+#include <cstddef>
+#include <cstdint>
+
+#include "iree/testing/gtest.h"
+
+namespace {
+
+// NOTE: these tests are just to ensure we correctly compile the macros across
+// our supported toolchains: they don't verify that the memory semantics are
+// correct (as that would be difficult and is really the toolchain's job).
+
+TEST(AtomicPtr, LoadStore) {
+  // The first load must observe the initializer; a store followed by a load
+  // must observe the stored value.
+  const intptr_t initial_value = 0x0;
+  const intptr_t stored_value = 0x1;
+  iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(initial_value);
+  EXPECT_EQ(initial_value,
+            iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+  iree_atomic_store_intptr(&value, stored_value, iree_memory_order_seq_cst);
+  EXPECT_EQ(stored_value,
+            iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+}
+
+TEST(AtomicPtr, AddSub) {
+ // fetch_add/fetch_sub return the value held *before* the update; the final
+ // load checks the net effect (+1 +1 -1 -1 == 0).
+ intptr_t ptr_0 = 0x0;
+ intptr_t ptr_1 = 0x1;
+ intptr_t ptr_2 = 0x2;
+ iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(ptr_0);
+ EXPECT_EQ(ptr_0, iree_atomic_fetch_add_intptr(&value, ptr_1,
+ iree_memory_order_seq_cst));
+ EXPECT_EQ(ptr_1, iree_atomic_fetch_add_intptr(&value, ptr_1,
+ iree_memory_order_seq_cst));
+ EXPECT_EQ(ptr_2, iree_atomic_fetch_sub_intptr(&value, ptr_1,
+ iree_memory_order_seq_cst));
+ EXPECT_EQ(ptr_1, iree_atomic_fetch_sub_intptr(&value, ptr_1,
+ iree_memory_order_seq_cst));
+ EXPECT_EQ(ptr_0, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+}
+
+TEST(AtomicPtr, Exchange) {
+  // Each exchange must return the value resident before the swap.
+  const intptr_t v0 = 0x0;
+  const intptr_t v1 = 0x1;
+  const intptr_t v2 = 0x2;
+  iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(v0);
+  EXPECT_EQ(v0, iree_atomic_exchange_intptr(&value, v1,
+                                            iree_memory_order_seq_cst));
+  EXPECT_EQ(v1, iree_atomic_exchange_intptr(&value, v2,
+                                            iree_memory_order_seq_cst));
+  EXPECT_EQ(v2, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+}
+
+TEST(AtomicPtr, CompareExchange) {
+ // Covers both CAS outcomes: success leaves |ptr_expected| untouched and
+ // installs the new value; failure leaves the stored value untouched and
+ // writes the observed value back into |ptr_expected|.
+ intptr_t ptr_0 = 0x0;
+ intptr_t ptr_1 = 0x1;
+ intptr_t ptr_2 = 0x2;
+ iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(ptr_0);
+ intptr_t ptr_expected = 0;
+
+ // OK: value == ptr_0, CAS(ptr_0 -> ptr_1)
+ iree_atomic_store_intptr(&value, ptr_0, iree_memory_order_seq_cst);
+ ptr_expected = ptr_0;
+ EXPECT_TRUE(iree_atomic_compare_exchange_strong_intptr(
+ &value, &ptr_expected, ptr_1, iree_memory_order_seq_cst,
+ iree_memory_order_seq_cst));
+ EXPECT_EQ(ptr_0, ptr_expected);
+ EXPECT_EQ(ptr_1, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+
+ // OK: value == ptr_1, CAS(ptr_1 -> ptr_2)
+ iree_atomic_store_intptr(&value, ptr_1, iree_memory_order_seq_cst);
+ ptr_expected = ptr_1;
+ EXPECT_TRUE(iree_atomic_compare_exchange_strong_intptr(
+ &value, &ptr_expected, ptr_2, iree_memory_order_seq_cst,
+ iree_memory_order_seq_cst));
+ EXPECT_EQ(ptr_1, ptr_expected);
+ EXPECT_EQ(ptr_2, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+
+ // FAIL: value == ptr_0, CAS(ptr_1 -> ptr_2)
+ iree_atomic_store_intptr(&value, ptr_0, iree_memory_order_seq_cst);
+ ptr_expected = ptr_1;
+ EXPECT_FALSE(iree_atomic_compare_exchange_strong_intptr(
+ &value, &ptr_expected, ptr_2, iree_memory_order_seq_cst,
+ iree_memory_order_seq_cst));
+ EXPECT_EQ(ptr_0, ptr_expected);
+ EXPECT_EQ(ptr_0, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+}
+
+TEST(AtomicRefCount, IncDec) {
+ // The expectations imply init seeds the count at 1 and that dec returns
+ // the pre-decrement value (1 + 2 incs -> 3, then 3/2/1) — confirm against
+ // the iree_atomic_ref_count_* definitions if this test is extended.
+ iree_atomic_ref_count_t count;
+ iree_atomic_ref_count_init(&count);
+ iree_atomic_ref_count_inc(&count);
+ iree_atomic_ref_count_inc(&count);
+ EXPECT_EQ(3, iree_atomic_ref_count_dec(&count));
+ EXPECT_EQ(2, iree_atomic_ref_count_dec(&count));
+ EXPECT_EQ(1, iree_atomic_ref_count_dec(&count));
+}
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/call_once.h b/runtime/src/iree/base/internal/call_once.h
new file mode 100644
index 0000000..da411dd
--- /dev/null
+++ b/runtime/src/iree/base/internal/call_once.h
@@ -0,0 +1,109 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_CALL_ONCE_H_
+#define IREE_BASE_INTERNAL_CALL_ONCE_H_
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//==============================================================================
+// iree_call_once
+//==============================================================================
+// Emulates the C11 call_once feature as few seem to have it.
+// https://en.cppreference.com/w/c/thread/call_once
+
+#if defined(__has_include)
+// NOTE: the C11 threads header is <threads.h> (plural). The previous probe
+// for <thread.h> could never succeed, silently disabling this path on every
+// toolchain.
+#if __has_include(<threads.h>)
+#define IREE_HAS_C11_THREAD_H 1
+#endif
+#endif
+
+#if defined(IREE_HAS_C11_THREAD_H)
+
+// Always prefer the C11 header if present.
+#include <threads.h>
+#define IREE_ONCE_FLAG_INIT ONCE_FLAG_INIT
+// C11 names the flag type once_flag (lowercase type); only the initializer
+// macro is spelled ONCE_FLAG_INIT.
+#define iree_once_flag once_flag
+#define iree_call_once call_once
+
+#elif defined(IREE_PLATFORM_WINDOWS)
+
+// Windows fallback using the native InitOnceExecuteOnce:
+// https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-initonceexecuteonce
+
+// Expands to a value that can be used to initialize an object of type
+// iree_once_flag.
+#define IREE_ONCE_FLAG_INIT INIT_ONCE_STATIC_INIT
+
+// Complete object type capable of holding a flag used by iree_call_once.
+typedef INIT_ONCE iree_once_flag;
+
+// Parameter block threaded through InitOnceExecuteOnce to the trampoline.
+typedef struct {
+  void (*func)(void);
+} iree_call_once_impl_params_t;
+// Win32 INIT_ONCE trampoline: unwraps the parameter block and invokes the
+// user callback. Returning TRUE marks the one-time init as completed.
+static BOOL CALLBACK iree_call_once_callback_impl(PINIT_ONCE InitOnce,
+                                                  PVOID Parameter,
+                                                  PVOID* Context) {
+  // https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nc-synchapi-pinit_once_fn
+  iree_call_once_impl_params_t* param =
+      (iree_call_once_impl_params_t*)Parameter;
+  (param->func)();
+  ((void)InitOnce);
+  ((void)Context);  // suppress warning
+  return TRUE;
+}
+
+// Calls |func| exactly once, even if invoked from several threads.
+// The completion of the function synchronizes with all previous or subsequent
+// calls to call_once with the same flag variable.
+static inline void iree_call_once(iree_once_flag* flag, void (*func)(void)) {
+  iree_call_once_impl_params_t param;
+  param.func = func;
+  // NOTE: fixed mojibake `(PVOID)¶m` -> `(PVOID)&param` (would not compile).
+  // Stack storage for |param| is safe: InitOnceExecuteOnce runs the callback
+  // synchronously before returning.
+  InitOnceExecuteOnce(flag, iree_call_once_callback_impl, (PVOID)&param, NULL);
+}
+
+#elif IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+// Single-threaded fallback used when synchronization is compiled out.
+// The flag starts at 1 ("not yet run") and is cleared on the first call so
+// that |func| still executes exactly once; no locking is needed because this
+// configuration asserts a single thread. The previous empty body never
+// invoked |func| at all, which would skip one-time initialization entirely.
+#define IREE_ONCE_FLAG_INIT 1
+#define iree_once_flag uint32_t
+static inline void iree_call_once(iree_once_flag* flag, void (*func)(void)) {
+  if (*flag) {
+    *flag = 0;
+    func();
+  }
+}
+
+#else
+
+// Fallback using pthread_once:
+// https://pubs.opengroup.org/onlinepubs/007908775/xsh/pthread_once.html
+
+#include <pthread.h>
+
+// Expands to a value that can be used to initialize an object of type
+// iree_once_flag.
+#define IREE_ONCE_FLAG_INIT PTHREAD_ONCE_INIT
+
+// Complete object type capable of holding a flag used by iree_call_once.
+typedef pthread_once_t iree_once_flag;
+
+// Calls |func| exactly once, even if invoked from several threads.
+// The completion of the function synchronizes with all previous or subsequent
+// calls to call_once with the same flag variable.
+// NOTE: the pthread_once return value is intentionally ignored; it can only
+// fail for invalid arguments, which this wrapper does not produce.
+static inline void iree_call_once(iree_once_flag* flag, void (*func)(void)) {
+ pthread_once(flag, func);
+}
+
+#endif // IREE_HAS_C11_THREAD_H / fallbacks
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_CALL_ONCE_H_
diff --git a/runtime/src/iree/base/internal/cpu.c b/runtime/src/iree/base/internal/cpu.c
new file mode 100644
index 0000000..2a0ed83
--- /dev/null
+++ b/runtime/src/iree/base/internal/cpu.c
@@ -0,0 +1,61 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: must be first before _any_ system includes.
+#define _GNU_SOURCE
+
+#include "iree/base/internal/cpu.h"
+
+#include "iree/base/target_platform.h"
+
+//===----------------------------------------------------------------------===//
+// iree_cpu_*
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_LINUX)
+
+#include <sched.h>
+
+// Returns the ID of the logical processor currently executing this code,
+// or 0 when the query fails.
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) {
+ // This path is relatively portable and should work on linux/bsd/etc-likes.
+ // We may want to use getcpu when available so that we can get the group ID.
+ // https://man7.org/linux/man-pages/man3/sched_getcpu.3.html
+ //
+ // libc implementations can use vDSO and other fun stuff to make this really
+ // cheap: http://git.musl-libc.org/cgit/musl/tree/src/sched/sched_getcpu.c
+ int id = sched_getcpu();
+ return id != -1 ? id : 0;
+}
+
+#elif defined(IREE_PLATFORM_WINDOWS)
+
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) {
+ PROCESSOR_NUMBER pn;
+ GetCurrentProcessorNumberEx(&pn);
+ // Flatten (group, number) into one ID; a Windows processor group holds at
+ // most 64 logical processors.
+ return 64 * pn.Group + pn.Number;
+}
+
+#else
+
+// No implementation.
+// We could allow an iree/base/config.h override to externalize this.
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) { return 0; }
+
+#endif // IREE_PLATFORM_*
+
+// Refreshes |processor_id| with the current logical processor, using |tag|
+// as caller-provided memoization storage. Today |tag| is validated but
+// otherwise unused — every call performs a fresh query (see TODO below).
+void iree_cpu_requery_processor_id(iree_cpu_processor_tag_t* IREE_RESTRICT tag,
+ iree_cpu_processor_id_t* IREE_RESTRICT
+ processor_id) {
+ IREE_ASSERT_ARGUMENT(tag);
+ IREE_ASSERT_ARGUMENT(processor_id);
+
+ // TODO(benvanik): set a frequency for this and use a coarse timer
+ // (CLOCK_MONOTONIC_COARSE) to do a ~4-10Hz refresh. We can store the last
+ // query time and the last processor ID in the tag and only perform the query
+ // if it has changed.
+
+ *processor_id = iree_cpu_query_processor_id();
+}
diff --git a/runtime/src/iree/base/internal/cpu.h b/runtime/src/iree/base/internal/cpu.h
new file mode 100644
index 0000000..914f39d
--- /dev/null
+++ b/runtime/src/iree/base/internal/cpu.h
@@ -0,0 +1,40 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_CPU_H_
+#define IREE_BASE_INTERNAL_CPU_H_
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_cpu_*
+//===----------------------------------------------------------------------===//
+
+// Identifies a logical processor; 0 is also returned when no query is
+// available on the target platform.
+typedef uint32_t iree_cpu_processor_id_t;
+// Opaque caller-held memoization storage for iree_cpu_requery_processor_id.
+typedef uint32_t iree_cpu_processor_tag_t;
+
+// Returns the ID of the logical processor executing this code.
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void);
+
+// Returns the ID of the logical processor executing this code, using |tag| to
+// memoize the query in cases where it does not change frequently.
+// |tag| must be initialized to 0 on first call and may be reset to 0 by the
+// caller at any time to invalidate the cached result.
+void iree_cpu_requery_processor_id(iree_cpu_processor_tag_t* IREE_RESTRICT tag,
+ iree_cpu_processor_id_t* IREE_RESTRICT
+ processor_id);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+// NOTE: guard comment corrected (was IREE_BASE_INTERNAL_ARENA_H_, a stale
+// copy/paste from another header).
+#endif // IREE_BASE_INTERNAL_CPU_H_
diff --git a/runtime/src/iree/base/internal/debugging.h b/runtime/src/iree/base/internal/debugging.h
new file mode 100644
index 0000000..0bf232c
--- /dev/null
+++ b/runtime/src/iree/base/internal/debugging.h
@@ -0,0 +1,109 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_DEBUGGING_H_
+#define IREE_BASE_INTERNAL_DEBUGGING_H_
+
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Portable always-inline attribute so that iree_debug_break below does not
+// introduce its own stack frame (keeping the debugger stop at the caller).
+#if defined(IREE_COMPILER_GCC_COMPAT)
+#define IREE_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
+#elif defined(IREE_COMPILER_MSVC)
+#define IREE_ATTRIBUTE_ALWAYS_INLINE __forceinline
+#else
+#define IREE_ATTRIBUTE_ALWAYS_INLINE
+#endif // IREE_COMPILER_*
+
+//===----------------------------------------------------------------------===//
+// Debugger interaction
+//===----------------------------------------------------------------------===//
+// NOTE: in general it's not a good idea to change program behavior when running
+// under a debugger as that then makes it harder to reproduce and successfully
+// debug issues that happen without the debugger.
+
+// Forces a break into an attached debugger.
+// May be ignored if no debugger is attached or raise a signal that gives the
+// option to attach a debugger.
+//
+// We implement this directly in the header with ALWAYS_INLINE so that the
+// stack doesn't get all messed up.
+IREE_ATTRIBUTE_ALWAYS_INLINE static inline void iree_debug_break(void) {
+#if defined(IREE_COMPILER_HAS_BUILTIN_DEBUG_TRAP)
+ __builtin_debugtrap();
+#elif defined(IREE_PLATFORM_WINDOWS)
+ __debugbreak();
+#elif defined(IREE_ARCH_ARM_32)
+ // Raw breakpoint-style instruction encodings for targets where no compiler
+ // builtin is available (encodings presumed correct — verify per target).
+ __asm__ volatile(".inst 0xe7f001f0");
+#elif defined(IREE_ARCH_ARM_64)
+ __asm__ volatile(".inst 0xd4200000");
+#elif defined(IREE_ARCH_X86_32) || defined(IREE_ARCH_X86_64)
+ __asm__ volatile("int $0x03");
+#elif defined(IREE_PLATFORM_EMSCRIPTEN)
+ EM_ASM({ debugger; });
+#else
+ // NOTE: this is unrecoverable and debugging cannot continue.
+ __builtin_trap();
+#endif // IREE_COMPILER_HAS_BUILTIN_DEBUG_TRAP
+}
+
+//===----------------------------------------------------------------------===//
+// Sanitizer interfaces
+//===----------------------------------------------------------------------===//
+// These provide hints to the various -fsanitize= features that help us indicate
+// what our code is doing to prevent false positives and gain additional
+// coverage. By default the sanitizers try to hook platform features like
+// mutexes and threads and our own implementations of those aren't automatically
+// picked up. In addition, specific uses of memory like arenas can thwart tools
+// like ASAN that try to detect accesses to freed memory because we are never
+// actually malloc()'ing and free()'ing and need to tell ASAN when blocks of
+// memory come into/out-of the pool.
+//
+// The documentation on these interfaces is pretty sparse but it's possible to
+// find usage examples of the hooks in the compiler-provided hooks themselves.
+//
+// The headers can be viewed here:
+// https://github.com/llvm/llvm-project/tree/main/compiler-rt/include/sanitizer
+// And common interceptors here:
+// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+//
+// NOTE: don't assume the presence of a sanitizer implies clang+llvm+x86! GCC
+// supports all of the sanitizers and MSVC supports ASAN and almost all of them
+// can be used on non-x86 platforms.
+
+#if defined(IREE_SANITIZER_ADDRESS)
+// LSAN ships with ASAN, so its interface header is pulled in under the same
+// guard.
+#include <sanitizer/asan_interface.h>
+#include <sanitizer/lsan_interface.h>
+#endif // IREE_SANITIZER_ADDRESS
+
+// For whenever we want to provide specialized msan/tsan hooks:
+// #if defined(IREE_SANITIZER_MEMORY)
+// #include <sanitizer/msan_interface.h>
+// #endif // IREE_SANITIZER_MEMORY
+// #if defined(IREE_SANITIZER_THREAD)
+// #include <sanitizer/tsan_interface.h>
+// #endif // IREE_SANITIZER_THREAD
+
+// Suppresses leak detection false-positives in a region. May be nested.
+// Do not use this for any IREE-owned code: fix your leaks! This is useful when
+// third-party libraries or system calls may create false positives or just be
+// leaky such as GPU drivers and shader compilers (which are notoriously bad).
+// Without ASAN these expand to nothing.
+#if defined(IREE_SANITIZER_ADDRESS)
+#define IREE_LEAK_CHECK_DISABLE_PUSH() __lsan_disable()
+#define IREE_LEAK_CHECK_DISABLE_POP() __lsan_enable()
+#else
+#define IREE_LEAK_CHECK_DISABLE_PUSH()
+#define IREE_LEAK_CHECK_DISABLE_POP()
+#endif // IREE_SANITIZER_ADDRESS
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_DEBUGGING_H_
diff --git a/runtime/src/iree/base/internal/dynamic_library.h b/runtime/src/iree/base/internal/dynamic_library.h
new file mode 100644
index 0000000..9856269
--- /dev/null
+++ b/runtime/src/iree/base/internal/dynamic_library.h
@@ -0,0 +1,80 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_DYNAMIC_LIBRARY_H_
+#define IREE_BASE_INTERNAL_DYNAMIC_LIBRARY_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Defines the behavior of the dynamic library loader.
+// Defines the behavior of the dynamic library loader.
+// Currently only NONE is defined; the flags type is reserved for future use.
+enum iree_dynamic_library_flag_bits_t {
+ IREE_DYNAMIC_LIBRARY_FLAG_NONE = 0u,
+};
+typedef uint32_t iree_dynamic_library_flags_t;
+
+// Dynamic library (aka shared object) cross-platform wrapper.
+// Lifetime is managed via the retain/release pair below.
+typedef struct iree_dynamic_library_t iree_dynamic_library_t;
+
+// Loads a system library using both the system library load paths and the given
+// file name. The path may may be absolute or relative.
+//
+// For process-wide search control the LD_LIBRARY_PATH (Linux) or PATH (Windows)
+// is used in addition to the default search path rules of the platform.
+iree_status_t iree_dynamic_library_load_from_file(
+ const char* file_path, iree_dynamic_library_flags_t flags,
+ iree_allocator_t allocator, iree_dynamic_library_t** out_library);
+
+// Loads a system library using both the system library load paths and the given
+// search path/alternative file names. The paths may may be absolute or
+// relative.
+//
+// For process-wide search control the LD_LIBRARY_PATH (Linux) or PATH (Windows)
+// is used in addition to the default search path rules of the platform.
+iree_status_t iree_dynamic_library_load_from_files(
+ iree_host_size_t search_path_count, const char* const* search_paths,
+ iree_dynamic_library_flags_t flags, iree_allocator_t allocator,
+ iree_dynamic_library_t** out_library);
+
+// Opens a dynamic library from a range of bytes in memory.
+// |identifier| will be used as the module name in debugging/profiling tools.
+// |buffer| must remain live for the lifetime of the library.
+iree_status_t iree_dynamic_library_load_from_memory(
+ iree_string_view_t identifier, iree_const_byte_span_t buffer,
+ iree_dynamic_library_flags_t flags, iree_allocator_t allocator,
+ iree_dynamic_library_t** out_library);
+
+// Retains the given |library| for the caller.
+void iree_dynamic_library_retain(iree_dynamic_library_t* library);
+
+// Releases the given |library| from the caller.
+// NOTE(review): presumably the library is unloaded/freed on last release —
+// confirm against the platform implementations.
+void iree_dynamic_library_release(iree_dynamic_library_t* library);
+
+// Performs a symbol lookup in the dynamic library exports.
+// On success |out_fn| receives the resolved symbol address.
+iree_status_t iree_dynamic_library_lookup_symbol(
+ iree_dynamic_library_t* library, const char* symbol_name, void** out_fn);
+
+// Loads a debug database (PDB/DWARF/etc) from the given path providing debug
+// symbols for this library and attaches it to the symbol store (if active).
+iree_status_t iree_dynamic_library_attach_symbols_from_file(
+ iree_dynamic_library_t* library, const char* file_path);
+
+// Loads a debug database (PDB/DWARF/etc) from a range of bytes in memory and
+// attaches it to the symbol store (if active). |buffer| must remain live for
+// the lifetime of the library.
+iree_status_t iree_dynamic_library_attach_symbols_from_memory(
+ iree_dynamic_library_t* library, iree_const_byte_span_t buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_DYNAMIC_LIBRARY_H_
diff --git a/runtime/src/iree/base/internal/dynamic_library_posix.c b/runtime/src/iree/base/internal/dynamic_library_posix.c
new file mode 100644
index 0000000..1e14f32
--- /dev/null
+++ b/runtime/src/iree/base/internal/dynamic_library_posix.c
@@ -0,0 +1,330 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/call_once.h"
+#include "iree/base/internal/dynamic_library.h"
+#include "iree/base/internal/file_path.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_APPLE) || \
+ defined(IREE_PLATFORM_LINUX) || defined(IREE_PLATFORM_EMSCRIPTEN)
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+struct iree_dynamic_library_t {
+  // Reference count; the library is deleted when this reaches zero.
+  iree_atomic_ref_count_t ref_count;
+  // Allocator the struct was allocated from and will be freed with.
+  iree_allocator_t allocator;
+
+  // dlopen shared object handle.
+  void* handle;
+};
+
+// Allocate a new string from |allocator| returned in |out_file_path|
+// containing a path to a unique file on the filesystem.
+//
+// mkstemp creates (and leaves behind) an empty reservation file at the unique
+// stem; the returned |out_file_path| appends |prefix| and |extension| to that
+// stem and is canonicalized before returning.
+static iree_status_t iree_dynamic_library_make_temp_file_path(
+    const char* prefix, const char* extension, iree_allocator_t allocator,
+    const char* tmpdir, char** out_file_path) {
+  // Stamp in a unique file name (replacing XXXXXX in the string).
+  char temp_path[512];
+  if (snprintf(temp_path, sizeof(temp_path), "%s/iree_dylib_XXXXXX", tmpdir) >=
+      sizeof(temp_path)) {
+    // NOTE: we could dynamically allocate things, but didn't seem worth it.
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "TMPDIR name too long (>%zu chars); keep it reasonable",
+        sizeof(temp_path));
+  }
+  int fd = mkstemp(temp_path);
+  if (fd < 0) {
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "unable to mkstemp file");
+  }
+  // Only the unique name is needed; close the descriptor immediately so it
+  // does not leak one fd per library load. The (empty) file stays behind as a
+  // name reservation.
+  close(fd);
+
+  // Allocate storage for the full file path and format it in.
+  int file_path_length =
+      snprintf(NULL, 0, "%s_%s.%s", temp_path, prefix, extension);
+  if (file_path_length < 0) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "unable to form temp path string");
+  }
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+      allocator, file_path_length + /*NUL=*/1, (void**)out_file_path));
+  snprintf(*out_file_path, file_path_length + /*NUL=*/1, "%s_%s.%s", temp_path,
+           prefix, extension);
+
+  // Canonicalize away any double path separators.
+  iree_file_path_canonicalize(*out_file_path, file_path_length);
+
+  return iree_ok_status();
+}
+
+// Creates a temp file and writes the |source_data| into it.
+// The file path is returned in |out_file_path| (allocated from |allocator|);
+// on failure the path allocation is released before returning.
+static iree_status_t iree_dynamic_library_write_temp_file(
+    iree_const_byte_span_t source_data, const char* prefix,
+    const char* extension, iree_allocator_t allocator, const char* tmpdir,
+    char** out_file_path) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Reserve a temp file path we can write to.
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_dynamic_library_make_temp_file_path(prefix, extension, allocator,
+                                                   tmpdir, out_file_path));
+
+  iree_status_t status = iree_ok_status();
+
+  // Open the file for writing.
+  FILE* file_handle = fopen(*out_file_path, "wb");
+  if (file_handle == NULL) {
+    status = iree_make_status(iree_status_code_from_errno(errno),
+                              "unable to open file '%s'", *out_file_path);
+  }
+
+  // Write all file bytes. NOTE: fwrite with a zero element size returns 0, so
+  // an empty |source_data| must be treated as a successful no-op rather than
+  // a write failure.
+  if (iree_status_is_ok(status) && source_data.data_length > 0) {
+    if (fwrite((char*)source_data.data, source_data.data_length, 1,
+               file_handle) != 1) {
+      status =
+          iree_make_status(iree_status_code_from_errno(errno),
+                           "unable to write file span of %zu bytes to '%s'",
+                           source_data.data_length, *out_file_path);
+    }
+  }
+
+  if (file_handle != NULL) {
+    fclose(file_handle);
+    file_handle = NULL;
+  }
+  if (!iree_status_is_ok(status)) {
+    iree_allocator_free(allocator, *out_file_path);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Allocates an iree_dynamic_library_t wrapping the already-opened dlopen
+// |handle|. Ownership of |handle| transfers to the library on success only.
+static iree_status_t iree_dynamic_library_create(
+    void* handle, iree_allocator_t allocator,
+    iree_dynamic_library_t** out_library) {
+  *out_library = NULL;
+
+  iree_dynamic_library_t* library = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(allocator, sizeof(*library), (void**)&library);
+  if (!iree_status_is_ok(status)) return status;
+
+  memset(library, 0, sizeof(*library));
+  iree_atomic_ref_count_init(&library->ref_count);
+  library->allocator = allocator;
+  library->handle = handle;
+  *out_library = library;
+  return iree_ok_status();
+}
+
+// Thin wrapper over iree_dynamic_library_load_from_files using a single
+// search path.
+iree_status_t iree_dynamic_library_load_from_file(
+    const char* file_path, iree_dynamic_library_flags_t flags,
+    iree_allocator_t allocator, iree_dynamic_library_t** out_library) {
+  return iree_dynamic_library_load_from_files(1, &file_path, flags, allocator,
+                                              out_library);
+}
+
+iree_status_t iree_dynamic_library_load_from_files(
+    iree_host_size_t search_path_count, const char* const* search_paths,
+    iree_dynamic_library_flags_t flags, iree_allocator_t allocator,
+    iree_dynamic_library_t** out_library) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_library);
+  *out_library = NULL;
+
+  // Probe each provided path in order and keep the first module that loads.
+  void* handle = NULL;
+  for (iree_host_size_t i = 0; i < search_path_count && handle == NULL; ++i) {
+    handle = dlopen(search_paths[i], RTLD_LAZY | RTLD_LOCAL);
+  }
+  if (handle == NULL) {
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "dynamic library not found on any search path");
+  }
+
+  // Wrap the OS handle in a ref-counted library object. On failure ownership
+  // of |handle| was not transferred so it must be closed here.
+  iree_dynamic_library_t* library = NULL;
+  iree_status_t status =
+      iree_dynamic_library_create(handle, allocator, &library);
+  if (iree_status_is_ok(status)) {
+    *out_library = library;
+  } else {
+    dlclose(handle);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// One-time-initialized settings describing where dylib temp files are written
+// and whether they should be preserved for post-run tooling. Populated by
+// iree_dynamic_library_init_temp_dir under the once flag below.
+static iree_once_flag iree_dynamic_library_temp_dir_init_once_flag_ =
+    IREE_ONCE_FLAG_INIT;
+static const char* iree_dynamic_library_temp_dir_path_;
+static bool iree_dynamic_library_temp_dir_valid_;
+static bool iree_dynamic_library_temp_dir_preserve_;
+
+// Returns true if |path| is NULL or the empty string.
+static bool iree_dynamic_library_path_is_null_or_empty(const char* path) {
+  return path == NULL || path[0] == 0;
+}
+
+// Resolves the temp directory used for extracted dylib files and whether the
+// files should be preserved after load. Called exactly once via
+// iree_dynamic_library_temp_dir_init_once_flag_.
+static void iree_dynamic_library_init_temp_dir(void) {
+  // Semantics of IREE_PRESERVE_DYLIB_TEMP_FILES:
+  //  * If the environment variable is not set, temp files are not preserved.
+  //  * If the environment variable is set to "1", temp files are preserved to
+  //    some default temp directory. The TMPDIR environment variable is used if
+  //    set, otherwise a hardcoded default path is used. Example:
+  //      $ IREE_PRESERVE_DYLIB_TEMP_FILES=1 iree-run-module ...
+  //  * If the environment variable is set to any other string than "1", temp
+  //    files are preserved, and the value of the environment variable is
+  //    interpreted as the path of the temporary directory to use. Example:
+  //      $ IREE_PRESERVE_DYLIB_TEMP_FILES=/tmp/iree-benchmarks iree-run-module
+  const char* path = getenv("IREE_PRESERVE_DYLIB_TEMP_FILES");
+  bool preserve = !iree_dynamic_library_path_is_null_or_empty(path);
+  if (!path || !strcmp(path, "1")) {
+    // TMPDIR is a unix semi-standard thing. It's even defined by default on
+    // Android for the regular shell user (but not root).
+    path = getenv("TMPDIR");
+    if (iree_dynamic_library_path_is_null_or_empty(path)) {
+#ifdef __ANDROID__
+      path = "/data/local/tmp";
+#else
+      path = "/tmp";
+#endif  // __ANDROID__
+    }
+  }
+  // NOTE: the getenv result pointer is stored directly (not copied); it is
+  // assumed to remain valid for the process lifetime.
+  iree_dynamic_library_temp_dir_path_ = path;
+  iree_dynamic_library_temp_dir_preserve_ = preserve;
+  // Validate that temp_dir is the path of a directory. Could fail if it was
+  // user-provided, or on an Android device where /data/local/tmp hasn't been
+  // created yet.
+  struct stat s;
+  iree_dynamic_library_temp_dir_valid_ =
+      stat(path, &s) == 0 && (s.st_mode & S_IFMT) == S_IFDIR;
+}
+
+// TODO(#3845): use dlopen on an fd with either dlopen(/proc/self/fd/NN),
+// fdlopen, or android_dlopen_ext to avoid needing to write the file to disk.
+// Can fallback to memfd_create + dlopen where available, and fallback from
+// that to disk (maybe just windows/mac).
+iree_status_t iree_dynamic_library_load_from_memory(
+    iree_string_view_t identifier, iree_const_byte_span_t buffer,
+    iree_dynamic_library_flags_t flags, iree_allocator_t allocator,
+    iree_dynamic_library_t** out_library) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_library);
+  *out_library = NULL;
+
+  iree_call_once(&iree_dynamic_library_temp_dir_init_once_flag_,
+                 iree_dynamic_library_init_temp_dir);
+
+  if (!iree_dynamic_library_temp_dir_valid_) {
+    // The trace zone opened above must be closed before any early return.
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "path of dylib temp files (%s) is not the path of a directory",
+        iree_dynamic_library_temp_dir_path_);
+  }
+
+  // Extract the library to a temp file.
+  char* temp_path = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_dynamic_library_write_temp_file(
+              buffer, "mem_", "so", allocator,
+              iree_dynamic_library_temp_dir_path_, &temp_path));
+
+  // Load using the normal load from file routine.
+  iree_status_t status = iree_dynamic_library_load_from_file(
+      temp_path, flags, allocator, out_library);
+
+  // Unlink the temp file - it's still open by the loader but won't be
+  // accessible to anyone else and will be deleted once the library is
+  // unloaded. Note that we don't remove the file if the user requested we keep
+  // it around for tooling to access.
+  if (!iree_dynamic_library_temp_dir_preserve_) {
+    remove(temp_path);
+  }
+  iree_allocator_free(allocator, temp_path);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees |library| and closes the underlying dlopen handle. When tracing
+// instrumentation is enabled the handle is intentionally leaked so the
+// profiler can continue reading the module.
+static void iree_dynamic_library_delete(iree_dynamic_library_t* library) {
+  iree_allocator_t allocator = library->allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  // Leak the library when tracing, since the profiler may still be reading it.
+  // TODO(benvanik): move to an atexit handler instead, verify with ASAN/MSAN
+  // TODO(scotttodd): Make this compatible with testing:
+  //     two test cases, one for each function in the same executable
+  //     first test case passes, second fails to open the file (already open)
+#else
+  // Close the library first as it may be loaded from one of the temp files we
+  // are about to delete.
+  if (library->handle != NULL) {
+    dlclose(library->handle);
+  }
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+  iree_allocator_free(allocator, library);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Adds a caller reference; NULL is tolerated as a no-op.
+void iree_dynamic_library_retain(iree_dynamic_library_t* library) {
+  if (!library) return;
+  iree_atomic_ref_count_inc(&library->ref_count);
+}
+
+// Drops a caller reference and deletes the library once the last one is gone;
+// NULL is tolerated as a no-op.
+void iree_dynamic_library_release(iree_dynamic_library_t* library) {
+  if (!library) return;
+  if (iree_atomic_ref_count_dec(&library->ref_count) == 1) {
+    iree_dynamic_library_delete(library);
+  }
+}
+
+// Resolves |symbol_name| from the loaded module via dlsym.
+// NOTE: a NULL dlsym result is reported as NOT_FOUND; a symbol whose value is
+// legitimately NULL is indistinguishable from a missing one here.
+iree_status_t iree_dynamic_library_lookup_symbol(
+    iree_dynamic_library_t* library, const char* symbol_name, void** out_fn) {
+  IREE_ASSERT_ARGUMENT(library);
+  IREE_ASSERT_ARGUMENT(symbol_name);
+  IREE_ASSERT_ARGUMENT(out_fn);
+  *out_fn = NULL;
+  void* fn = dlsym(library->handle, symbol_name);
+  if (!fn) {
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "symbol '%s' not found in library", symbol_name);
+  }
+  *out_fn = fn;
+  return iree_ok_status();
+}
+
+// Debug database attachment is not implemented on POSIX platforms; both
+// functions succeed as no-ops so callers can invoke them unconditionally.
+iree_status_t iree_dynamic_library_attach_symbols_from_file(
+    iree_dynamic_library_t* library, const char* file_path) {
+  return iree_ok_status();
+}
+
+iree_status_t iree_dynamic_library_attach_symbols_from_memory(
+    iree_dynamic_library_t* library, iree_const_byte_span_t buffer) {
+  return iree_ok_status();
+}
+
+#endif // IREE_PLATFORM_*
diff --git a/runtime/src/iree/base/internal/dynamic_library_win32.c b/runtime/src/iree/base/internal/dynamic_library_win32.c
new file mode 100644
index 0000000..a55e143
--- /dev/null
+++ b/runtime/src/iree/base/internal/dynamic_library_win32.c
@@ -0,0 +1,417 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/call_once.h"
+#include "iree/base/internal/dynamic_library.h"
+#include "iree/base/internal/file_path.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
+#if defined(IREE_PLATFORM_WINDOWS)
+
+// TODO(benvanik): support PDB overlays when tracy is not enabled; we'll
+// need to rearrange how the dbghelp lock is handled for that (probably moving
+// it here and having the tracy code redirect to this).
+#if defined(TRACY_ENABLE)
+#define IREE_HAVE_DYNAMIC_LIBRARY_PDB_SUPPORT 1
+#pragma warning(disable : 4091)
+#include <dbghelp.h>
+
+void IREEDbgHelpLock(void);
+void IREEDbgHelpUnlock(void);
+#endif // TRACY_ENABLE
+
+struct iree_dynamic_library_t {
+  // Reference count; the library is deleted when this reaches zero.
+  iree_atomic_ref_count_t ref_count;
+  // Allocator the struct and temp file path strings are allocated from.
+  iree_allocator_t allocator;
+
+  // Base module name used as an identifier. When loaded from a file this must
+  // be the basename for dbghelp to be able to find symbols.
+  // Owned and allocated as part of the struct upon creation.
+  // Has NUL terminator for compatibility with Windows APIs.
+  char* identifier;
+
+  // File path of the loaded module, if loaded from one.
+  // Owned and allocated as part of the struct upon creation.
+  // Has NUL terminator for compatibility with Windows APIs.
+  char* module_path;
+
+  // Windows module handle.
+  HMODULE module;
+
+  // 0 or more file paths that were created as part of the loading of the
+  // library or attaching of symbols from memory.
+  //
+  // Each path string is allocated using the |allocator| and freed during
+  // library deletion (along with the backing file on disk).
+  iree_host_size_t temp_file_count;
+  char* temp_file_paths[2];
+};
+
+static iree_once_flag iree_dynamic_library_temp_path_flag_ =
+    IREE_ONCE_FLAG_INIT;
+static char iree_dynamic_library_temp_path_base_[MAX_PATH + 1];
+// Computes the per-process base path used for extracted dylib temp files.
+// Called exactly once via iree_dynamic_library_temp_path_flag_.
+static void iree_dynamic_library_init_temp_paths(void) {
+  // Query the temp path from the OS. This can be overridden with the following
+  // environment variables: [TMP, TEMP, USERPROFILE].
+  //
+  // See:
+  // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-gettemppatha
+  char temp_path[MAX_PATH];
+  DWORD temp_path_length = GetTempPathA(IREE_ARRAYSIZE(temp_path), temp_path);
+  if (temp_path_length == 0 || temp_path_length >= IREE_ARRAYSIZE(temp_path)) {
+    // GetTempPathA failed (returns 0) or the buffer was too small (returns the
+    // required size without writing); in either case |temp_path| contents are
+    // unspecified. Fall back to the current directory rather than formatting
+    // from uninitialized memory below.
+    temp_path[0] = '.';
+    temp_path[1] = '\0';
+  }
+
+  // Append the process ID to the path; this is like what _mktemp does but
+  // without all the hoops.
+  snprintf(iree_dynamic_library_temp_path_base_,
+           sizeof(iree_dynamic_library_temp_path_base_), "%s\\iree_dylib_%08X",
+           temp_path, GetCurrentProcessId());
+
+  // Canonicalize away any double path separators.
+  iree_file_path_canonicalize(iree_dynamic_library_temp_path_base_,
+                              strlen(iree_dynamic_library_temp_path_base_));
+}
+
+// Allocate a new string from |allocator| returned in |out_file_path|
+// containing a path to a unique file on the filesystem.
+static iree_status_t iree_dynamic_library_make_temp_file_path(
+    const char* prefix, const char* extension, iree_allocator_t allocator,
+    char** out_file_path) {
+  // Ensure the root temp paths are queried/initialized.
+  iree_call_once(&iree_dynamic_library_temp_path_flag_,
+                 iree_dynamic_library_init_temp_paths);
+
+  // Generate a per-file unique identifier only unique *within* the current
+  // process. We combine this with the _mktemp-style base path that is unique
+  // to the process itself.
+  static iree_atomic_int32_t next_unique_id = IREE_ATOMIC_VAR_INIT(0);
+  uint32_t unique_id = (uint32_t)iree_atomic_fetch_add_int32(
+      &next_unique_id, 1, iree_memory_order_seq_cst);
+
+  // Allocate storage for the full file path and format it in.
+  // The first snprintf with a NULL buffer only measures the required length.
+  int file_path_length =
+      snprintf(NULL, 0, "%s_%s_%08X.%s", iree_dynamic_library_temp_path_base_,
+               prefix, unique_id, extension);
+  if (file_path_length < 0) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "unable to form temp path string");
+  }
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+      allocator, file_path_length + /*NUL=*/1, (void**)out_file_path));
+  snprintf(*out_file_path, file_path_length + /*NUL=*/1, "%s_%s_%08X.%s",
+           iree_dynamic_library_temp_path_base_, prefix, unique_id, extension);
+
+  return iree_ok_status();
+}
+
+// Creates a temp file and writes the |source_data| into it.
+// The file path is returned in |out_file_path| (allocated from |allocator|);
+// on failure the path allocation is released before returning.
+static iree_status_t iree_dynamic_library_write_temp_file(
+    iree_const_byte_span_t source_data, const char* prefix,
+    const char* extension, iree_allocator_t allocator, char** out_file_path) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Reserve a temp file path we can write to.
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_dynamic_library_make_temp_file_path(prefix, extension, allocator,
+                                                   out_file_path));
+
+  iree_status_t status = iree_ok_status();
+
+  // Open the file for writing.
+  HANDLE file_handle = CreateFileA(
+      /*lpFileName=*/*out_file_path, /*dwDesiredAccess=*/GENERIC_WRITE,
+      /*dwShareMode=*/FILE_SHARE_DELETE, /*lpSecurityAttributes=*/NULL,
+      /*dwCreationDisposition=*/CREATE_ALWAYS,
+      /*dwFlagsAndAttributes=*/FILE_ATTRIBUTE_TEMPORARY,
+      /*hTemplateFile=*/NULL);
+  if (file_handle == INVALID_HANDLE_VALUE) {
+    status = iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+                              "unable to open file '%s'", *out_file_path);
+  }
+
+  // Write all file bytes. NOTE: lpNumberOfBytesWritten may only be NULL when
+  // lpOverlapped is non-NULL, so a local is always passed for this synchronous
+  // write.
+  if (iree_status_is_ok(status)) {
+    DWORD bytes_written = 0;
+    if (WriteFile(file_handle, source_data.data, (DWORD)source_data.data_length,
+                  &bytes_written, NULL) == FALSE) {
+      status =
+          iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+                           "unable to write file span of %zu bytes to '%s'",
+                           source_data.data_length, *out_file_path);
+    }
+  }
+
+  // NOTE: CreateFileA signals failure with INVALID_HANDLE_VALUE (not NULL);
+  // only close handles that were actually opened.
+  if (file_handle != INVALID_HANDLE_VALUE) {
+    CloseHandle(file_handle);
+    file_handle = INVALID_HANDLE_VALUE;
+  }
+  if (!iree_status_is_ok(status)) {
+    iree_allocator_free(allocator, *out_file_path);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Allocates an iree_dynamic_library_t with the given allocator.
+// The |identifier| and |module_path| strings are copied into trailing storage
+// of the same allocation: [struct][identifier + NUL][module_path + NUL].
+static iree_status_t iree_dynamic_library_create(
+    iree_string_view_t identifier, iree_string_view_t module_path,
+    HMODULE module, iree_allocator_t allocator,
+    iree_dynamic_library_t** out_library) {
+  *out_library = NULL;
+
+  iree_dynamic_library_t* library = NULL;
+  iree_host_size_t total_size =
+      sizeof(*library) + (identifier.size + 1) + (module_path.size + 1);
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, total_size, (void**)&library));
+  memset(library, 0, total_size);
+  iree_atomic_ref_count_init(&library->ref_count);
+  library->allocator = allocator;
+  library->module = module;
+
+  // identifier is stored immediately after the struct itself.
+  library->identifier = (char*)library + sizeof(*library);
+  memcpy(library->identifier, identifier.data, identifier.size);
+  library->identifier[identifier.size] = 0;  // NUL
+
+  // module_path follows the identifier (and its NUL) in the same allocation.
+  library->module_path = library->identifier + (identifier.size + 1);
+  memcpy(library->module_path, module_path.data, module_path.size);
+  library->module_path[module_path.size] = 0;  // NUL
+
+  *out_library = library;
+  return iree_ok_status();
+}
+
+// Thin wrapper over iree_dynamic_library_load_from_files using a single
+// search path.
+iree_status_t iree_dynamic_library_load_from_file(
+    const char* file_path, iree_dynamic_library_flags_t flags,
+    iree_allocator_t allocator, iree_dynamic_library_t** out_library) {
+  return iree_dynamic_library_load_from_files(1, &file_path, flags, allocator,
+                                              out_library);
+}
+
+iree_status_t iree_dynamic_library_load_from_files(
+    iree_host_size_t search_path_count, const char* const* search_paths,
+    iree_dynamic_library_flags_t flags, iree_allocator_t allocator,
+    iree_dynamic_library_t** out_library) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_library);
+  *out_library = NULL;
+
+  // Try to load the module from the set of search paths provided.
+  // NOTE: |i| is declared outside the loop so the index of the path that
+  // succeeded remains available afterwards.
+  HMODULE module = NULL;
+  iree_host_size_t i = 0;
+  for (i = 0; i < search_path_count; ++i) {
+    module = LoadLibraryA(search_paths[i]);
+    if (module) break;
+  }
+  if (!module) {
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "dynamic library not found on any search path");
+  }
+
+  // Use the basename as the identifier (dbghelp resolves symbols by basename).
+  iree_string_view_t file_path = iree_make_cstring_view(search_paths[i]);
+  iree_string_view_t identifier = iree_file_path_basename(file_path);
+
+  iree_dynamic_library_t* library = NULL;
+  iree_status_t status = iree_dynamic_library_create(
+      identifier, file_path, module, allocator, &library);
+
+  // On failure ownership of |module| was not transferred; free it here.
+  if (iree_status_is_ok(status)) {
+    *out_library = library;
+  } else {
+    FreeLibrary(module);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Extracts |buffer| to a temp .dll file and loads it from there.
+// NOTE: |identifier| is currently unused on this platform; the module name is
+// derived from the temp file basename instead.
+iree_status_t iree_dynamic_library_load_from_memory(
+    iree_string_view_t identifier, iree_const_byte_span_t buffer,
+    iree_dynamic_library_flags_t flags, iree_allocator_t allocator,
+    iree_dynamic_library_t** out_library) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_library);
+  *out_library = NULL;
+
+  // Extract the library to a temp file.
+  char* temp_path = NULL;
+  iree_status_t status = iree_dynamic_library_write_temp_file(
+      buffer, "mem", "dll", allocator, &temp_path);
+
+  if (iree_status_is_ok(status)) {
+    // Load using the normal load from file routine.
+    status = iree_dynamic_library_load_from_file(temp_path, flags, allocator,
+                                                 out_library);
+  }
+  if (iree_status_is_ok(status)) {
+    // Associate the temp path to the library; the temp_path string and the
+    // backing file will be deleted when the library is closed.
+    iree_dynamic_library_t* library = *out_library;
+    library->temp_file_paths[library->temp_file_count++] = temp_path;
+  } else {
+    iree_allocator_free(allocator, temp_path);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees |library|, unloads the module, and deletes any temp files created
+// while loading or attaching symbols. When tracing instrumentation is enabled
+// the module is intentionally leaked so the profiler can continue reading it.
+static void iree_dynamic_library_delete(iree_dynamic_library_t* library) {
+  iree_allocator_t allocator = library->allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  // Leak the library when tracing, since the profiler may still be reading it.
+  // TODO(benvanik): move to an atexit handler instead, verify with ASAN/MSAN
+  // TODO(scotttodd): Make this compatible with testing:
+  //     two test cases, one for each function in the same executable
+  //     first test case passes, second fails to open the file (already open)
+#else
+  // Close the library first as it may be loaded from one of the temp files we
+  // are about to delete.
+  if (library->module != NULL) {
+    FreeLibrary(library->module);
+  }
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+  // Cleanup all temp files (both the on-disk file and the path string).
+  for (iree_host_size_t i = 0; i < library->temp_file_count; ++i) {
+    char* file_path = library->temp_file_paths[i];
+    DeleteFileA(file_path);
+    iree_allocator_free(allocator, file_path);
+  }
+
+  iree_allocator_free(allocator, library);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Adds a caller reference; NULL is tolerated as a no-op.
+void iree_dynamic_library_retain(iree_dynamic_library_t* library) {
+  if (!library) return;
+  iree_atomic_ref_count_inc(&library->ref_count);
+}
+
+// Drops a caller reference and deletes the library once the last one is gone;
+// NULL is tolerated as a no-op.
+void iree_dynamic_library_release(iree_dynamic_library_t* library) {
+  if (!library) return;
+  if (iree_atomic_ref_count_dec(&library->ref_count) == 1) {
+    iree_dynamic_library_delete(library);
+  }
+}
+
+// Resolves |symbol_name| from the loaded module via GetProcAddress.
+iree_status_t iree_dynamic_library_lookup_symbol(
+    iree_dynamic_library_t* library, const char* symbol_name, void** out_fn) {
+  IREE_ASSERT_ARGUMENT(library);
+  IREE_ASSERT_ARGUMENT(symbol_name);
+  IREE_ASSERT_ARGUMENT(out_fn);
+  *out_fn = NULL;
+  void* fn = GetProcAddress(library->module, symbol_name);
+  if (!fn) {
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "symbol '%s' not found in library", symbol_name);
+  }
+  *out_fn = fn;
+  return iree_ok_status();
+}
+
+#if defined(IREE_HAVE_DYNAMIC_LIBRARY_PDB_SUPPORT)
+
+// State shared with EnumLoadedModulesCallback while scanning for the module
+// whose path matches |module_path|.
+typedef struct {
+  const char* module_path;
+  DWORD64 module_base;
+  ULONG module_size;
+} ModuleEnumCallbackState;
+
+// dbghelp module-enumeration callback: records the base address and size of
+// the module whose name matches the one we loaded, then stops enumerating.
+static BOOL EnumLoadedModulesCallback(PCSTR ModuleName, DWORD64 ModuleBase,
+                                      ULONG ModuleSize, PVOID UserContext) {
+  ModuleEnumCallbackState* state = (ModuleEnumCallbackState*)UserContext;
+  if (strcmp(ModuleName, state->module_path) == 0) {
+    state->module_base = ModuleBase;
+    state->module_size = ModuleSize;
+    return FALSE;  // match found; stop enumeration
+  }
+  return TRUE;  // not a match; keep scanning
+}
+
+// Overlays the PDB at |file_path| onto the already-loaded module so dbghelp
+// can resolve its symbols. All dbghelp usage is serialized via the
+// IREEDbgHelpLock/IREEDbgHelpUnlock pair (dbghelp is not thread-safe).
+iree_status_t iree_dynamic_library_attach_symbols_from_file(
+    iree_dynamic_library_t* library, const char* file_path) {
+  IREE_ASSERT_ARGUMENT(library);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  IREEDbgHelpLock();
+
+  // Useful for debugging this logic; will print search paths and results:
+  //   SymSetOptions(SYMOPT_LOAD_LINES | SYMOPT_DEBUG);
+
+  // Enumerates all loaded modules in the process to extract the module
+  // base/size parameters we need to overlay the PDB. There's other ways to
+  // get this (such as registering a LdrDllNotification callback and snooping
+  // the values during LoadLibrary or using CreateToolhelp32Snapshot), however
+  // EnumerateLoadedModules is in dbghelp which we are using anyway.
+  ModuleEnumCallbackState state;
+  memset(&state, 0, sizeof(state));
+  state.module_path = library->module_path;
+  EnumerateLoadedModules64(GetCurrentProcess(), EnumLoadedModulesCallback,
+                           &state);
+
+  // Load the PDB file and overlay it onto the already-loaded module at the
+  // address range it got loaded into. If the module was not found above
+  // (module_base == 0) the attach is silently skipped.
+  if (state.module_base != 0) {
+    SymLoadModuleEx(GetCurrentProcess(), NULL, file_path, library->identifier,
+                    state.module_base, state.module_size, NULL, 0);
+  }
+
+  IREEDbgHelpUnlock();
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Extracts the in-memory PDB |buffer| to a temp file and attaches it to the
+// module. The temp file is tracked on the library and deleted with it.
+iree_status_t iree_dynamic_library_attach_symbols_from_memory(
+    iree_dynamic_library_t* library, iree_const_byte_span_t buffer) {
+  IREE_ASSERT_ARGUMENT(library);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  if (library->temp_file_count + 1 > IREE_ARRAYSIZE(library->temp_file_paths)) {
+    // The trace zone opened above must be closed before the early return.
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "too many temp files attached");
+  }
+
+  // Extract the library to a temp file.
+  char* temp_path = NULL;
+  iree_status_t status = iree_dynamic_library_write_temp_file(
+      buffer, "mem_", "pdb", library->allocator, &temp_path);
+  if (iree_status_is_ok(status)) {
+    // Associate the temp path to the library; the temp_path string and the
+    // backing file will be deleted when the library is closed.
+    library->temp_file_paths[library->temp_file_count++] = temp_path;
+
+    // Attempt to attach the extracted temp file to the module.
+    status = iree_dynamic_library_attach_symbols_from_file(library, temp_path);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+#else
+
+// PDB support is unavailable in this build configuration (see
+// IREE_HAVE_DYNAMIC_LIBRARY_PDB_SUPPORT above); both functions succeed as
+// no-ops so callers can invoke them unconditionally.
+iree_status_t iree_dynamic_library_attach_symbols_from_file(
+    iree_dynamic_library_t* library, const char* file_path) {
+  return iree_ok_status();
+}
+
+iree_status_t iree_dynamic_library_attach_symbols_from_memory(
+    iree_dynamic_library_t* library, iree_const_byte_span_t buffer) {
+  return iree_ok_status();
+}
+
+#endif // IREE_HAVE_DYNAMIC_LIBRARY_PDB_SUPPORT
+
+#endif // IREE_PLATFORM_WINDOWS
diff --git a/runtime/src/iree/base/internal/event_pool.c b/runtime/src/iree/base/internal/event_pool.c
new file mode 100644
index 0000000..2cc93d4
--- /dev/null
+++ b/runtime/src/iree/base/internal/event_pool.c
@@ -0,0 +1,166 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/event_pool.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/tracing.h"
+
+struct iree_event_pool_t {
+  // Allocator used to create the event pool.
+  iree_allocator_t host_allocator;
+  // Guards the pool. Since this pool is used to get operating system-level
+  // event objects that will be signaled and waited on using syscalls it's got
+  // relatively low contention: callers are rate limited by how fast they can
+  // signal and wait on the events they get.
+  iree_slim_mutex_t mutex;
+  // Maximum number of events that will be maintained in the pool. More events
+  // may be allocated at any time but when they are no longer needed they will
+  // be disposed directly.
+  iree_host_size_t available_capacity;
+  // Total number of events currently available in available_list.
+  iree_host_size_t available_count;
+  // Dense left-aligned list of available_count events.
+  iree_event_t available_list[];
+};
+
+// Allocates the pool struct plus its trailing available_list array as a
+// single allocation and pre-populates it with |available_capacity| events.
+iree_status_t iree_event_pool_allocate(iree_host_size_t available_capacity,
+                                       iree_allocator_t host_allocator,
+                                       iree_event_pool_t** out_event_pool) {
+  IREE_ASSERT_ARGUMENT(out_event_pool);
+  *out_event_pool = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_event_pool_t* event_pool = NULL;
+  iree_host_size_t total_size =
+      sizeof(*event_pool) +
+      available_capacity * sizeof(event_pool->available_list[0]);
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0,
+      iree_allocator_malloc(host_allocator, total_size, (void**)&event_pool));
+  event_pool->host_allocator = host_allocator;
+  event_pool->available_capacity = available_capacity;
+  event_pool->available_count = 0;
+  // The mutex must be initialized before iree_event_pool_acquire/release can
+  // lock it and before iree_event_pool_free (used on the failure path below)
+  // deinitializes it.
+  iree_slim_mutex_initialize(&event_pool->mutex);
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < available_capacity; ++i) {
+    status = iree_event_initialize(
+        /*initial_state=*/false,
+        &event_pool->available_list[event_pool->available_count++]);
+    if (!iree_status_is_ok(status)) break;
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_event_pool = event_pool;
+  } else {
+    iree_event_pool_free(event_pool);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Deinitializes all pooled events and the mutex, then frees the single pool
+// allocation. Only events currently in available_list are deinitialized; any
+// still-acquired events must have been released back before calling this.
+void iree_event_pool_free(iree_event_pool_t* event_pool) {
+  iree_allocator_t host_allocator = event_pool->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  for (iree_host_size_t i = 0; i < event_pool->available_count; ++i) {
+    iree_event_deinitialize(&event_pool->available_list[i]);
+  }
+  iree_slim_mutex_deinitialize(&event_pool->mutex);
+  iree_allocator_free(host_allocator, event_pool);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_event_pool_acquire(iree_event_pool_t* event_pool,
+                                      iree_host_size_t event_count,
+                                      iree_event_t* out_events) {
+  IREE_ASSERT_ARGUMENT(event_pool);
+  if (!event_count) return iree_ok_status();
+  IREE_ASSERT_ARGUMENT(out_events);
+
+  // We'll try to get what we can from the pool and fall back to initializing
+  // new events.
+  iree_host_size_t remaining_count = event_count;
+
+  // Try first to grab from the pool. Events are taken from the tail of the
+  // dense available_list.
+  iree_slim_mutex_lock(&event_pool->mutex);
+  iree_host_size_t from_pool_count =
+      iree_min(event_pool->available_count, event_count);
+  if (from_pool_count > 0) {
+    iree_host_size_t pool_base_index =
+        event_pool->available_count - from_pool_count;
+    memcpy(out_events, &event_pool->available_list[pool_base_index],
+           from_pool_count * sizeof(iree_event_t));
+    event_pool->available_count -= from_pool_count;
+    remaining_count -= from_pool_count;
+  }
+  iree_slim_mutex_unlock(&event_pool->mutex);
+
+  // Allocate the rest of the events (outside the lock).
+  if (remaining_count > 0) {
+    IREE_TRACE_ZONE_BEGIN(z0);
+    iree_status_t status = iree_ok_status();
+    for (iree_host_size_t i = 0; i < remaining_count; ++i) {
+      status = iree_event_initialize(/*initial_state=*/false,
+                                     &out_events[from_pool_count + i]);
+      if (!iree_status_is_ok(status)) {
+        // Must release all events we've acquired so far — both those taken
+        // from the pool and those initialized in this loop — so none leak.
+        iree_event_pool_release(event_pool, from_pool_count + i, out_events);
+        IREE_TRACE_ZONE_END(z0);
+        return status;
+      }
+    }
+    IREE_TRACE_ZONE_END(z0);
+  }
+
+  return iree_ok_status();
+}
+
+void iree_event_pool_release(iree_event_pool_t* event_pool,
+                             iree_host_size_t event_count,
+                             iree_event_t* events) {
+  IREE_ASSERT_ARGUMENT(event_pool);
+  if (!event_count) return;
+  IREE_ASSERT_ARGUMENT(events);
+
+  // We'll try to release all we can back to the pool and then deinitialize
+  // the ones that won't fit.
+  iree_host_size_t remaining_count = event_count;
+
+  // Try first to release to the pool.
+  // Note that we reset the events we add back to the pool so that they are
+  // ready to be acquired again (unsignaled).
+  iree_slim_mutex_lock(&event_pool->mutex);
+  iree_host_size_t to_pool_count =
+      iree_min(event_pool->available_capacity - event_pool->available_count,
+               event_count);
+  if (to_pool_count > 0) {
+    iree_host_size_t pool_base_index = event_pool->available_count;
+    for (iree_host_size_t i = 0; i < to_pool_count; ++i) {
+      iree_event_reset(&events[i]);
+    }
+    memcpy(&event_pool->available_list[pool_base_index], events,
+           to_pool_count * sizeof(iree_event_t));
+    event_pool->available_count += to_pool_count;
+    remaining_count -= to_pool_count;
+  }
+  iree_slim_mutex_unlock(&event_pool->mutex);
+
+  // Deallocate the rest of the events. We don't bother resetting them as we
+  // are getting rid of them.
+  if (remaining_count > 0) {
+    IREE_TRACE_ZONE_BEGIN(z0);
+    for (iree_host_size_t i = 0; i < remaining_count; ++i) {
+      iree_event_deinitialize(&events[to_pool_count + i]);
+    }
+    IREE_TRACE_ZONE_END(z0);
+  }
+}
diff --git a/runtime/src/iree/base/internal/event_pool.h b/runtime/src/iree/base/internal/event_pool.h
new file mode 100644
index 0000000..7ac56cb
--- /dev/null
+++ b/runtime/src/iree/base/internal/event_pool.h
@@ -0,0 +1,49 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_EVENT_POOL_H_
+#define IREE_BASE_INTERNAL_EVENT_POOL_H_
+
+#include "iree/base/api.h"
+#include "iree/base/internal/wait_handle.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A simple pool of iree_event_ts to recycle.
+//
+// Thread-safe; multiple threads may acquire and release events from the pool.
+typedef struct iree_event_pool_t iree_event_pool_t;
+
+// Allocates a new event pool with up to |available_capacity| events.
+iree_status_t iree_event_pool_allocate(iree_host_size_t available_capacity,
+ iree_allocator_t host_allocator,
+ iree_event_pool_t** out_event_pool);
+
+// Deallocates an event pool and destroys all events.
+// All events that were acquired from the pool must have already been released
+// back to it prior to deallocation.
+void iree_event_pool_free(iree_event_pool_t* event_pool);
+
+// Acquires one or more events from the event pool.
+// The returned events will be unsignaled and ready for use. Callers may set and
+// reset the events as much as they want prior to releasing them back to the
+// pool with iree_event_pool_release.
+iree_status_t iree_event_pool_acquire(iree_event_pool_t* event_pool,
+ iree_host_size_t event_count,
+ iree_event_t* out_events);
+
+// Releases one or more events back to the block pool.
+void iree_event_pool_release(iree_event_pool_t* event_pool,
+ iree_host_size_t event_count,
+ iree_event_t* events);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_INTERNAL_EVENT_POOL_H_
diff --git a/runtime/src/iree/base/internal/file_io.c b/runtime/src/iree/base/internal/file_io.c
new file mode 100644
index 0000000..d9b8076
--- /dev/null
+++ b/runtime/src/iree/base/internal/file_io.c
@@ -0,0 +1,276 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/file_io.h"
+
+#include "iree/base/config.h"
+
+#if IREE_FILE_IO_ENABLE
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
+#if defined(IREE_PLATFORM_WINDOWS)
+#include <fcntl.h>
+#include <io.h>
+#define IREE_SET_BINARY_MODE(handle) _setmode(_fileno(handle), O_BINARY)
+#else
+#define IREE_SET_BINARY_MODE(handle) ((void)0)
+#endif // IREE_PLATFORM_WINDOWS
+
+// We could take alignment as an arg, but roughly page aligned should be
+// acceptable for all uses - if someone cares about memory usage they won't
+// be using this method.
+#define IREE_FILE_BASE_ALIGNMENT 4096
+
+// Returns OK if a file exists at |path| and NOT_FOUND otherwise.
+// Existence does not imply the file is readable or writable.
+iree_status_t iree_file_exists(const char* path) {
+  IREE_ASSERT_ARGUMENT(path);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // A successful stat() is all we need; the returned information is unused.
+  struct stat path_stat;
+  iree_status_t status = iree_ok_status();
+  if (stat(path, &path_stat) != 0) {
+    status = iree_make_status(IREE_STATUS_NOT_FOUND, "'%s'", path);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Allocator control function backing iree_file_contents_deallocator.
+// Only FREE of the contents buffer is supported; freeing the buffer releases
+// the whole iree_file_contents_t allocation it is embedded within.
+iree_status_t iree_file_contents_allocator_ctl(void* self,
+                                               iree_allocator_command_t command,
+                                               const void* params,
+                                               void** inout_ptr) {
+  switch (command) {
+    case IREE_ALLOCATOR_COMMAND_FREE: {
+      iree_file_contents_t* contents = (iree_file_contents_t*)self;
+      if (contents->buffer.data != *inout_ptr) {
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "only the file contents buffer is valid");
+      }
+      // Capture the allocator before freeing the struct that stores it.
+      iree_allocator_t allocator = contents->allocator;
+      iree_allocator_free(allocator, contents);
+      return iree_ok_status();
+    }
+    default:
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "file contents deallocator must only be used to "
+                              "deallocate file contents");
+  }
+}
+
+// Returns an allocator whose FREE command releases |contents|.
+iree_allocator_t iree_file_contents_deallocator(
+    iree_file_contents_t* contents) {
+  iree_allocator_t deallocator;
+  deallocator.self = contents;
+  deallocator.ctl = iree_file_contents_allocator_ctl;
+  return deallocator;
+}
+
+// Releases |contents| (and its embedded buffer); a no-op when NULL.
+void iree_file_contents_free(iree_file_contents_t* contents) {
+  if (contents == NULL) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_allocator_free(contents->allocator, contents);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Reads the full contents of |file| into a IREE_FILE_BASE_ALIGNMENT-aligned
+// buffer with a trailing NUL appended (C-string compatible).
+// On success the caller must release |out_contents| with
+// iree_file_contents_free.
+static iree_status_t iree_file_read_contents_impl(
+    FILE* file, iree_allocator_t allocator,
+    iree_file_contents_t** out_contents) {
+  // Seek to the end of the file.
+  if (fseek(file, 0, SEEK_END) == -1) {
+    return iree_make_status(iree_status_code_from_errno(errno), "seek (end)");
+  }
+
+  // Query the position, telling us the total file length in bytes.
+  // NOTE: ftell returns a signed long (-1L on failure); keep it signed so the
+  // error value is not silently converted into a huge unsigned size.
+  long file_length = ftell(file);
+  if (file_length < 0) {
+    return iree_make_status(iree_status_code_from_errno(errno), "size query");
+  }
+  size_t file_size = (size_t)file_length;
+
+  // Seek back to the file start.
+  if (fseek(file, 0, SEEK_SET) == -1) {
+    return iree_make_status(iree_status_code_from_errno(errno), "seek (beg)");
+  }
+
+  // Compute total size with alignment padding.
+  // We allocate +1 to force a trailing \0 in case this is used as a cstring.
+  iree_file_contents_t* contents = NULL;
+  iree_host_size_t total_size =
+      sizeof(*contents) + IREE_FILE_BASE_ALIGNMENT + file_size + /*NUL*/ 1;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, total_size, (void**)&contents));
+
+  contents->allocator = allocator;
+  contents->buffer.data = (void*)iree_host_align(
+      (uintptr_t)contents + sizeof(*contents), IREE_FILE_BASE_ALIGNMENT);
+  contents->buffer.data_length = file_size;
+
+  // Attempt to read the file into memory. fread returns 0 (not 1) for a
+  // zero-byte request, so empty files must be special-cased to avoid a
+  // spurious failure.
+  if (file_size > 0 &&
+      fread(contents->buffer.data, file_size, 1, file) != 1) {
+    iree_allocator_free(allocator, contents);
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "unable to read entire %zu file bytes", file_size);
+  }
+
+  // Add trailing NUL to make the contents C-string compatible.
+  contents->buffer.data[file_size] = 0;  // NUL
+  *out_contents = contents;
+  return iree_ok_status();
+}
+
+// Synchronously reads the file at |path| into memory allocated from
+// |allocator|; see iree_file_read_contents_impl for buffer layout.
+iree_status_t iree_file_read_contents(const char* path,
+                                      iree_allocator_t allocator,
+                                      iree_file_contents_t** out_contents) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(path);
+  IREE_ASSERT_ARGUMENT(out_contents);
+  *out_contents = NULL;
+
+  // Binary mode so no line-ending translation happens on Windows.
+  FILE* file = fopen(path, "rb");
+  if (!file) {
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "failed to open file '%s'", path);
+  }
+
+  // Read the file contents into memory, annotating failures with the path
+  // for easier debugging.
+  iree_status_t status =
+      iree_file_read_contents_impl(file, allocator, out_contents);
+  if (!iree_status_is_ok(status)) {
+    status = iree_status_annotate_f(status, "reading file '%s'", path);
+  }
+  fclose(file);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Synchronously writes |content| to the file at |path|, overwriting any
+// existing contents. A zero-length |content| truncates the file.
+iree_status_t iree_file_write_contents(const char* path,
+                                       iree_const_byte_span_t content) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(path);
+
+  FILE* file = fopen(path, "wb");
+  if (file == NULL) {
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "failed to open file '%s'", path);
+  }
+
+  // fwrite returns the number of whole items written as a size_t; use that
+  // type instead of truncating to int. A zero-byte request returns 0 even on
+  // success, so empty content must be special-cased to avoid a spurious
+  // DATA_LOSS (opening with "wb" already truncated the file).
+  iree_status_t status = iree_ok_status();
+  if (content.data_length > 0) {
+    size_t ret = fwrite((char*)content.data, content.data_length, 1, file);
+    if (ret != 1) {
+      status =
+          iree_make_status(IREE_STATUS_DATA_LOSS,
+                           "unable to write file contents of %zu bytes to '%s'",
+                           content.data_length, path);
+    }
+  }
+
+  fclose(file);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Reads stdin until EOF into a growing IREE_FILE_BASE_ALIGNMENT-aligned
+// buffer with a trailing NUL. Capacity doubles as needed; when a realloc
+// moves or re-aligns the block the already-read bytes are shifted into place.
+static iree_status_t iree_stdin_read_contents_impl(
+    iree_allocator_t allocator, iree_file_contents_t** out_contents) {
+  // HACK: fix stdin mode to binary on Windows to match Unix behavior.
+  // Ideally we'd do this in one place for all our tools.
+  IREE_SET_BINARY_MODE(stdin);
+
+  // Initial data capacity (one page); header and alignment slop are extra.
+  iree_host_size_t capacity = 4096;
+  iree_file_contents_t* contents = NULL;
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+      allocator, sizeof(*contents) + IREE_FILE_BASE_ALIGNMENT + capacity,
+      (void**)&contents));
+  contents->buffer.data = (void*)iree_host_align(
+      (uintptr_t)contents + sizeof(*contents), IREE_FILE_BASE_ALIGNMENT);
+
+  iree_host_size_t size = 0;
+  for (int c = getchar(); c != EOF; c = getchar()) {
+    // Grow when the next byte (plus the reserved trailing NUL) would not fit.
+    if (size >= capacity - /*NUL*/ 1) {
+      // NOTE: if we realloc we may end up with a new alignment and need to move
+      // the data around.
+      uintptr_t old_offset =
+          (uintptr_t)contents->buffer.data - (uintptr_t)contents;
+      iree_host_size_t new_capacity = capacity * 2;
+      iree_file_contents_t* new_contents = contents;
+      iree_status_t status = iree_allocator_realloc(
+          allocator,
+          sizeof(*new_contents) + IREE_FILE_BASE_ALIGNMENT + new_capacity,
+          (void**)&new_contents);
+      if (!iree_status_is_ok(status)) {
+        // NOTE(review): assumes a failed realloc leaves the original block
+        // valid so this free is safe — confirm against the allocator contract.
+        iree_allocator_free(allocator, contents);
+        return status;
+      }
+      contents = new_contents;
+      // Recompute the aligned data pointer inside the (possibly moved) block.
+      uint8_t* old_data = (uint8_t*)new_contents + old_offset;
+      uint8_t* new_data = (uint8_t*)iree_host_align(
+          (uintptr_t)new_contents + sizeof(*new_contents),
+          IREE_FILE_BASE_ALIGNMENT);
+      if (new_data != old_data) {
+        // Alignment changed; move the data with safety for overlapping.
+        memmove(new_data, old_data, size);
+      }
+      contents->buffer.data = new_data;
+      capacity = new_capacity;
+    }
+    contents->buffer.data[size++] = c;
+  }
+
+  // The grow check above guarantees room for the trailing NUL here.
+  contents->allocator = allocator;
+  contents->buffer.data[size] = 0;  // NUL
+  contents->buffer.data_length = size;
+  *out_contents = contents;
+  return iree_ok_status();
+}
+
+// Reads all of stdin until EOF into memory; see the header for the contract.
+iree_status_t iree_stdin_read_contents(iree_allocator_t allocator,
+                                       iree_file_contents_t** out_contents) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_contents);
+  // NULL the output so callers never observe a stale pointer on failure.
+  *out_contents = NULL;
+  iree_status_t status = iree_stdin_read_contents_impl(allocator, out_contents);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+#else
+
+// Stub implementations compiled when IREE_FILE_IO_ENABLE is 0: every entry
+// point fails with UNAVAILABLE (or is a no-op) so builds for platforms
+// without file I/O still link and degrade gracefully.
+
+iree_status_t iree_file_exists(const char* path) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE, "File I/O is disabled");
+}
+
+iree_allocator_t iree_file_contents_deallocator(
+    iree_file_contents_t* contents) {
+  // Nothing can have been allocated, so there is nothing to free.
+  return iree_allocator_null();
+}
+
+void iree_file_contents_free(iree_file_contents_t* contents) {}
+
+iree_status_t iree_file_read_contents(const char* path,
+                                      iree_allocator_t allocator,
+                                      iree_file_contents_t** out_contents) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE, "File I/O is disabled");
+}
+
+iree_status_t iree_file_write_contents(const char* path,
+                                       iree_const_byte_span_t content) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE, "File I/O is disabled");
+}
+
+iree_status_t iree_stdin_read_contents(iree_allocator_t allocator,
+                                       iree_file_contents_t** out_contents) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE, "File I/O is disabled");
+}
+
+#endif // IREE_FILE_IO_ENABLE
diff --git a/runtime/src/iree/base/internal/file_io.h b/runtime/src/iree/base/internal/file_io.h
new file mode 100644
index 0000000..3418c62
--- /dev/null
+++ b/runtime/src/iree/base/internal/file_io.h
@@ -0,0 +1,68 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_FILE_IO_H_
+#define IREE_BASE_INTERNAL_FILE_IO_H_
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Checks if a file exists at the provided |path|.
+//
+// Returns an OK status if the file definitely exists. An OK status does not
+// indicate that attempts to read or write the file will succeed.
+// Returns IREE_STATUS_NOT_FOUND if the file does not exist.
+iree_status_t iree_file_exists(const char* path);
+
+// Loaded file contents.
+typedef struct iree_file_contents_t {
+ iree_allocator_t allocator;
+ union {
+ iree_byte_span_t buffer;
+ iree_const_byte_span_t const_buffer;
+ };
+} iree_file_contents_t;
+
+// Returns an allocator that deallocates the |contents|.
+// This can be passed to functions that require a deallocation mechanism.
+iree_allocator_t iree_file_contents_deallocator(iree_file_contents_t* contents);
+
+// Frees memory associated with |contents|.
+void iree_file_contents_free(iree_file_contents_t* contents);
+
+// Synchronously reads a file's contents into memory.
+//
+// Returns the contents of the file in |out_contents|.
+// |allocator| is used to allocate the memory and the caller must use
+// iree_file_contents_free to release the memory.
+iree_status_t iree_file_read_contents(const char* path,
+ iree_allocator_t allocator,
+ iree_file_contents_t** out_contents);
+
+// Synchronously writes a byte buffer into a file.
+// Existing contents are overwritten.
+iree_status_t iree_file_write_contents(const char* path,
+ iree_const_byte_span_t content);
+
+// Reads the contents of stdin until EOF into memory.
+// The contents will specify up until EOF and the allocation will have a
+// trailing NUL to allow use as a C-string (assuming the contents themselves
+// don't contain NUL).
+//
+// Returns the contents of the file in |out_contents|.
+// |allocator| is used to allocate the memory and the caller must use
+// iree_file_contents_free to release the memory.
+iree_status_t iree_stdin_read_contents(iree_allocator_t allocator,
+ iree_file_contents_t** out_contents);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_FILE_IO_H_
diff --git a/runtime/src/iree/base/internal/file_io_test.cc b/runtime/src/iree/base/internal/file_io_test.cc
new file mode 100644
index 0000000..fd975a6
--- /dev/null
+++ b/runtime/src/iree/base/internal/file_io_test.cc
@@ -0,0 +1,82 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/file_io.h"
+
+#include "iree/base/config.h"
+
+#if IREE_FILE_IO_ENABLE
+
+#include <cstdlib>
+#include <cstring>
+#include <ostream>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "iree/base/logging.h"
+#include "iree/base/status_cc.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace file_io {
+namespace {
+
+using ::iree::testing::status::StatusIs;
+
+// Returns a temp-directory path unique to |unique_name| for test file I/O.
+// Fatally checks that one of TEST_TMPDIR/TMPDIR/TEMP is defined.
+std::string GetUniquePath(const char* unique_name) {
+  const char* test_tmpdir = getenv("TEST_TMPDIR");
+  if (!test_tmpdir) test_tmpdir = getenv("TMPDIR");
+  if (!test_tmpdir) test_tmpdir = getenv("TEMP");
+  IREE_CHECK(test_tmpdir) << "TEST_TMPDIR/TMPDIR/TEMP not defined";
+  return std::string(test_tmpdir) + "/iree_test_" + unique_name;
+}
+
+// Returns file contents unique to |unique_name| so tests don't collide.
+std::string GetUniqueContents(const char* unique_name) {
+  return "Test with name " + std::string(unique_name) + "\n";
+}
+
+TEST(FileIO, ReadWriteContents) {
+ constexpr const char* kUniqueName = "ReadWriteContents";
+ auto path = GetUniquePath(kUniqueName);
+
+ // File must not exist.
+ iree_status_t status = iree_file_exists(path.c_str());
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_NOT_FOUND, status);
+ iree_status_free(status);
+
+ // Generate file contents.
+ auto write_contents = GetUniqueContents(kUniqueName);
+
+ // Write the contents to disk.
+ IREE_ASSERT_OK(iree_file_write_contents(
+ path.c_str(),
+ iree_make_const_byte_span(write_contents.data(), write_contents.size())));
+
+ // Read the contents from disk.
+ iree_file_contents_t* read_contents = NULL;
+ IREE_ASSERT_OK(iree_file_read_contents(path.c_str(), iree_allocator_system(),
+ &read_contents));
+
+ // Expect the contents are equal.
+ EXPECT_EQ(write_contents.size(), read_contents->const_buffer.data_length);
+ EXPECT_EQ(memcmp(write_contents.data(), read_contents->const_buffer.data,
+ read_contents->const_buffer.data_length),
+ 0);
+
+ iree_file_contents_free(read_contents);
+}
+
+} // namespace
+} // namespace file_io
+} // namespace iree
+
+#endif // IREE_FILE_IO_ENABLE
diff --git a/runtime/src/iree/base/internal/file_path.c b/runtime/src/iree/base/internal/file_path.c
new file mode 100644
index 0000000..0499ec0
--- /dev/null
+++ b/runtime/src/iree/base/internal/file_path.c
@@ -0,0 +1,220 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/file_path.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/target_platform.h"
+
+// Clones |value| into a new NUL-terminated string allocated from |allocator|.
+static iree_status_t iree_string_view_dup(iree_string_view_t value,
+                                          iree_allocator_t allocator,
+                                          char** out_buffer) {
+  char* str = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, value.size + 1, (void**)&str));
+  memcpy(str, value.data, value.size);
+  str[value.size] = 0;  // NUL
+  *out_buffer = str;
+  return iree_ok_status();
+}
+
+// Concatenates |lhs| + |rhs| into a new NUL-terminated string allocated from
+// |allocator|.
+static iree_status_t iree_string_view_cat(iree_string_view_t lhs,
+                                          iree_string_view_t rhs,
+                                          iree_allocator_t allocator,
+                                          char** out_buffer) {
+  // Allocate storage buffer with NUL character.
+  iree_host_size_t result_length = lhs.size + rhs.size;
+  char* result = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, result_length + 1, (void**)&result));
+
+  // Copy both parts back-to-back.
+  char* cursor = result;
+  memcpy(cursor, lhs.data, lhs.size);
+  cursor += lhs.size;
+  memcpy(cursor, rhs.data, rhs.size);
+
+  result[result_length] = 0;  // NUL
+  *out_buffer = result;
+  return iree_ok_status();
+}
+
+// Joins |part_count| |parts| with |separator| between each adjacent pair into
+// a new NUL-terminated string allocated from |allocator|.
+static iree_status_t iree_string_view_join(iree_host_size_t part_count,
+                                           const iree_string_view_t* parts,
+                                           iree_string_view_t separator,
+                                           iree_allocator_t allocator,
+                                           char** out_buffer) {
+  // Compute total output size in characters: all parts plus one separator
+  // between each adjacent pair.
+  iree_host_size_t total_length = 0;
+  for (iree_host_size_t i = 0; i < part_count; ++i) {
+    total_length += parts[i].size;
+  }
+  if (part_count > 0) total_length += separator.size * (part_count - 1);
+
+  // Allocate storage buffer with NUL character.
+  char* buffer = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, total_length + 1, (void**)&buffer));
+
+  // Append the parts, prefixing each one after the first with the separator.
+  char* cursor = buffer;
+  for (iree_host_size_t i = 0; i < part_count; ++i) {
+    if (i > 0) {
+      memcpy(cursor, separator.data, separator.size);
+      cursor += separator.size;
+    }
+    memcpy(cursor, parts[i].data, parts[i].size);
+    cursor += parts[i].size;
+  }
+
+  buffer[total_length] = 0;  // NUL
+  *out_buffer = buffer;
+  return iree_ok_status();
+}
+
+// Canonicalizes |path| in-place using Unix conventions by collapsing each run
+// of consecutive '/' characters into a single '/'.
+// Returns the new (<= original) length; a NUL terminator is always written.
+static iree_host_size_t iree_file_path_canonicalize_unix(
+    char* path, iree_host_size_t path_length) {
+  // Single compaction pass: copy each character, skipping any '/' that
+  // immediately follows another '/'. The previous memmove-per-duplicate
+  // approach was O(n^2) for long runs of separators.
+  iree_host_size_t new_length = 0;
+  for (iree_host_size_t i = 0; i < path_length; ++i) {
+    if (path[i] == '/' && new_length > 0 && path[new_length - 1] == '/') {
+      continue;  // collapse duplicate separator
+    }
+    path[new_length++] = path[i];
+  }
+
+  path[new_length] = 0;  // NUL
+  return new_length;
+}
+
+// Canonicalizes |path| in-place using Windows conventions: converts '/' to
+// '\' and collapses each run of consecutive '\' into a single '\'.
+// Returns the new (<= original) length; a NUL terminator is always written.
+static iree_host_size_t iree_file_path_canonicalize_win32(
+    char* path, iree_host_size_t path_length) {
+  // Single pass combining separator conversion and duplicate collapsing.
+  // The previous memmove-per-duplicate approach was O(n^2) for long runs.
+  iree_host_size_t new_length = 0;
+  for (iree_host_size_t i = 0; i < path_length; ++i) {
+    char c = path[i] == '/' ? '\\' : path[i];
+    if (c == '\\' && new_length > 0 && path[new_length - 1] == '\\') {
+      continue;  // collapse duplicate separator
+    }
+    path[new_length++] = c;
+  }
+
+  path[new_length] = 0;  // NUL
+  return new_length;
+}
+
+// Canonicalizes |path| in-place to the host platform's separator convention,
+// dispatching at compile time to the Win32 ('\') or Unix ('/') variant.
+// Returns the (possibly shorter) new length; a NUL terminator is ensured.
+iree_host_size_t iree_file_path_canonicalize(char* path,
+                                             iree_host_size_t path_length) {
+#if defined(IREE_PLATFORM_WINDOWS)
+  return iree_file_path_canonicalize_win32(path, path_length);
+#else
+  return iree_file_path_canonicalize_unix(path, path_length);
+#endif  // IREE_PLATFORM_WINDOWS
+}
+
+// Joins |lhs| and |rhs| with exactly one '/' between them, allocating the
+// result from |allocator|. Callers must free |out_path|.
+iree_status_t iree_file_path_join(iree_string_view_t lhs,
+                                  iree_string_view_t rhs,
+                                  iree_allocator_t allocator, char** out_path) {
+  // An empty side contributes nothing; just clone the other side.
+  if (iree_string_view_is_empty(lhs)) {
+    return iree_string_view_dup(rhs, allocator, out_path);
+  } else if (iree_string_view_is_empty(rhs)) {
+    return iree_string_view_dup(lhs, allocator, out_path);
+  }
+  bool lhs_has_sep = lhs.data[lhs.size - 1] == '/';
+  bool rhs_has_sep = rhs.data[0] == '/';
+  if (lhs_has_sep && rhs_has_sep) {
+    // 'foo/' + '/bar': drop one of the two separators.
+    return iree_string_view_cat(
+        lhs, iree_string_view_substr(rhs, 1, IREE_STRING_VIEW_NPOS), allocator,
+        out_path);
+  } else if (!lhs_has_sep && !rhs_has_sep) {
+    // 'foo' + 'bar': insert a separator between the parts.
+    iree_string_view_t parts[2] = {lhs, rhs};
+    return iree_string_view_join(IREE_ARRAYSIZE(parts), parts,
+                                 iree_make_cstring_view("/"), allocator,
+                                 out_path);
+  }
+  // Exactly one side already supplies the separator.
+  return iree_string_view_cat(lhs, rhs, allocator, out_path);
+}
+
+// Splits |path| at the final '/' into |out_dirname| and |out_basename|.
+// The separator itself is dropped except for a single leading '/' which is
+// preserved as the dirname.
+void iree_file_path_split(iree_string_view_t path,
+                          iree_string_view_t* out_dirname,
+                          iree_string_view_t* out_basename) {
+  iree_host_size_t last_slash = iree_string_view_find_last_of(
+      path, iree_make_cstring_view("/"), IREE_STRING_VIEW_NPOS);
+  if (last_slash == IREE_STRING_VIEW_NPOS) {
+    // No separator: the whole path is the basename.
+    *out_dirname = iree_string_view_empty();
+    *out_basename = path;
+    return;
+  }
+  if (last_slash == 0) {
+    // Root-relative ('/foo'): keep the leading '/' as the dirname.
+    *out_dirname = iree_string_view_substr(path, 0, 1);
+  } else {
+    *out_dirname = iree_string_view_substr(path, 0, last_slash);
+  }
+  *out_basename =
+      iree_string_view_substr(path, last_slash + 1, IREE_STRING_VIEW_NPOS);
+}
+
+// Returns everything before the final '/' in |path| (see
+// iree_file_path_split for edge-case handling).
+iree_string_view_t iree_file_path_dirname(iree_string_view_t path) {
+  iree_string_view_t dirname, basename;
+  iree_file_path_split(path, &dirname, &basename);
+  return dirname;
+}
+
+// Returns everything after the final '/' in |path| (see
+// iree_file_path_split for edge-case handling).
+iree_string_view_t iree_file_path_basename(iree_string_view_t path) {
+  iree_string_view_t dirname, basename;
+  iree_file_path_split(path, &dirname, &basename);
+  return basename;
+}
+
+// Splits the basename of |path| at its final '.' into |out_stem| and
+// |out_extension|. With no '.' (or a trailing '.') the extension is empty.
+void iree_file_path_split_basename(iree_string_view_t path,
+                                   iree_string_view_t* out_stem,
+                                   iree_string_view_t* out_extension) {
+  iree_string_view_t basename = iree_file_path_basename(path);
+  iree_host_size_t last_dot = iree_string_view_find_last_of(
+      basename, iree_make_cstring_view("."), IREE_STRING_VIEW_NPOS);
+  if (last_dot == IREE_STRING_VIEW_NPOS) {
+    // No '.' present: everything is the stem.
+    *out_stem = basename;
+    *out_extension = iree_string_view_empty();
+  } else {
+    *out_stem = iree_string_view_substr(basename, 0, last_dot);
+    *out_extension =
+        iree_string_view_substr(basename, last_dot + 1, IREE_STRING_VIEW_NPOS);
+  }
+}
+
+// Returns the basename of |path| without its extension.
+iree_string_view_t iree_file_path_stem(iree_string_view_t path) {
+  iree_string_view_t stem, extension;
+  iree_file_path_split_basename(path, &stem, &extension);
+  return stem;
+}
+
+// Returns the extension of the basename of |path| (text after the final '.').
+iree_string_view_t iree_file_path_extension(iree_string_view_t path) {
+  iree_string_view_t stem, extension;
+  iree_file_path_split_basename(path, &stem, &extension);
+  return extension;
+}
diff --git a/runtime/src/iree/base/internal/file_path.h b/runtime/src/iree/base/internal/file_path.h
new file mode 100644
index 0000000..9893566
--- /dev/null
+++ b/runtime/src/iree/base/internal/file_path.h
@@ -0,0 +1,70 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_FILE_PATH_H_
+#define IREE_BASE_INTERNAL_FILE_PATH_H_
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Canonicalizes the given |path| to the platform convention by replacing `/`
+// with the appropriate character (`\` on Windows) and stripping extraneous
+// slashes that may have ended up in the filename.
+//
+// NOTE: this is *not* the same as canonicalizing the path via system utilities
+// that may, for example, resolve network paths or symlinks.
+//
+// |path| (of character length |path_length|) is mutated in-place and will have
+// the same or smaller length upon return. Returns the new length of the path. A
+// NUL terminator will be ensured at the end.
+iree_host_size_t iree_file_path_canonicalize(char* path,
+ iree_host_size_t path_length);
+
+// Joins two paths together by inserting `/` as needed.
+//
+// For example:
+// iree_file_path_join('foo', 'bar') --> 'foo/bar'
+// iree_file_path_join('/foo/', '/bar') --> '/foo/bar'
+//
+// Returns the canonicalized path allocated from |allocator| in |out_path|.
+// Callers must free the string when they are done with it.
+iree_status_t iree_file_path_join(iree_string_view_t lhs,
+ iree_string_view_t rhs,
+ iree_allocator_t allocator, char** out_path);
+
+// Splits |path| into the dirname and basename at the final `/`.
+void iree_file_path_split(iree_string_view_t path,
+ iree_string_view_t* out_dirname,
+ iree_string_view_t* out_basename);
+
+// Gets the directory name component of a file |path| (everything before the
+// final `/`).
+iree_string_view_t iree_file_path_dirname(iree_string_view_t path);
+
+// Returns the part of the |path| after the final `/`.
+iree_string_view_t iree_file_path_basename(iree_string_view_t path);
+
+// Returns the parts of the basename of path, split on the final `.`.
+// If there is no `.` in the basename or `.` is the final character in the
+// basename the second value will be empty.
+void iree_file_path_split_basename(iree_string_view_t path,
+ iree_string_view_t* out_stem,
+ iree_string_view_t* out_extension);
+
+// Returns the part of the basename of |path| prior to the final `.`.
+iree_string_view_t iree_file_path_stem(iree_string_view_t path);
+
+// Returns the part of the basename of |path| after to the final `.`.
+iree_string_view_t iree_file_path_extension(iree_string_view_t path);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_FILE_PATH_H_
diff --git a/runtime/src/iree/base/internal/file_path_test.cc b/runtime/src/iree/base/internal/file_path_test.cc
new file mode 100644
index 0000000..456431d
--- /dev/null
+++ b/runtime/src/iree/base/internal/file_path_test.cc
@@ -0,0 +1,170 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/file_path.h"
+
+#include <string>
+
+#include "iree/base/target_platform.h"
+#include "iree/testing/gtest.h"
+
+namespace {
+
+#define _SV(str) iree_make_cstring_view(str)
+
+#define EXPECT_SV_EQ(actual, expected) \
+ EXPECT_TRUE(iree_string_view_equal(actual, expected))
+
+TEST(FilePathTest, Canonicalize) {
+ auto canonicalize = [](std::string value) {
+ value.resize(
+ iree_file_path_canonicalize((char*)value.data(), value.size()));
+ return value;
+ };
+ EXPECT_EQ(canonicalize(""), "");
+ EXPECT_EQ(canonicalize("a"), "a");
+ EXPECT_EQ(canonicalize("ab"), "ab");
+
+#if defined(IREE_PLATFORM_WINDOWS)
+ EXPECT_EQ(canonicalize("/"), "\\");
+ EXPECT_EQ(canonicalize("\\"), "\\");
+ EXPECT_EQ(canonicalize("a/b"), "a\\b");
+ EXPECT_EQ(canonicalize("a//b"), "a\\b");
+ EXPECT_EQ(canonicalize("a////b"), "a\\b");
+ EXPECT_EQ(canonicalize("a\\//b"), "a\\b");
+ EXPECT_EQ(canonicalize("a\\\\b"), "a\\b");
+ EXPECT_EQ(canonicalize("\\a"), "\\a");
+ EXPECT_EQ(canonicalize("/a"), "\\a");
+ EXPECT_EQ(canonicalize("//a"), "\\a");
+ EXPECT_EQ(canonicalize("a/"), "a\\");
+ EXPECT_EQ(canonicalize("a//"), "a\\");
+#else
+ EXPECT_EQ(canonicalize("/"), "/");
+ EXPECT_EQ(canonicalize("a/b"), "a/b");
+ EXPECT_EQ(canonicalize("a//b"), "a/b");
+ EXPECT_EQ(canonicalize("a////b"), "a/b");
+ EXPECT_EQ(canonicalize("/a"), "/a");
+ EXPECT_EQ(canonicalize("//a"), "/a");
+ EXPECT_EQ(canonicalize("a/"), "a/");
+ EXPECT_EQ(canonicalize("a//"), "a/");
+#endif // IREE_PLATFORM_WINDOWS
+}
+
+// Joins |lhs| and |rhs| via iree_file_path_join and returns the result as a
+// std::string, releasing the C allocation before returning.
+static std::string JoinPaths(std::string lhs, std::string rhs) {
+  char* joined = NULL;
+  IREE_IGNORE_ERROR(
+      iree_file_path_join(iree_make_string_view(lhs.data(), lhs.size()),
+                          iree_make_string_view(rhs.data(), rhs.size()),
+                          iree_allocator_system(), &joined));
+  std::string result(joined);
+  iree_allocator_free(iree_allocator_system(), joined);
+  return result;
+}
+
+TEST(FilePathTest, JoinPathsEmpty) {
+ EXPECT_EQ(JoinPaths("", ""), "");
+ EXPECT_EQ(JoinPaths("", "bar"), "bar");
+ EXPECT_EQ(JoinPaths("foo", ""), "foo");
+}
+
+TEST(FilePathTest, JoinPathsSlash) {
+ EXPECT_EQ(JoinPaths("foo", "bar"), "foo/bar");
+ EXPECT_EQ(JoinPaths("foo", "bar/"), "foo/bar/");
+ EXPECT_EQ(JoinPaths("foo", "/bar"), "foo/bar");
+ EXPECT_EQ(JoinPaths("foo", "/bar/"), "foo/bar/");
+
+ EXPECT_EQ(JoinPaths("foo/", "bar"), "foo/bar");
+ EXPECT_EQ(JoinPaths("foo/", "bar/"), "foo/bar/");
+ EXPECT_EQ(JoinPaths("foo/", "/bar"), "foo/bar");
+ EXPECT_EQ(JoinPaths("foo/", "/bar/"), "foo/bar/");
+
+ EXPECT_EQ(JoinPaths("/foo", "bar"), "/foo/bar");
+ EXPECT_EQ(JoinPaths("/foo", "bar/"), "/foo/bar/");
+ EXPECT_EQ(JoinPaths("/foo", "/bar"), "/foo/bar");
+ EXPECT_EQ(JoinPaths("/foo", "/bar/"), "/foo/bar/");
+
+ EXPECT_EQ(JoinPaths("/foo/", "bar"), "/foo/bar");
+ EXPECT_EQ(JoinPaths("/foo/", "bar/"), "/foo/bar/");
+ EXPECT_EQ(JoinPaths("/foo/", "/bar"), "/foo/bar");
+ EXPECT_EQ(JoinPaths("/foo/", "/bar/"), "/foo/bar/");
+}
+
+TEST(FilePathTest, JoinPathsDoubleSlash) {
+ EXPECT_EQ(JoinPaths("foo//", "bar"), "foo//bar");
+ EXPECT_EQ(JoinPaths("foo", "//bar"), "foo//bar");
+}
+
+TEST(FilePathTest, DirnameEmpty) {
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("")), _SV(""));
+}
+
+TEST(FilePathTest, DirnameAbsolute) {
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("/")), _SV("/"));
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("/foo")), _SV("/"));
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("/foo/")), _SV("/foo"));
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("/foo/bar")), _SV("/foo"));
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("/foo/bar/")), _SV("/foo/bar"));
+}
+
+TEST(FilePathTest, DirnameRelative) {
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("foo")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("foo/")), _SV("foo"));
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("foo/bar")), _SV("foo"));
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("foo/bar/")), _SV("foo/bar"));
+}
+
+TEST(FilePathTest, DirnameDoubleSlash) {
+ EXPECT_SV_EQ(iree_file_path_dirname(_SV("foo//")), _SV("foo/"));
+}
+
+TEST(FilePathTest, BasenameEmpty) {
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("")), _SV(""));
+}
+
+TEST(FilePathTest, BasenameAbsolute) {
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("/")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("/foo")), _SV("foo"));
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("/foo/")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("/foo/bar")), _SV("bar"));
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("/foo/bar/")), _SV(""));
+}
+
+TEST(FilePathTest, BasenameRelative) {
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("foo")), _SV("foo"));
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("foo/")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("foo/bar")), _SV("bar"));
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("foo/bar/")), _SV(""));
+}
+
+TEST(FilePathTest, BasenameDoubleSlash) {
+ EXPECT_SV_EQ(iree_file_path_basename(_SV("foo//")), _SV(""));
+}
+
+TEST(FilePathTest, Stem) {
+ EXPECT_SV_EQ(iree_file_path_stem(_SV("")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_stem(_SV("foo")), _SV("foo"));
+ EXPECT_SV_EQ(iree_file_path_stem(_SV("foo.")), _SV("foo"));
+ EXPECT_SV_EQ(iree_file_path_stem(_SV("foo.bar")), _SV("foo"));
+ EXPECT_SV_EQ(iree_file_path_stem(_SV("foo..")), _SV("foo."));
+ EXPECT_SV_EQ(iree_file_path_stem(_SV("foo..bar")), _SV("foo."));
+ EXPECT_SV_EQ(iree_file_path_stem(_SV(".bar")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_stem(_SV("..bar")), _SV("."));
+}
+
+TEST(FilePathTest, Extension) {
+ EXPECT_SV_EQ(iree_file_path_extension(_SV("")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_extension(_SV("foo")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_extension(_SV("foo.")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_extension(_SV("foo.bar")), _SV("bar"));
+ EXPECT_SV_EQ(iree_file_path_extension(_SV("foo..")), _SV(""));
+ EXPECT_SV_EQ(iree_file_path_extension(_SV("foo..bar")), _SV("bar"));
+ EXPECT_SV_EQ(iree_file_path_extension(_SV(".bar")), _SV("bar"));
+ EXPECT_SV_EQ(iree_file_path_extension(_SV("..bar")), _SV("bar"));
+}
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/flags.c b/runtime/src/iree/base/internal/flags.c
new file mode 100644
index 0000000..7f1c961
--- /dev/null
+++ b/runtime/src/iree/base/internal/flags.c
@@ -0,0 +1,545 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/flags.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+
+#if IREE_FLAGS_ENABLE_CLI == 1
+
+#include "iree/base/internal/debugging.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// Flag manipulation utilities
+//===----------------------------------------------------------------------===//
+
+static iree_status_t iree_flags_leaky_allocator_ctl(
+ void* self, iree_allocator_command_t command, const void* params,
+ void** inout_ptr) {
+ IREE_LEAK_CHECK_DISABLE_PUSH();
+ iree_status_t status =
+ iree_allocator_system_ctl(/*self=*/NULL, command, params, inout_ptr);
+ IREE_LEAK_CHECK_DISABLE_POP();
+ return status;
+}
+
+static void iree_flags_leaky_free(void* self, void* ptr) { free(ptr); }
+
+// Allocates heap memory that is leaked without triggering leak checkers.
+// We do this so that we have valid memory for the lifetime of the process.
+// The memory may still be freed but if not will not hurt anything (besides the
+// private working set size).
+static iree_allocator_t iree_flags_leaky_allocator(void) {
+ iree_allocator_t allocator = {
+ .self = NULL,
+ .ctl = iree_flags_leaky_allocator_ctl,
+ };
+ return allocator;
+}
+
+//===----------------------------------------------------------------------===//
+// Flag registry
+//===----------------------------------------------------------------------===//
+
// Storage for registered flags.
typedef struct iree_flag_t {
  // __FILE__ of flag definition.
  const char* file;
  // __LINE__ of flag definition.
  int line;
  // Defines what data is at |storage| and how to parse/print it.
  iree_flag_type_t type;
  // Registered callback to issue when the flag is parsed, if any.
  // Only invoked for IREE_FLAG_TYPE_callback flags.
  iree_flag_parse_callback_fn_t parse_callback;
  // Registered callback to issue when the flag is to be printed, if any.
  // Only invoked for IREE_FLAG_TYPE_callback flags.
  iree_flag_print_callback_fn_t print_callback;
  // Direct reference to the variable storing the flag value of |type|.
  // May be NULL (the flag prints with no value; see iree_flag_print).
  void* storage;
  // Name of the flag on the command line ('foo' => '--foo=value').
  iree_string_view_t name;
  // Short description string; may contain '\n' to span multiple lines.
  iree_string_view_t description;
} iree_flag_t;

// State used for flag registration and reflection.
typedef struct iree_flag_registry_t {
  // Program name and usage text shown by --help (see iree_flags_set_usage).
  const char* program_name;
  const char* usage;

  // Total number of entries in the |flags| list.
  int flag_count;
  // All registered flags in the executable in an undefined order.
  // Sorted in place by (file, line) on demand before parsing/dumping.
  iree_flag_t flags[IREE_FLAGS_CAPACITY];
} iree_flag_registry_t;

// Global flags state.
// This will persist for the lifetime of the program so that flags can be
// reparsed/dumped. If you're concerned about the .data overhead then you
// probably just want to disable the CLI support for flags entirely.
static iree_flag_registry_t iree_flag_registry = {
    .program_name = NULL,
    .usage = NULL,
    .flag_count = 0,
};
+
+int iree_flag_register(const char* file, int line, iree_flag_type_t type,
+ void* storage,
+ iree_flag_parse_callback_fn_t parse_callback,
+ iree_flag_print_callback_fn_t print_callback,
+ iree_string_view_t name,
+ iree_string_view_t description) {
+ // TODO(benvanik): make the registry a linked list and externalize the
+ // flag storage - then no need for a fixed count. If you're hitting this then
+ // file an issue :)
+ iree_flag_registry_t* registry = &iree_flag_registry;
+ IREE_ASSERT_LE(registry->flag_count + 1, IREE_FLAGS_CAPACITY,
+ "flag registry overflow; too many flags registered");
+ int flag_ordinal = registry->flag_count++;
+ iree_flag_t* flag = ®istry->flags[flag_ordinal];
+ flag->file = file;
+ flag->line = line;
+ flag->type = type;
+ flag->parse_callback = parse_callback;
+ flag->print_callback = print_callback;
+ flag->storage = storage;
+ flag->name = name;
+ flag->description = description;
+ return flag_ordinal;
+}
+
+// Returns the flag registration with the given |name| or NULL if not found.
+static iree_flag_t* iree_flag_lookup(iree_string_view_t name) {
+ iree_flag_registry_t* registry = &iree_flag_registry;
+ for (int i = 0; i < registry->flag_count; ++i) {
+ iree_flag_t* flag = ®istry->flags[i];
+ if (iree_string_view_equal(flag->name, name)) {
+ return flag;
+ }
+ }
+ return NULL;
+}
+
+static int iree_flag_cmp(const void* lhs_ptr, const void* rhs_ptr) {
+ const iree_flag_t* lhs = (const iree_flag_t*)lhs_ptr;
+ const iree_flag_t* rhs = (const iree_flag_t*)rhs_ptr;
+ int ret = strcmp(lhs->file, rhs->file);
+ if (ret == 0) {
+ return lhs->line - rhs->line;
+ }
+ return ret;
+}
+
+// Sorts the flags in the flag registry by file > line.
+static void iree_flag_registry_sort(iree_flag_registry_t* registry) {
+ qsort(registry->flags, registry->flag_count, sizeof(iree_flag_t),
+ iree_flag_cmp);
+}
+
+//===----------------------------------------------------------------------===//
+// Flag parsing/printing
+//===----------------------------------------------------------------------===//
+
+void iree_flags_set_usage(const char* program_name, const char* usage) {
+ iree_flag_registry_t* registry = &iree_flag_registry;
+ registry->program_name = program_name;
+ registry->usage = usage;
+}
+
// Parses a flag value from the given string and stores it in the flag's
// backing variable (or routes it to the flag's parse callback).
// |value| is NUL-terminated in place so C string APIs can be used on it.
static iree_status_t iree_flag_parse(iree_flag_t* flag,
                                     iree_string_view_t value) {
  IREE_TRACE_ZONE_BEGIN(z0);
  IREE_TRACE_ZONE_APPEND_TEXT(z0, flag->name.data, flag->name.size);
  IREE_TRACE_ZONE_APPEND_TEXT(z0, value.data, value.size);

  // Insert NUL on the flag value. This is safe as the value is either coming
  // from C argv memory which is mutable or a flagfile that we loaded into
  // memory ourselves.
  char* str_value = (char*)value.data;
  if (value.size > 0) {
    str_value[value.size] = 0;
  }

  iree_status_t status = iree_ok_status();
  switch (flag->type) {
    case IREE_FLAG_TYPE_callback:
      status = flag->parse_callback(flag->name, flag->storage, value);
      break;
    case IREE_FLAG_TYPE_bool:
      // Bare `--foo` (empty value), `--foo=true`, and `--foo=1` are true;
      // any other value is treated as false.
      if (value.size == 0 || strcmp(str_value, "true") == 0 ||
          strcmp(str_value, "1") == 0) {
        *(bool*)flag->storage = true;
      } else {
        *(bool*)flag->storage = false;
      }
      break;
    case IREE_FLAG_TYPE_int32_t:
      // Empty values reset numeric flags to 0.
      *(int32_t*)flag->storage = value.size ? atoi(str_value) : 0;
      break;
    case IREE_FLAG_TYPE_int64_t:
      *(int64_t*)flag->storage = value.size ? atoll(str_value) : 0;
      break;
    case IREE_FLAG_TYPE_float:
      *(float*)flag->storage = value.size ? (float)atof(str_value) : 0.0f;
      break;
    case IREE_FLAG_TYPE_double:
      *(double*)flag->storage = value.size ? atof(str_value) : 0.0;
      break;
    case IREE_FLAG_TYPE_string: {
      iree_host_size_t str_length = value.size;
      if (str_length > 2) {
        // Strip double quotes: "foo" -> foo.
        // This may not be worth the complexity.
        // NOTE: a bare `""` (length 2) is left untouched by the length check.
        if (str_value[0] == '"' && str_value[str_length - 1] == '"') {
          str_value[str_length - 1] = 0;
          ++str_value;
          str_length = str_length - 2;
        }
      }
      // Stores a pointer into the (now NUL-terminated) argument memory; that
      // memory must remain live as long as the flag value is referenced.
      *(const char**)flag->storage = str_value;
      break;
    }
    default:
      status = iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                                "invalid flag type %u", flag->type);
      break;
  }
  IREE_TRACE_ZONE_END(z0);
  return status;
}
+
+// Prints a flag value to |file| (like 'true' or '5.43').
+static void iree_flag_print(FILE* file, iree_flag_t* flag) {
+ if (flag->type == IREE_FLAG_TYPE_callback) {
+ flag->print_callback(flag->name, flag->storage, file);
+ return;
+ }
+ fprintf(file, "--%.*s", (int)flag->name.size, flag->name.data);
+ if (flag->storage == NULL) return;
+ switch (flag->type) {
+ case IREE_FLAG_TYPE_bool:
+ fprintf(file, "=%s", (*(bool*)flag->storage) ? "true" : "false");
+ break;
+ case IREE_FLAG_TYPE_int32_t:
+ fprintf(file, "=%" PRId32, *(int32_t*)flag->storage);
+ break;
+ case IREE_FLAG_TYPE_int64_t:
+ fprintf(file, "=%" PRId64, *(int64_t*)flag->storage);
+ break;
+ case IREE_FLAG_TYPE_float:
+ fprintf(file, "=%g", *(float*)flag->storage);
+ break;
+ case IREE_FLAG_TYPE_double:
+ fprintf(file, "=%g", *(double*)flag->storage);
+ break;
+ case IREE_FLAG_TYPE_string:
+ fprintf(file, "=\"%s\"", *(const char**)flag->storage);
+ break;
+ default:
+ fprintf(file, "=<INVALID>");
+ break;
+ }
+ fprintf(file, "\n");
+}
+
+// Dumps a flag definition and value to |file|.
+static void iree_flag_dump(iree_flag_dump_mode_t mode, FILE* file,
+ iree_flag_t* flag) {
+ if (iree_all_bits_set(mode, IREE_FLAG_DUMP_MODE_VERBOSE)) {
+ if (!iree_string_view_is_empty(flag->description)) {
+ iree_string_view_t description = flag->description;
+ while (!iree_string_view_is_empty(description)) {
+ iree_string_view_t line;
+ iree_string_view_split(description, '\n', &line, &description);
+ if (!iree_string_view_is_empty(line)) {
+ fprintf(file, "# %.*s\n", (int)line.size, line.data);
+ }
+ }
+ }
+ }
+ iree_flag_print(file, flag);
+}
+
// --help parse callback: prints a banner, the optional usage text set via
// iree_flags_set_usage, and a verbose dump of every registered flag to
// stdout. Process exit (when enabled) is handled by iree_flags_parse, not
// here.
static iree_status_t iree_flags_parse_help(iree_string_view_t flag_name,
                                           void* storage,
                                           iree_string_view_t value) {
  iree_flag_registry_t* registry = &iree_flag_registry;

  fprintf(stdout,
          "# "
          "===================================================================="
          "========\n");
  fprintf(stdout, "# 👻 IREE: %s\n",
          registry->program_name ? registry->program_name : "");
  fprintf(stdout,
          "# "
          "===================================================================="
          "========\n\n");
  if (registry->usage) {
    fprintf(stdout, "%s\n", registry->usage);
  }
  iree_flags_dump(IREE_FLAG_DUMP_MODE_VERBOSE, stdout);
  fprintf(stdout, "\n");

  return iree_ok_status();
}
// --help print callback: shown as a plain `# --help` comment line in dumps.
static void iree_flags_print_help(iree_string_view_t flag_name, void* storage,
                                  FILE* file) {
  fprintf(file, "# --%.*s\n", (int)flag_name.size, flag_name.data);
}
IREE_FLAG_CALLBACK(iree_flags_parse_help, iree_flags_print_help, NULL, help,
                   "Displays command line usage information.");
+
// Removes argument |arg| from the argument list by shifting the following
// entries down one slot and decrementing the count.
static void iree_flags_remove_arg(int arg, int* argc_ptr, char*** argv_ptr) {
  int argc = *argc_ptr;
  char** argv = *argv_ptr;
  // NOTE: only the (argc - arg - 1) entries after |arg| are moved. The
  // previous count of (argc - arg) read argv[argc]: valid for a main()-style
  // argv (which is NULL-terminated) but out of bounds for caller-built
  // argument arrays.
  memmove(&argv[arg], &argv[arg + 1], (argc - arg - 1) * sizeof(char*));
  *argc_ptr = argc - 1;
}
+
// Parses all `--`-prefixed arguments in |argv_ptr|, storing flag values and
// splicing the consumed arguments out of the list; |argc_ptr|/|argv_ptr| are
// updated in place. Positional (non `--`) arguments remain for the caller.
iree_status_t iree_flags_parse(iree_flags_parse_mode_t mode, int* argc_ptr,
                               char*** argv_ptr) {
  if (argc_ptr == NULL || argv_ptr == NULL || *argc_ptr == 0) {
    // No flags; that's fine - in some environments flags aren't supported.
    return iree_ok_status();
  }

  // Always sort the registry; though we may parse flags multiple times this is
  // not a hot path and this is easier than trying to keep track of whether we
  // need to or not.
  iree_flag_registry_sort(&iree_flag_registry);

  int argc = *argc_ptr;
  char** argv = *argv_ptr;

  // Start at 1 to skip the program name in argv[0].
  for (int arg_ordinal = 1; arg_ordinal < argc; ++arg_ordinal) {
    iree_string_view_t arg = iree_make_cstring_view(argv[arg_ordinal]);

    // Strip whitespace.
    arg = iree_string_view_trim(arg);

    // Position arguments are ignored; they may appear anywhere in the list.
    if (!iree_string_view_starts_with(arg, iree_make_cstring_view("--"))) {
      continue;
    }

    // Strip `--`.
    arg = iree_string_view_remove_prefix(arg, 2);

    // Split into `flag_name` = `flag_value`.
    // With no `=` present the value is empty (e.g. a bare `--foo`).
    iree_string_view_t flag_name;
    iree_string_view_t flag_value;
    iree_string_view_split(arg, '=', &flag_name, &flag_value);
    flag_name = iree_string_view_trim(flag_name);
    flag_value = iree_string_view_trim(flag_value);

    // Lookup the flag by name.
    iree_flag_t* flag = iree_flag_lookup(flag_name);
    if (!flag) {
      // If --undefok allows undefined flags then we just skip this one. Note
      // that we leave it in the argument list so that subsequent flag parsers
      // can try to handle it.
      if (iree_all_bits_set(mode, IREE_FLAGS_PARSE_MODE_UNDEFINED_OK)) {
        continue;
      }
      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                              "flag '%.*s' not recognized", (int)flag_name.size,
                              flag_name.data);
    }

    // Parse and store the flag value.
    IREE_RETURN_IF_ERROR(iree_flag_parse(flag, flag_value));

    // --help gets special handling due to interop with external libraries that
    // may also need to find it. If indicated we keep --help in the argument
    // list and don't exit.
    if (iree_string_view_equal(flag_name, iree_make_cstring_view("help"))) {
      if (iree_all_bits_set(mode, IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP)) {
        continue;  // don't remove the arg below
      }
      exit(0);  // --help exits by default.
    }

    // Splice out the flag from the argv list.
    iree_flags_remove_arg(arg_ordinal, &argc, &argv);
    --arg_ordinal;  // compensate for the splice so the next arg isn't skipped
  }

  *argc_ptr = argc;
  return iree_ok_status();
}
+
// Parses flags as iree_flags_parse does but treats failures as fatal:
// the error is printed to stderr and the process exit()s. Command line tool
// use only; see the warning in the header.
void iree_flags_parse_checked(iree_flags_parse_mode_t mode, int* argc,
                              char*** argv) {
  IREE_TRACE_ZONE_BEGIN(z0);
  // Record the full command line in the trace for later debugging.
  for (int i = 0; i < *argc; ++i) {
    IREE_TRACE_ZONE_APPEND_TEXT_CSTRING(z0, (*argv)[i]);
  }
  iree_status_t status = iree_flags_parse(mode, argc, argv);
  IREE_TRACE_ZONE_END(z0);
  if (iree_status_is_ok(status)) return;

  fprintf(stderr, "\x1b[31mFLAGS ERROR: (╯°□°)╯︵👻\x1b[0m\n");
  iree_status_fprint(stderr, status);
  fflush(stderr);

  exit(EXIT_FAILURE);
}
+
+void iree_flags_dump(iree_flag_dump_mode_t mode, FILE* file) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Always sort the registry; though we may dump flags multiple times this is
+ // not a hot path and this is easier than trying to keep track of whether we
+ // need to or not.
+ iree_flag_registry_sort(&iree_flag_registry);
+
+ const char* last_file = NULL;
+ for (size_t i = 0; i < iree_flag_registry.flag_count; ++i) {
+ iree_flag_t* flag = &iree_flag_registry.flags[i];
+ if (iree_all_bits_set(mode, IREE_FLAG_DUMP_MODE_VERBOSE)) {
+ if (last_file) {
+ fprintf(file, "\n");
+ }
+ if (!last_file || strcmp(flag->file, last_file) != 0) {
+ fprintf(file,
+ "# "
+ "===-----------------------------------------------------------"
+ "-----------===\n");
+ fprintf(file, "# Flags in %s:%d\n", flag->file, flag->line);
+ fprintf(file,
+ "# "
+ "===-----------------------------------------------------------"
+ "-----------===\n\n");
+ last_file = flag->file;
+ }
+ }
+ iree_flag_dump(mode, file, flag);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// --flagfile= support
+//===----------------------------------------------------------------------===//
+// NOTE: this is conditionally enabled as some platforms may not have IO.
+
+#if IREE_FLAGS_ENABLE_FLAG_FILE == 1
+
+#include "iree/base/internal/file_io.h"
+
// Parses a newline-separated list of flags from a file.
// Lines are applied in order (a later occurrence of a flag overrides an
// earlier one); blank lines and `#`/`//` comment lines are skipped.
// Errors are prefixed with `<path>:<line>:` for diagnosis.
static iree_status_t iree_flags_parse_file(iree_string_view_t file_path) {
  // Read file contents.
  // NOTE: we intentionally leak the contents here so that the flags remain in
  // memory in case they are referenced.
  // NOTE: safe to use file_path.data here as it will always have a NUL
  // terminator.
  iree_allocator_t allocator = iree_flags_leaky_allocator();
  iree_file_contents_t* file_contents = NULL;
  IREE_RETURN_IF_ERROR(
      iree_file_read_contents(file_path.data, allocator, &file_contents),
      "while trying to parse flagfile");

  // Run through the file line-by-line.
  int line_number = 0;  // 1-based in error messages (incremented below)
  iree_string_view_t contents =
      iree_make_string_view((const char*)file_contents->buffer.data,
                            file_contents->buffer.data_length);
  while (!iree_string_view_is_empty(contents)) {
    // Split into a single line and the entire rest of the file contents.
    iree_string_view_t line;
    iree_string_view_split(contents, '\n', &line, &contents);
    ++line_number;

    // Strip whitespace.
    line = iree_string_view_trim(line);
    if (iree_string_view_is_empty(line)) continue;

    // Ignore comments.
    if (iree_string_view_starts_with(line, iree_make_cstring_view("#")) ||
        iree_string_view_starts_with(line, iree_make_cstring_view("//"))) {
      continue;
    }

    // Strip `--`.
    if (!iree_string_view_starts_with(line, iree_make_cstring_view("--"))) {
      // Positional arguments can't be specified in flag files.
      return iree_make_status(
          IREE_STATUS_INVALID_ARGUMENT,
          "%.*s:%d: positional arguments not allowed in flag files",
          (int)file_path.size, file_path.data, line_number);
    }
    line = iree_string_view_remove_prefix(line, 2);

    // Split into `flag_name` = `flag_value`.
    iree_string_view_t flag_name;
    iree_string_view_t flag_value;
    iree_string_view_split(line, '=', &flag_name, &flag_value);
    flag_name = iree_string_view_trim(flag_name);
    flag_value = iree_string_view_trim(flag_value);

    // Lookup the flag by name.
    iree_flag_t* flag = iree_flag_lookup(flag_name);
    if (!flag) {
      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                              "%.*s:%d: flag '%.*s' not recognized",
                              (int)file_path.size, file_path.data, line_number,
                              (int)flag_name.size, flag_name.data);
    }

    // Parse the flag value.
    IREE_RETURN_IF_ERROR(iree_flag_parse(flag, flag_value),
                         "%.*s:%d: while parsing flag '%.*s'",
                         (int)file_path.size, file_path.data, line_number,
                         (int)line.size, line.data);
  }

  // NOTE: we intentionally leak the memory as flags may continue to reference
  // segments of it for their string values.
  return iree_ok_status();
}
+
+static iree_status_t iree_flags_parse_flagfile(iree_string_view_t flag_name,
+ void* storage,
+ iree_string_view_t value) {
+ if (iree_string_view_is_empty(value)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "--%.*s= requires a file path", (int)flag_name.size,
+ flag_name.data);
+ }
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, value.data, value.size);
+ iree_status_t status = iree_flags_parse_file(value);
+ IREE_TRACE_ZONE_END(z0);
+
+ return status;
+}
// --flagfile= print callback: dumped as a `# --flagfile=[path]` comment line
// since the original path(s) are not retained after parsing.
static void iree_flags_print_flagfile(iree_string_view_t flag_name,
                                      void* storage, FILE* file) {
  fprintf(file, "# --%.*s=[path]\n", (int)flag_name.size, flag_name.data);
}
IREE_FLAG_CALLBACK(iree_flags_parse_flagfile, iree_flags_print_flagfile, NULL,
                   flagfile,
                   "Parses a newline-separated list of flags from a file.\n"
                   "Flags are parsed at the point where the flagfile is "
                   "specified\nand following flags may override the parsed "
                   "values.");
+
+#endif // IREE_FLAGS_ENABLE_FLAG_FILE
+
+#endif // IREE_FLAGS_ENABLE_CLI
diff --git a/runtime/src/iree/base/internal/flags.h b/runtime/src/iree/base/internal/flags.h
new file mode 100644
index 0000000..213c1f3
--- /dev/null
+++ b/runtime/src/iree/base/internal/flags.h
@@ -0,0 +1,297 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_FLAGS_H_
+#define IREE_BASE_INTERNAL_FLAGS_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
//===----------------------------------------------------------------------===//
// Flags configuration
//===----------------------------------------------------------------------===//
// All of these may be overridden at build time via -D defines.

// 1 to enable command line parsing from argc/argv; 0 otherwise.
// When parsing is disabled flags are just variables that can still be queried
// and manually overridden by code if desired.
#if !defined(IREE_FLAGS_ENABLE_CLI)
#define IREE_FLAGS_ENABLE_CLI 1
#endif  // !IREE_FLAGS_ENABLE_CLI

// 1 to enable --flagfile= support (requires file IO; platforms without it can
// set this to 0).
#if !defined(IREE_FLAGS_ENABLE_FLAG_FILE)
#define IREE_FLAGS_ENABLE_FLAG_FILE 1
#endif  // !IREE_FLAGS_ENABLE_FLAG_FILE

// Maximum number of flags that can be registered in a single binary.
// Registration asserts when this capacity is exceeded.
#if !defined(IREE_FLAGS_CAPACITY)
#define IREE_FLAGS_CAPACITY 64
#endif  // !IREE_FLAGS_CAPACITY
+
//===----------------------------------------------------------------------===//
// Static initialization utility
//===----------------------------------------------------------------------===//
// This declares a static initialization function with the given name.
// Usage:
//  IREE_STATIC_INITIALIZER(initializer_name) {
//    // Do something here! Note that initialization order is undefined and
//    // what you do should be tolerant to that.
//
//    // If you want a finalizer (you probably don't; they may not get run)
//    // then you can use atexit:
//    atexit(some_finalizer_fn);
//  }

#ifdef __cplusplus

// C++: a file-local object whose constructor invokes the function at static
// initialization time.
#define IREE_STATIC_INITIALIZER(f) \
  static void f(void); \
  struct f##_t_ { \
    f##_t_(void) { f(); } \
  }; \
  static f##_t_ f##_; \
  static void f(void)

#elif defined(IREE_COMPILER_MSVC)

// `__attribute__((constructor))`-like behavior in MSVC. See:
// https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization?view=msvc-160
// A pointer to the function is placed in the CRT user-initializer section
// (.CRT$XCU) and force-referenced via /include so the linker cannot strip it;
// the symbol decoration prefix differs between x86 ("_") and x64 ("").

#pragma section(".CRT$XCU", read)
#define IREE_STATIC_INITIALIZER_IMPL(f, p) \
  static void f(void); \
  __declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \
  __pragma(comment(linker, "/include:" p #f "_")) static void f(void)
#ifdef _WIN64
#define IREE_STATIC_INITIALIZER(f) IREE_STATIC_INITIALIZER_IMPL(f, "")
#else
#define IREE_STATIC_INITIALIZER(f) IREE_STATIC_INITIALIZER_IMPL(f, "_")
#endif // _WIN64

#else

// GCC/Clang and compatible compilers: run at image load via
// __attribute__((constructor)).
#define IREE_STATIC_INITIALIZER(f) \
  static void f(void) __attribute__((constructor)); \
  static void f(void)

#endif // __cplusplus / MSVC
+
+//===----------------------------------------------------------------------===//
+// Flag definition
+//===----------------------------------------------------------------------===//
+
// Bits controlling iree_flags_dump output.
enum iree_flag_dump_mode_bits_t {
  IREE_FLAG_DUMP_MODE_DEFAULT = 0u,
  // Also emit each flag's description as `# `-prefixed comment lines.
  IREE_FLAG_DUMP_MODE_VERBOSE = 1u << 0,
};
typedef uint32_t iree_flag_dump_mode_t;

// Maps IREE_FLAG type tokens to the C storage type of the FLAG_* variable.
#define IREE_FLAG_CTYPE_bool bool
#define IREE_FLAG_CTYPE_int32_t int32_t
#define IREE_FLAG_CTYPE_int64_t int64_t
#define IREE_FLAG_CTYPE_float float
#define IREE_FLAG_CTYPE_double double
#define IREE_FLAG_CTYPE_string const char*

#if IREE_FLAGS_ENABLE_CLI == 1

// Types of flags supported by the parser.
typedef enum iree_flag_type_e {
  // Empty/unspecified sentinel.
  IREE_FLAG_TYPE_none = 0,
  // Custom parsing callback; see IREE_FLAG_CALLBACK.
  IREE_FLAG_TYPE_callback = 1,
  // Boolean flag:
  //  --foo (set true)
  //  --foo=true | --foo=false
  IREE_FLAG_TYPE_bool,
  // 32-bit integer flag:
  //  --foo=123
  IREE_FLAG_TYPE_int32_t,
  // 64-bit integer flag:
  //  --foo=123
  IREE_FLAG_TYPE_int64_t,
  // 32-bit floating-point flag:
  //  --foo=1.2
  IREE_FLAG_TYPE_float,
  // 64-bit floating-point flag:
  //  --foo=1.2
  IREE_FLAG_TYPE_double,
  // String flag:
  //  --foo=abc
  //  --foo="a b c"
  // Holds a reference to constant string data; assigned values must remain
  // live for as long as the flag value references them.
  IREE_FLAG_TYPE_string,
} iree_flag_type_t;

// Custom callback issued for each time the flag is seen during parsing.
// The |value| provided will already be trimmed and may be empty. For
// compatibility with non-IREE APIs there will be a NUL terminator immediately
// following the flag value in memory such that `value.data` can be used as a
// C-string.
typedef iree_status_t(IREE_API_PTR* iree_flag_parse_callback_fn_t)(
    iree_string_view_t flag_name, void* storage, iree_string_view_t value);

// Custom callback issued for each time the flag is to be printed.
// The callback should print the flag and its value to |file|.
// Example: `--my_flag=value\n`
typedef void(IREE_API_PTR* iree_flag_print_callback_fn_t)(
    iree_string_view_t flag_name, void* storage, FILE* file);

// Registers a flag with the global registry and returns its ordinal.
// Typically invoked from the static initializers emitted by the IREE_FLAG*
// macros rather than called directly; all strings must have static storage
// duration.
int iree_flag_register(const char* file, int line, iree_flag_type_t type,
                       void* storage,
                       iree_flag_parse_callback_fn_t parse_callback,
                       iree_flag_print_callback_fn_t print_callback,
                       iree_string_view_t name, iree_string_view_t description);
+
// Defines a flag with the given |type| and |name|.
//
// Conceptually the flag is just a variable and can be loaded/stored:
//  IREE_FLAG(bool, foo, true, "hello");
//  =>
//  static bool FLAG_foo = true;
//  ...
//  if (FLAG_foo) do_something();
//
// If flag parsing is enabled with IREE_FLAGS_ENABLE_CLI == 1 then the flag
// value can be specified on the command line with --name:
//  --foo
//  --foo=true
//
// See iree_flag_type_t for the types supported and how they are parsed.
#define IREE_FLAG(type, name, default_value, description) \
  static IREE_FLAG_CTYPE_##type FLAG_##name = (default_value); \
  IREE_STATIC_INITIALIZER(iree_flag_register_##name) { \
    iree_flag_register(__FILE__, __LINE__, IREE_FLAG_TYPE_##type, \
                       (void**)&(FLAG_##name), /*parse_callback=*/NULL, \
                       /*print_callback=*/NULL, iree_make_cstring_view(#name), \
                       iree_make_cstring_view(description)); \
  }

// Defines a flag that issues |callback| for custom parsing.
//
// Usage:
//  iree_status_t parse_callback(const char* flag_name, void* storage,
//                               iree_string_view_t value) {
//    // Parse |value| and store in |storage|, however you want.
//    // Returning IREE_STATUS_INVALID_ARGUMENT will trigger --help.
//    int* storage_ptr = (int*)storage;
//    printf("hello! %d", (*storage_ptr)++);
//    return iree_ok_status();
//  }
//  void print_callback(const char* flag_name, void* storage, FILE* file) {
//    // Print the value in |storage|, however you want. For repeated fields
//    // you can print multiple separated by newlines.
//    int* storage_ptr = (int*)storage;
//    fprintf(file, "--say_hello=%d\n", *storage_ptr);
//  }
//  int my_storage = 0;
//  IREE_FLAG_CALLBACK(parse_callback, print_callback, &my_storage,
//                     say_hello, "Say hello!");
#define IREE_FLAG_CALLBACK(parse_callback, print_callback, storage, name, \
                           description) \
  IREE_STATIC_INITIALIZER(iree_flag_register_##name) { \
    iree_flag_register(__FILE__, __LINE__, IREE_FLAG_TYPE_callback, \
                       (void*)storage, parse_callback, print_callback, \
                       iree_make_cstring_view(#name), \
                       iree_make_cstring_view(description)); \
  }

#else

// CLI disabled: IREE_FLAG still defines the backing variable (const, frozen
// at its default value) and IREE_FLAG_CALLBACK compiles to nothing.
#define IREE_FLAG(type, name, default_value, description) \
  static const IREE_FLAG_CTYPE_##type FLAG_##name = (default_value);

#define IREE_FLAG_CALLBACK(parse_callback, print_callback, storage, name, \
                           description)

#endif // IREE_FLAGS_ENABLE_CLI
+
//===----------------------------------------------------------------------===//
// Flag parsing
//===----------------------------------------------------------------------===//

// Controls how flag parsing is performed.
enum iree_flags_parse_mode_bits_t {
  IREE_FLAGS_PARSE_MODE_DEFAULT = 0,
  // Do not error out on undefined flags; leave them in the list.
  // Useful when needing to chain multiple flag parsers together.
  IREE_FLAGS_PARSE_MODE_UNDEFINED_OK = 1u << 0,
  // Continues parsing and returns success without exiting when `--help` is
  // encountered. This allows for IREE flag parsing to happen before another
  // external library parses its flags. `--help` will remain in the flag set
  // such that the subsequent parsing can find it.
  IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP = 1u << 1,
};
typedef uint32_t iree_flags_parse_mode_t;

#if IREE_FLAGS_ENABLE_CLI == 1

// Sets the usage information printed when --help is passed on the command line.
// Both strings must remain live for the lifetime of the program.
void iree_flags_set_usage(const char* program_name, const char* usage);

// Parses flags from the given command line arguments.
// All `--`-prefixed arguments will be consumed and argc/argv will be updated
// to contain only the program name (index 0) and any remaining positional
// arguments.
//
// Returns OK if all flags were parsed and execution should continue.
// Returns IREE_STATUS_INVALID_ARGUMENT if a flag is not recognized (unless
// IREE_FLAGS_PARSE_MODE_UNDEFINED_OK is set) or its value fails to parse.
// NOTE: by default `--help` prints usage and exits the process; set
// IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP to continue instead.
//
// Usage:
//  extern "C" int main(int argc, char** argv) {
//    iree_status_t status =
//        iree_flags_parse(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
//    if (!iree_status_is_ok(status)) { exit(1); }
//    consume_positional_args(argc, argv);
//    return 0;
//  }
//
// Example:
//  argc = 3, argv = ['program', 'abc', '--flag=2']
// Results:
//  argc = 2, argv = ['program', 'abc']
iree_status_t iree_flags_parse(iree_flags_parse_mode_t mode, int* argc,
                               char*** argv);

// Parses flags as with iree_flags_parse but will use exit() or abort().
// WARNING: this almost always what you want in a command line tool and *never*
// what you want when embedded in a host process. You don't want to have a flag
// typo and shut down your entire server/sandbox/Android app/etc.
void iree_flags_parse_checked(iree_flags_parse_mode_t mode, int* argc,
                              char*** argv);

// Dumps all flags and their current values to the given |file|.
void iree_flags_dump(iree_flag_dump_mode_t mode, FILE* file);
+
+#else
+
+inline void iree_flags_set_usage(const char* program_name, const char* usage) {}
+inline int iree_flags_parse(iree_flags_parse_mode_t mode, int* argc,
+ char*** argv) {
+ return 0;
+}
+inline void iree_flags_parse_checked(iree_flags_parse_mode_t mode, int* argc,
+ char*** argv) {}
+inline void iree_flags_dump(iree_flag_dump_mode_t mode, FILE* file) {}
+
+#endif // IREE_FLAGS_ENABLE_CLI
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_INTERNAL_FLAGS_H_
diff --git a/runtime/src/iree/base/internal/flags_demo.c b/runtime/src/iree/base/internal/flags_demo.c
new file mode 100644
index 0000000..82f1213
--- /dev/null
+++ b/runtime/src/iree/base/internal/flags_demo.c
@@ -0,0 +1,62 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/flags.h"
+
// One demo flag of each built-in type; parsed values are echoed by main.
// The string description with an embedded '\n' demonstrates multi-line help.
IREE_FLAG(bool, test_bool, false, "A boolean value.");
IREE_FLAG(int32_t, test_int32, 123, "An int32_t value.");
IREE_FLAG(int64_t, test_int64, 555, "An int64_t value.");
IREE_FLAG(float, test_float, 1.0f, "A float value.");
IREE_FLAG(string, test_string, "some default", "A string\nvalue.");
+
+static iree_status_t parse_callback(iree_string_view_t flag_name, void* storage,
+ iree_string_view_t value) {
+ int* count_ptr = (int*)storage;
+ if (strcmp(value.data, "FORCE_FAILURE") == 0) {
+ return iree_make_status(IREE_STATUS_INTERNAL,
+ "callbacks can do verification");
+ }
+ *count_ptr += atoi(value.data);
+ return iree_ok_status();
+}
+static void print_callback(iree_string_view_t flag_name, void* storage,
+ FILE* file) {
+ int* count_ptr = (int*)storage;
+ fprintf(file, "--%.*s=%d\n", (int)flag_name.size, flag_name.data, *count_ptr);
+}
// Accumulator backing --test_callback=; each occurrence adds its value.
static int callback_count = 0;
IREE_FLAG_CALLBACK(parse_callback, print_callback, &callback_count,
                   test_callback, "Callback!");
+
+int main(int argc, char** argv) {
+ // Parse flags, updating argc/argv with position arguments.
+ iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
+
+ // Report parsed flag values:
+ printf("FLAG[test_bool] = %s\n", FLAG_test_bool ? "true" : "false");
+ printf("FLAG[test_int32] = %" PRId32 "\n", FLAG_test_int32);
+ printf("FLAG[test_int64] = %" PRId64 "\n", FLAG_test_int64);
+ printf("FLAG[test_float] = %g\n", FLAG_test_float);
+ printf("FLAG[test_string] = %s\n", FLAG_test_string);
+ printf("FLAG[test_callback] = %d\n", callback_count);
+
+ // Report positional arguments:
+ for (int i = 0; i < argc; ++i) {
+ printf("ARG(%d) = %s\n", i, argv[i]);
+ }
+
+ // Dump all flags back out for round-tripping:
+ iree_flags_dump(IREE_FLAG_DUMP_MODE_DEFAULT, stdout);
+
+ return 0;
+}
diff --git a/runtime/src/iree/base/internal/flags_test.txt b/runtime/src/iree/base/internal/flags_test.txt
new file mode 100644
index 0000000..74f14ed
--- /dev/null
+++ b/runtime/src/iree/base/internal/flags_test.txt
@@ -0,0 +1,94 @@
+// RUN: ( flags_demo ) | FileCheck --check-prefix=NO-FLAGS %s
+// NO-FLAGS: FLAG[test_bool] = false
+// NO-FLAGS: FLAG[test_int32] = 123
+// NO-FLAGS: FLAG[test_int64] = 555
+// NO-FLAGS: FLAG[test_float] = 1
+// NO-FLAGS: FLAG[test_string] = some default
+// NO-FLAGS: FLAG[test_callback] = 0
+// NO-FLAGS: ARG(0) ={{.+}}flags_demo
+
+// RUN: ( flags_demo --help ) | FileCheck --check-prefix=FLAGS-HELP %s
+// FLAGS-HELP: # {{.+}} IREE
+// FLAGS-HELP: # Flags in {{.+}}flags.c
+// FLAGS-HELP: # Displays command line usage information.
+// FLAGS-HELP: --help
+// FLAGS-HELP: # Flags in {{.+}}flags_demo.c
+// FLAGS-HELP: # A boolean value.
+// FLAGS-HELP: --test_bool=false
+// FLAGS-HELP: # An int32_t value.
+// FLAGS-HELP: --test_int32=123
+// FLAGS-HELP: # An int64_t value.
+// FLAGS-HELP: --test_int64=555
+// FLAGS-HELP: # A float value.
+// FLAGS-HELP: --test_float=1
+// FLAGS-HELP: # A string
+// FLAGS-HELP: # value.
+// FLAGS-HELP: --test_string="some default"
+// FLAGS-HELP: # Callback!
+// FLAGS-HELP: --test_callback=0
+
+// RUN: ( flags_demo --unknown-flag 2>&1 || [[ $? == 1 ]] ) | FileCheck --check-prefix=UNKNOWN-FLAG %s
+// UNKNOWN-FLAG: INVALID_ARGUMENT; flag 'unknown-flag' not recognized
+
+// RUN: ( flags_demo --test_bool=true ) | FileCheck --check-prefix=FLAG-BOOL-TRUE %s
+// FLAG-BOOL-TRUE: FLAG[test_bool] = true
+// RUN: ( flags_demo --test_bool=1 ) | FileCheck --check-prefix=FLAG-BOOL-1 %s
+// FLAG-BOOL-1: FLAG[test_bool] = true
+// RUN: ( flags_demo --test_bool=true --test_bool=false ) | FileCheck --check-prefix=FLAG-BOOL-OVERRIDE %s
+// FLAG-BOOL-OVERRIDE: FLAG[test_bool] = false
+
+// RUN: ( flags_demo --test_int32=456 ) | FileCheck --check-prefix=FLAG-INT32 %s
+// FLAG-INT32: FLAG[test_int32] = 456
+// RUN: ( flags_demo --test_int32=-2147483648 ) | FileCheck --check-prefix=FLAG-INT32-MIN %s
+// FLAG-INT32-MIN: FLAG[test_int32] = -2147483648
+// RUN: ( flags_demo --test_int32=2147483647 ) | FileCheck --check-prefix=FLAG-INT32-MAX %s
+// FLAG-INT32-MAX: FLAG[test_int32] = 2147483647
+
+// RUN: ( flags_demo --test_int64=902834 ) | FileCheck --check-prefix=FLAG-INT64 %s
+// FLAG-INT64: FLAG[test_int64] = 902834
+// RUN: ( flags_demo --test_int64=-9223372036854775808 ) | FileCheck --check-prefix=FLAG-INT64-MIN %s
+// FLAG-INT64-MIN: FLAG[test_int64] = -9223372036854775808
+// RUN: ( flags_demo --test_int64=9223372036854775807 ) | FileCheck --check-prefix=FLAG-INT64-MAX %s
+// FLAG-INT64-MAX: FLAG[test_int64] = 9223372036854775807
+
+// RUN: ( flags_demo --test_float=1.1234 ) | FileCheck --check-prefix=FLAG-FLOAT %s
+// FLAG-FLOAT: FLAG[test_float] = 1.1234
+
+// RUN: ( flags_demo --test_string= ) | FileCheck --check-prefix=FLAG-STRING-EMPTY %s
+// FLAG-STRING-EMPTY: FLAG[test_string] =
+// RUN: ( flags_demo --test_string=abc ) | FileCheck --check-prefix=FLAG-STRING-ABC %s
+// FLAG-STRING-ABC: FLAG[test_string] = abc
+// RUN: ( flags_demo --test_string="with some space" ) | FileCheck --check-prefix=FLAG-STRING-SPACES %s
+// FLAG-STRING-SPACES: FLAG[test_string] = with some space
+
+// RUN: ( flags_demo --test_callback=1 ) | FileCheck --check-prefix=FLAG-CALLBACK-1 %s
+// FLAG-CALLBACK-1: FLAG[test_callback] = 1
+// RUN: ( flags_demo --test_callback=4 ) | FileCheck --check-prefix=FLAG-CALLBACK-4 %s
+// FLAG-CALLBACK-4: FLAG[test_callback] = 4
+// RUN: ( flags_demo --test_callback=FORCE_FAILURE 2>&1 || [[ $? == 1 ]] ) | FileCheck --check-prefix=FLAG-CALLBACK-ERROR %s
+// FLAG-CALLBACK-ERROR: INTERNAL; callbacks can do verification
+
+// RUN: ( flags_demo arg1 ) | FileCheck --check-prefix=FLAG-POSITIONAL-1 %s
+// FLAG-POSITIONAL-1: ARG(1) = arg1
+// RUN: ( flags_demo arg1 arg2 arg3 ) | FileCheck --check-prefix=FLAG-POSITIONAL-3 %s
+// FLAG-POSITIONAL-3: ARG(1) = arg1
+// FLAG-POSITIONAL-3: ARG(2) = arg2
+// FLAG-POSITIONAL-3: ARG(3) = arg3
+
+// RUN: ( flags_demo --test_bool=true --flagfile=not_found.txt 2>&1 || [[ $? == 1 ]] ) | FileCheck --check-prefix=MISSING-FLAGFILE %s
+// MISSING-FLAGFILE: NOT_FOUND; failed to open file 'not_found.txt'
+
+// RUN: ( flags_demo --test_bool=true --flagfile=%s ) | FileCheck --check-prefix=FLAGFILE %s
+# Comments are ignored.
+// FLAGFILE: FLAG[test_bool] = false
+--test_bool=false
+// FLAGFILE: FLAG[test_int64] = 123111
+// Note that whitespace is ignored in case you are copy/pasting flags around.
+ --test_int64=123111
+// FLAGFILE: FLAG[test_float] = 55.1
+--test_float=55.1
+// FLAGFILE: FLAG[test_string] = override spaces
+--test_string="override spaces"
+
+
+# NOTE: above two lines are to test that vertical whitespace is ok.
diff --git a/runtime/src/iree/base/internal/flatcc/BUILD b/runtime/src/iree/base/internal/flatcc/BUILD
new file mode 100644
index 0000000..c7a93dd
--- /dev/null
+++ b/runtime/src/iree/base/internal/flatcc/BUILD
@@ -0,0 +1,53 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/bazel:iree_flatcc.bzl", "iree_flatbuffer_c_library")
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
# Wrapper for flatcc builder headers; pulls in :parsing so generated builder
# headers see their prerequisites in the correct include order.
iree_runtime_cc_library(
    name = "building",
    hdrs = ["building.h"],
    deps = [
        ":dummy",
        ":parsing",
        "@com_github_dvidelabs_flatcc//:runtime",
    ],
)

# Wrapper for flatcc JSON parser/printer headers used for debugging output.
iree_runtime_cc_library(
    name = "debugging",
    hdrs = ["debugging.h"],
    deps = [
        ":dummy",
        "@com_github_dvidelabs_flatcc//:runtime",
    ],
)

# Wrapper for flatcc reader/verifier headers; the base dependency for the
# other wrappers above.
iree_runtime_cc_library(
    name = "parsing",
    hdrs = ["parsing.h"],
    deps = [
        ":dummy",
        "@com_github_dvidelabs_flatcc//:parsing",
    ],
)

# Generates dummy_{reader,builder,verifier,json_*}.h from dummy.fbs; exists
# solely so the wrapper headers always have generated flatcc files to include
# (see the HACK note in dummy.fbs).
iree_flatbuffer_c_library(
    name = "dummy",
    srcs = ["dummy.fbs"],
    flatcc_args = [
        "--reader",
        "--builder",
        "--verifier",
        "--json",
    ],
)
diff --git a/runtime/src/iree/base/internal/flatcc/CMakeLists.txt b/runtime/src/iree/base/internal/flatcc/CMakeLists.txt
new file mode 100644
index 0000000..92d2ee7
--- /dev/null
+++ b/runtime/src/iree/base/internal/flatcc/CMakeLists.txt
@@ -0,0 +1,60 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/base/internal/flatcc/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ building
+ HDRS
+ "building.h"
+ DEPS
+ ::dummy
+ ::parsing
+ flatcc::runtime
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ debugging
+ HDRS
+ "debugging.h"
+ DEPS
+ ::dummy
+ flatcc::runtime
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ parsing
+ HDRS
+ "parsing.h"
+ DEPS
+ ::dummy
+ flatcc::parsing
+ PUBLIC
+)
+
+flatbuffer_c_library(
+ NAME
+ dummy
+ SRCS
+ "dummy.fbs"
+ FLATCC_ARGS
+ "--reader"
+ "--builder"
+ "--verifier"
+ "--json"
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/base/internal/flatcc/building.h b/runtime/src/iree/base/internal/flatcc/building.h
new file mode 100644
index 0000000..14fa965
--- /dev/null
+++ b/runtime/src/iree/base/internal/flatcc/building.h
@@ -0,0 +1,32 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_FLATCC_BUILDING_H_
+#define IREE_BASE_INTERNAL_FLATCC_BUILDING_H_
+
+//===----------------------------------------------------------------------===//
+// flatcc include order fixes
+//===----------------------------------------------------------------------===//
+//
+// This header merely wraps the flatcc headers that are generally useful to
+// include in various places that may not know the specific messages they are
+// working with.
+//
+// If using flatcc prefer to include this file over any hard-to-handle flatcc
+// file such as flatbuffers_common_reader.h or flatbuffers_common_builder.h.
+//
+// NOTE: order matters for these includes so stop clang from messing with it:
+// clang-format off
+
+#include "iree/base/internal/flatcc/parsing.h"
+
+#include "flatcc/flatcc_builder.h" // IWYU pragma: export
+#include "flatcc/reflection/flatbuffers_common_builder.h" // IWYU pragma: export
+#include "iree/base/internal/flatcc/dummy_builder.h" // IWYU pragma: export
+
+// clang-format on
+
+#endif // IREE_BASE_INTERNAL_FLATCC_BUILDING_H_
diff --git a/runtime/src/iree/base/internal/flatcc/debugging.h b/runtime/src/iree/base/internal/flatcc/debugging.h
new file mode 100644
index 0000000..fdbc7e5
--- /dev/null
+++ b/runtime/src/iree/base/internal/flatcc/debugging.h
@@ -0,0 +1,34 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_FLATCC_DEBUGGING_H_
+#define IREE_BASE_INTERNAL_FLATCC_DEBUGGING_H_
+
+//===----------------------------------------------------------------------===//
+// flatcc include order fixes
+//===----------------------------------------------------------------------===//
+//
+// This header merely wraps the flatcc headers that are generally useful to
+// include in various places that may not know the specific messages they are
+// working with.
+//
+// If using flatcc prefer to include this file over any hard-to-handle flatcc
+// file such as flatbuffers_common_reader.h or flatbuffers_common_builder.h.
+//
+// NOTE: order matters for these includes so stop clang from messing with it:
+// clang-format off
+
+#include "iree/base/internal/flatcc/parsing.h"
+
+#include "flatcc/flatcc_json_parser.h" // IWYU pragma: export
+#include "iree/base/internal/flatcc/dummy_json_parser.h" // IWYU pragma: export
+
+#include "flatcc/flatcc_json_printer.h" // IWYU pragma: export
+#include "iree/base/internal/flatcc/dummy_json_printer.h" // IWYU pragma: export
+
+// clang-format on
+
+#endif // IREE_BASE_INTERNAL_FLATCC_DEBUGGING_H_
diff --git a/runtime/src/iree/base/internal/flatcc/dummy.fbs b/runtime/src/iree/base/internal/flatcc/dummy.fbs
new file mode 100644
index 0000000..626af1e
--- /dev/null
+++ b/runtime/src/iree/base/internal/flatcc/dummy.fbs
@@ -0,0 +1,22 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+namespace iree_flatcc;
+
+// HACK: flatcc public API headers are incomplete and some things only exist
+// when pulled in via generated headers. So here we give ourselves something to
+// include that's always available and cheap.
+//
+// Instead of directly including this file use iree/base/internal/flatcc/*.h.
+//
+// Normally including any generated file will include the appropriate headers in
+// the required order (as they are non-hermetic), but that requires that we have
+// a generated file. Though most of the API is exposed through the main includes
+// there are various types that only get generated and included by way of the
+// common headers that are not easily included.
struct __IncludeWorkaround {
  // Placeholder field that is never read; present only so the struct (and
  // thus the generated headers) is non-empty.
  reserved:int;
}
diff --git a/runtime/src/iree/base/internal/flatcc/parsing.h b/runtime/src/iree/base/internal/flatcc/parsing.h
new file mode 100644
index 0000000..4e1c675
--- /dev/null
+++ b/runtime/src/iree/base/internal/flatcc/parsing.h
@@ -0,0 +1,32 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_FLATCC_PARSING_H_
+#define IREE_BASE_INTERNAL_FLATCC_PARSING_H_
+
+//===----------------------------------------------------------------------===//
+// flatcc include order fixes
+//===----------------------------------------------------------------------===//
+//
+// This header merely wraps the flatcc headers that are generally useful to
+// include in various places that may not know the specific messages they are
+// working with.
+//
+// If using flatcc prefer to include this file over any hard-to-handle flatcc
+// file such as flatbuffers_common_reader.h or flatbuffers_common_builder.h.
+//
+// NOTE: order matters for these includes so stop clang from messing with it:
+// clang-format off
+
+#include "flatcc/reflection/flatbuffers_common_reader.h" // IWYU pragma: export
+#include "iree/base/internal/flatcc/dummy_reader.h" // IWYU pragma: export
+
+#include "flatcc/flatcc_verifier.h" // IWYU pragma: export
+#include "iree/base/internal/flatcc/dummy_verifier.h" // IWYU pragma: export
+
+// clang-format on
+
+#endif // IREE_BASE_INTERNAL_FLATCC_PARSING_H_
diff --git a/runtime/src/iree/base/internal/fpu_state.c b/runtime/src/iree/base/internal/fpu_state.c
new file mode 100644
index 0000000..f44af3b
--- /dev/null
+++ b/runtime/src/iree/base/internal/fpu_state.c
@@ -0,0 +1,108 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/fpu_state.h"
+
+#include <stdbool.h>
+
+#include "iree/base/target_platform.h"
+
+#if defined(IREE_ARCH_X86_32) || defined(IREE_ARCH_X86_64)
+#include <xmmintrin.h>
+#endif // IREE_ARCH_X86_*
+
+#if defined(IREE_COMPILER_MSVC)
+#include <intrin.h>
+#endif // IREE_COMPILER_MSVC
+
+//==============================================================================
+// iree_fpu_state_t
+//==============================================================================
+// https://github.com/petewarden/tensorflow_makefile/blob/master/tensorflow/core/platform/denormal.cc
+// https://chromium.googlesource.com/chromium/blink/+/master/Source/platform/audio/DenormalDisabler.h
+
// Returns |state| with the architecture's denormal-flushing control bits set
// or cleared per |denormals_to_zero|. On architectures without a known
// control bit the state passes through unchanged.
static uint64_t iree_fpu_state_set_dtz(uint64_t state, bool denormals_to_zero);

#if defined(IREE_ARCH_ARM_32)
static uint64_t iree_fpu_state_set_dtz(uint64_t state, bool denormals_to_zero) {
  // FPSCR FZ (flush-to-zero) = bit 24 (0x1000000).
  return (state & ~0x1000000) | (denormals_to_zero ? 0x1000000 : 0);
}
#elif defined(IREE_ARCH_ARM_64)
static uint64_t iree_fpu_state_set_dtz(uint64_t state, bool denormals_to_zero) {
  // FPCR FZ = bit 24 (0x1000000) plus FZ16 = bit 19 (0x80000) for half floats.
  return (state & ~0x1080000) | (denormals_to_zero ? 0x1080000 : 0);
}
#elif defined(IREE_ARCH_X86_32) || defined(IREE_ARCH_X86_64)
static uint64_t iree_fpu_state_set_dtz(uint64_t state, bool denormals_to_zero) {
  // MXCSR FTZ (flush outputs) = bit 15 (0x8000) plus DAZ (treat inputs as
  // zero) = bit 6 (0x40).
  return (state & ~0x8040) | (denormals_to_zero ? 0x8040 : 0);
}
#else
// Unknown architecture: no denormal control available; leave state untouched.
static uint64_t iree_fpu_state_set_dtz(uint64_t state, bool denormals_to_zero) {
  return state;
}
#endif  // IREE_ARCH_*
+
// Reads the current FPU control/status register for this architecture
// (FPSCR on ARM32, FPCR on ARM64, MXCSR on x86), widened to uint64_t.
static uint64_t iree_fpu_load_state(void);
// Writes |state| back to the FPU control/status register.
static void iree_fpu_store_state(uint64_t state);

#if defined(IREE_ARCH_ARM_32) && defined(IREE_COMPILER_MSVC)
// MSVC has no ARM32 inline asm; FPSCR is accessed as coprocessor 10 register.
static uint64_t iree_fpu_load_state(void) {
  return (uint64_t)_MoveFromCoprocessor(10, 7, 1, 0, 0);
}
static void iree_fpu_store_state(uint64_t state) {
  _MoveToCoprocessor((int)state, 10, 7, 1, 0, 0);
}
#elif defined(IREE_ARCH_ARM_32)
static uint64_t iree_fpu_load_state() {
  uint32_t fpscr;
  __asm__ __volatile__("VMRS %[fpscr], fpscr" : [ fpscr ] "=r"(fpscr));
  return (uint64_t)fpscr;
}
static void iree_fpu_store_state(uint64_t state) {
  __asm__ __volatile__("VMSR fpscr, %[fpscr]" : : [ fpscr ] "r"(state));
}
#elif defined(IREE_ARCH_ARM_64) && defined(IREE_COMPILER_MSVC)
// 0x5A20 is the MSVC system-register encoding for FPCR (ARM64_FPCR).
static uint64_t iree_fpu_load_state(void) {
  return (uint64_t)_ReadStatusReg(0x5A20);
}
static void iree_fpu_store_state(uint64_t state) {
  _WriteStatusReg(0x5A20, (__int64)state);
}
#elif defined(IREE_ARCH_ARM_64)
static uint64_t iree_fpu_load_state(void) {
  uint64_t fpcr;
  __asm__ __volatile__("MRS %[fpcr], fpcr" : [ fpcr ] "=r"(fpcr));
  return fpcr;
}
static void iree_fpu_store_state(uint64_t state) {
  __asm__ __volatile__("MSR fpcr, %[fpcr]" : : [ fpcr ] "r"(state));
}
#elif defined(IREE_ARCH_X86_32) || defined(IREE_ARCH_X86_64)
// MXCSR via the SSE intrinsics from <xmmintrin.h>.
static uint64_t iree_fpu_load_state(void) { return (uint64_t)_mm_getcsr(); }
static void iree_fpu_store_state(uint64_t state) {
  _mm_setcsr((unsigned int)state);
}
#else
// Unknown architecture: state manipulation is a no-op.
static uint64_t iree_fpu_load_state(void) { return 0; }
static void iree_fpu_store_state(uint64_t state) {}
#endif  // IREE_ARCH_*
+
+iree_fpu_state_t iree_fpu_state_push(iree_fpu_state_flags_t flags) {
+ iree_fpu_state_t state;
+ state.current_value = state.previous_value = iree_fpu_load_state();
+ state.current_value = iree_fpu_state_set_dtz(
+ state.current_value,
+ (flags & IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO) ? true : false);
+ if (state.previous_value != state.current_value) {
+ iree_fpu_store_state(state.current_value);
+ }
+ return state;
+}
+
+void iree_fpu_state_pop(iree_fpu_state_t state) {
+ if (state.previous_value != state.current_value) {
+ iree_fpu_store_state(state.previous_value);
+ }
+}
diff --git a/runtime/src/iree/base/internal/fpu_state.h b/runtime/src/iree/base/internal/fpu_state.h
new file mode 100644
index 0000000..fc9a36c
--- /dev/null
+++ b/runtime/src/iree/base/internal/fpu_state.h
@@ -0,0 +1,59 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_FPU_STATE_H_
+#define IREE_BASE_INTERNAL_FPU_STATE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//==============================================================================
+// iree_fpu_state_*
+//==============================================================================
+
+// Flags controlling FPU features.
+enum iree_fpu_state_flag_bits_t {
+ // Platform default.
+ IREE_FPU_STATE_DEFAULT = 0,
+
+ // Denormals can cause some serious slowdowns in certain ISAs where they may
+ // be implemented in microcode. Flushing them to zero instead of letting them
+ // propagate ensures that the slow paths aren't hit. This is a fast-math style
+ // optimization (and is often part of all compiler's fast-math set of flags).
+ //
+ // https://en.wikipedia.org/wiki/Denormal_number
+ // https://carlh.net/plugins/denormals.php
+ // https://www.xspdf.com/resolution/50507310.html
+ IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO = 1 << 0,
+};
+typedef uint32_t iree_fpu_state_flags_t;
+
// Opaque FPU state vector manipulated with iree_fpu_* functions.
typedef struct iree_fpu_state_t {
  // FPU control register value captured before iree_fpu_state_push modified
  // it; restored by iree_fpu_state_pop.
  uint64_t previous_value;
  // FPU control register value in effect after the requested flags were
  // applied.
  uint64_t current_value;
} iree_fpu_state_t;
+
+// Pushes a new floating-point unit (FPU) state for the current thread.
+// May lead to a pipeline flush; avoid if possible.
+iree_fpu_state_t iree_fpu_state_push(iree_fpu_state_flags_t flags);
+
+// Restores the FPU state of the thread to its original value.
+// May lead to a pipeline flush; avoid if possible.
+void iree_fpu_state_pop(iree_fpu_state_t state);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_FPU_STATE_H_
diff --git a/runtime/src/iree/base/internal/fpu_state_benchmark.cc b/runtime/src/iree/base/internal/fpu_state_benchmark.cc
new file mode 100644
index 0000000..ff8ffa7
--- /dev/null
+++ b/runtime/src/iree/base/internal/fpu_state_benchmark.cc
@@ -0,0 +1,124 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstddef>
+
+#include "benchmark/benchmark.h"
+#include "iree/base/api.h"
+#include "iree/base/internal/fpu_state.h"
+
+namespace {
+
+constexpr size_t kElementBufferSize = 2048;
+
+// Scales a buffer of floats by |scale| and disables autovectorization.
+// Will generally be normal scalar floating point math and indicate whether the
+// FPU has issues with denormals.
static float UnvectorizedScaleBufferByValue(float scale) {
  float buffer[kElementBufferSize];
  for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
    buffer[i] = 1.0f;
  }
  benchmark::DoNotOptimize(*buffer);
  // The per-element DoNotOptimize inside the loop prevents the compiler from
  // autovectorizing, keeping this on the scalar FPU path (contrast
  // VectorizedScaleBufferByValue below).
  for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
    buffer[i] *= scale;
    benchmark::DoNotOptimize(buffer[i]);
  }
  benchmark::DoNotOptimize(*buffer);
  // Reduce to a single value so the scaling loop cannot be elided entirely.
  float sum = 0.0f;
  for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
    sum += buffer[i];
  }
  return sum;
}
+
+// Scales a buffer of floats by |scale| and allows autovectorization.
+// Will generally be SIMD floating point math and indicate whether the vector
+// units (NEON, AVX, etc) have issues with denormals.
static float VectorizedScaleBufferByValue(float scale) {
  float buffer[kElementBufferSize];
  for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
    buffer[i] = 1.0f;
  }
  benchmark::DoNotOptimize(*buffer);
  // No per-element DoNotOptimize here: the plain loop is eligible for
  // autovectorization, exercising SIMD denormal behavior.
  for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
    buffer[i] *= scale;
  }
  benchmark::DoNotOptimize(*buffer);
  // Reduce to a single value so the scaling loop cannot be elided entirely.
  float sum = 0.0f;
  for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
    sum += buffer[i];
  }
  return sum;
}
+
// Benchmark matrix: {scalar, vectorized} x {normal values, denormal values,
// denormals with FTZ pushed, denormals with default state}. 1e-39f is below
// FLT_MIN and therefore denormal; 1.0f is the normal-value baseline.

void BM_UnvectorizedNormals(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(UnvectorizedScaleBufferByValue(1.0f));
  }
}
BENCHMARK(BM_UnvectorizedNormals);

void BM_UnvectorizedDenormals(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(UnvectorizedScaleBufferByValue(1e-39f));
  }
}
BENCHMARK(BM_UnvectorizedDenormals);

void BM_UnvectorizedDenormalsFlushedToZero(benchmark::State& state) {
  // FPU state is pushed outside the timing loop so only the math is measured.
  iree_fpu_state_t fpu_state =
      iree_fpu_state_push(IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO);
  for (auto _ : state) {
    benchmark::DoNotOptimize(UnvectorizedScaleBufferByValue(1e-39f));
  }
  iree_fpu_state_pop(fpu_state);
}
BENCHMARK(BM_UnvectorizedDenormalsFlushedToZero);

void BM_UnvectorizedDenormalsNotFlushedToZero(benchmark::State& state) {
  iree_fpu_state_t fpu_state = iree_fpu_state_push(IREE_FPU_STATE_DEFAULT);
  for (auto _ : state) {
    benchmark::DoNotOptimize(UnvectorizedScaleBufferByValue(1e-39f));
  }
  iree_fpu_state_pop(fpu_state);
}
BENCHMARK(BM_UnvectorizedDenormalsNotFlushedToZero);

void BM_VectorizedNormals(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(VectorizedScaleBufferByValue(1.0f));
  }
}
BENCHMARK(BM_VectorizedNormals);

void BM_VectorizedDenormals(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(VectorizedScaleBufferByValue(1e-39f));
  }
}
BENCHMARK(BM_VectorizedDenormals);

void BM_VectorizedDenormalsFlushedToZero(benchmark::State& state) {
  iree_fpu_state_t fpu_state =
      iree_fpu_state_push(IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO);
  for (auto _ : state) {
    benchmark::DoNotOptimize(VectorizedScaleBufferByValue(1e-39f));
  }
  iree_fpu_state_pop(fpu_state);
}
BENCHMARK(BM_VectorizedDenormalsFlushedToZero);

void BM_VectorizedDenormalsNotFlushedToZero(benchmark::State& state) {
  iree_fpu_state_t fpu_state = iree_fpu_state_push(IREE_FPU_STATE_DEFAULT);
  for (auto _ : state) {
    benchmark::DoNotOptimize(VectorizedScaleBufferByValue(1e-39f));
  }
  iree_fpu_state_pop(fpu_state);
}
BENCHMARK(BM_VectorizedDenormalsNotFlushedToZero);
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/fpu_state_test.cc b/runtime/src/iree/base/internal/fpu_state_test.cc
new file mode 100644
index 0000000..74bc0eb
--- /dev/null
+++ b/runtime/src/iree/base/internal/fpu_state_test.cc
@@ -0,0 +1,28 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/fpu_state.h"
+
+#include "iree/testing/gtest.h"
+
+namespace {
+
+// NOTE: depending on compiler options or architecture denormals may always be
+// flushed to zero. Here we just test that they are flushed when we request them
+// to be.
TEST(FPUStateTest, FlushDenormalsToZero) {
  iree_fpu_state_t fpu_state =
      iree_fpu_state_push(IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO);

  // Multiply through a volatile pointer so the compiler cannot constant-fold
  // the denormal-producing multiply at compile time. 1e-39f is below FLT_MIN
  // so the product is denormal and must flush to exactly 0 while FTZ is on.
  float f = 1.0f;
  volatile float* fp = &f;
  *fp = *fp * 1e-39f;
  EXPECT_EQ(0.0f, f);

  iree_fpu_state_pop(fpu_state);
}
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/inline_array.h b/runtime/src/iree/base/internal/inline_array.h
new file mode 100644
index 0000000..ccbaf14
--- /dev/null
+++ b/runtime/src/iree/base/internal/inline_array.h
@@ -0,0 +1,59 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_INLINE_ARRAY_H_
+#define IREE_BASE_INTERNAL_INLINE_ARRAY_H_
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//==============================================================================
+// iree_inline_array_t
+//==============================================================================
+
+// Maximum number of bytes that can be allocated from the stack.
+// Arrays exceeding this size will incur a heap allocation.
+#define IREE_INLINE_ARRAY_MAX_STACK_ALLOCATION 512
+
// Declares and initializes an inline array |variable| of (initial_size)
// elements of |type| in the current scope. Storage comes from the stack
// (iree_alloca) when the total byte size fits in
// IREE_INLINE_ARRAY_MAX_STACK_ALLOCATION and otherwise from |allocator|
// (IREE_CHECK_OK aborts on allocation failure). Must be paired with an
// iree_inline_array_deinitialize in the same scope; the size/threshold logic
// there must mirror this macro exactly so heap allocations are freed.
#define iree_inline_array(type, variable, initial_size, allocator) \
  const iree_allocator_t variable##_allocator = (allocator);       \
  struct {                                                         \
    iree_host_size_t size;                                         \
    type* data;                                                    \
  } variable = {                                                   \
      (initial_size),                                              \
      NULL,                                                        \
  };                                                               \
  if (IREE_UNLIKELY(sizeof(type) * (initial_size) >                \
                    IREE_INLINE_ARRAY_MAX_STACK_ALLOCATION)) {     \
    IREE_CHECK_OK(iree_allocator_malloc(variable##_allocator,      \
                                        sizeof(type) * (initial_size), \
                                        (void**)&(variable).data)); \
  } else {                                                         \
    (variable).data = (type*)iree_alloca(sizeof(type) * (initial_size)); \
  }
+
// Releases the storage of an array declared with iree_inline_array. Re-derives
// the stack-vs-heap decision from element size * count, which must match the
// threshold test in iree_inline_array; stack storage needs no cleanup.
#define iree_inline_array_deinitialize(variable)             \
  if (IREE_UNLIKELY(sizeof(*(variable).data) * (variable).size > \
                    IREE_INLINE_ARRAY_MAX_STACK_ALLOCATION)) {   \
    iree_allocator_free(variable##_allocator, (variable).data);  \
  }
+
// Returns the number of elements in the array.
#define iree_inline_array_size(variable) (variable).size

// Returns the element capacity of the array. Inline arrays are fixed-size so
// the capacity always equals the size requested at initialization.
// NOTE: this previously expanded to a nonexistent `.capacity` member (the
// struct declared by iree_inline_array only has `size` and `data`) and could
// not compile if used.
#define iree_inline_array_capacity(variable) (variable).size
// Returns a pointer to the first element of the array.
#define iree_inline_array_data(variable) (variable).data

// Returns a pointer to the element at |index| (no bounds checking).
#define iree_inline_array_at(variable, index) (&(variable).data[(index)])
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_INLINE_ARRAY_H_
diff --git a/runtime/src/iree/base/internal/main.h b/runtime/src/iree/base/internal/main.h
new file mode 100644
index 0000000..1321832
--- /dev/null
+++ b/runtime/src/iree/base/internal/main.h
@@ -0,0 +1,20 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_MAIN_H_
+#define IREE_BASE_INTERNAL_MAIN_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
// Platform-independent program entry point. Each main_*.c platform shim
// forwards its native entry point (main/WinMain) here; applications define
// iree_main instead of main.
int iree_main(int argc, char** argv);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_INTERNAL_MAIN_H_
diff --git a/runtime/src/iree/base/internal/main_posix.c b/runtime/src/iree/base/internal/main_posix.c
new file mode 100644
index 0000000..cf884a3
--- /dev/null
+++ b/runtime/src/iree/base/internal/main_posix.c
@@ -0,0 +1,15 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/main.h"
+#include "iree/base/target_platform.h"
+
#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_APPLE) || \
    defined(IREE_PLATFORM_LINUX)

// POSIX-like platforms: the standard C entry point forwards straight to the
// application-provided iree_main with no additional setup required.
int main(int argc, char** argv) { return iree_main(argc, argv); }

#endif  // IREE_PLATFORM_*
diff --git a/runtime/src/iree/base/internal/main_win32.c b/runtime/src/iree/base/internal/main_win32.c
new file mode 100644
index 0000000..119ed19
--- /dev/null
+++ b/runtime/src/iree/base/internal/main_win32.c
@@ -0,0 +1,35 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdlib.h>
+
+#include "iree/base/internal/main.h"
+#include "iree/base/target_platform.h"
+
+#if defined(IREE_PLATFORM_WINDOWS)
+
+#include <combaseapi.h>
+
+// Entry point when using /SUBSYSTEM:CONSOLE is the standard main().
+int main(int argc, char** argv) { return iree_main(argc, argv); }
+
// Entry point when using /SUBSYSTEM:WINDOWS.
// https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-winmain
// NOTE(review): HINSTANCE/WINAPI/LPSTR conventionally come from <windows.h>;
// confirm <combaseapi.h> alone provides them in all build configurations.
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
                   LPSTR lpCmdLine, int nShowCmd) {
  // Setup COM on the main thread.
  // NOTE: this may fail if COM has already been initialized - that's OK.
  CoInitializeEx(NULL, COINIT_MULTITHREADED);

  // Run standard main function.
  // We use the MSVCRT __argc/__argv to get access to the standard argc/argv
  // vs. using the flattened string passed to WinMain (that would require
  // complex unicode splitting/etc).
  // https://docs.microsoft.com/en-us/cpp/c-runtime-library/argc-argv-wargv
  return iree_main(__argc, __argv);
}
+
+#endif // IREE_PLATFORM_WINDOWS
diff --git a/runtime/src/iree/base/internal/math.h b/runtime/src/iree/base/internal/math.h
new file mode 100644
index 0000000..af767a7
--- /dev/null
+++ b/runtime/src/iree/base/internal/math.h
@@ -0,0 +1,310 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_MATH_H_
+#define IREE_BASE_INTERNAL_MATH_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/alignment.h"
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+
+// Haswell or later, gcc compile time option: -mlzcnt
+#if defined(__LZCNT__)
+#include <x86intrin.h>
+#endif
+
+// Clang on Windows has __builtin_clzll; otherwise we need to use the
+// windows intrinsic functions.
+#if defined(IREE_COMPILER_MSVC)
+#include <intrin.h>
+#if defined(IREE_ARCH_ARM_64) || defined(IREE_ARCH_X86_64)
+#pragma intrinsic(_BitScanReverse64)
+#pragma intrinsic(_BitScanForward64)
+#endif
+#pragma intrinsic(_BitScanReverse)
+#pragma intrinsic(_BitScanForward)
+#endif // IREE_COMPILER_MSVC
+
+// Shifts |value| right by |shamt| bits, yielding 0 when |shamt| is >= the
+// bit width of |value| (where a plain >> would be undefined behavior in C).
+#define iree_shr(value, shamt) \
+  (((shamt) < sizeof(value) * 8) ? ((value) >> (shamt)) : 0)
+
+//==============================================================================
+// Bitwise rotation (aka circular shifts)
+//==============================================================================
+
+// Unsigned rotate-left a 64-bit integer.
+// https://en.cppreference.com/w/cpp/numeric/rotl
+//
+// Rotation counts are masked to their low 6 bits, so c >= 64 wraps (c == 64
+// behaves as c == 0 and returns |n| unchanged).
+//
+// NOTE: this exact form is confirmed to be recognized by the compilers we care
+// about; do not modify: https://godbolt.org/z/xzof9d
+static inline uint64_t iree_math_rotl_u64(const uint64_t n, uint32_t c) {
+  const uint32_t mask = 8 * sizeof(n) - 1;
+  c &= mask;
+  if (!c) return n;
+  return (n << c) | (n >> (64 - c));
+}
+
+// Unsigned rotate-right a 64-bit integer.
+// https://en.cppreference.com/w/cpp/numeric/rotr
+//
+// Rotation counts are masked to their low 6 bits, so c >= 64 wraps.
+//
+// NOTE: this exact form is confirmed to be recognized by the compilers we care
+// about **except MSVC**; do not modify: https://godbolt.org/z/xzof9d
+static inline uint64_t iree_math_rotr_u64(const uint64_t n, uint32_t c) {
+  const uint32_t mask = 8 * sizeof(n) - 1;
+  c &= mask;
+  if (!c) return n;
+  return (n >> c) | (n << ((-c) & mask));
+}
+
+//==============================================================================
+// Bit scanning/counting
+//==============================================================================
+
+// Returns the number of leading zero bits in |n|, or 32 when |n| == 0.
+static inline int iree_math_count_leading_zeros_u32(const uint32_t n) {
+#if defined(IREE_COMPILER_MSVC)
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if (_BitScanReverse(&result, n)) {
+    return (int)(31 - result);
+  }
+  // _BitScanReverse returns 0 (and leaves |result| undefined) for n == 0.
+  return 32;
+#elif defined(IREE_COMPILER_GCC_COMPAT)
+#if defined(__LZCNT__)
+  // NOTE: LZCNT is a risky instruction; it is not supported on architectures
+  // before Haswell, yet it is encoded as 'rep bsr', which typically ignores
+  // invalid rep prefixes, and interprets it as the 'bsr' instruction, which
+  // returns the index of the value rather than the count, resulting in
+  // incorrect code. This path is only taken when the compiler is explicitly
+  // targeting LZCNT hardware (e.g. -mlzcnt), matching the x86intrin.h include
+  // guard at the top of this file. (It previously tested the misspelled
+  // __LCZNT__ macro and was dead code.)
+  return (int)__lzcnt32(n);
+#endif  // defined(__LZCNT__)
+
+  // Handle 0 as a special case because __builtin_clz(0) is undefined.
+  if (n == 0) return 32;
+  // Use __builtin_clz, which uses the following instructions:
+  //  x86: bsr
+  //  ARM64: clz
+  //  PPC: cntlzd
+  return (int)__builtin_clz(n);
+#else
+#error No clz for this arch.
+#endif  // IREE_COMPILER_MSVC / IREE_COMPILER_GCC_COMPAT
+}
+
+// Returns the number of leading zero bits in |n|, or 64 when |n| == 0.
+static inline int iree_math_count_leading_zeros_u64(uint64_t n) {
+#if defined(IREE_COMPILER_MSVC) && \
+    (defined(IREE_ARCH_ARM_64) || defined(IREE_ARCH_X86_64))
+  // MSVC does not have __builtin_clzll. Use _BitScanReverse64.
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if (_BitScanReverse64(&result, n)) {
+    return (int)(63 - result);
+  }
+  return 64;
+#elif defined(IREE_COMPILER_MSVC)
+  // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse:
+  // scan the high 32 bits first and fall back to the low 32 bits.
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
+    return (int)(31 - result);
+  }
+  if (_BitScanReverse(&result, n)) {
+    return (int)(63 - result);
+  }
+  return 64;
+#elif defined(IREE_COMPILER_GCC_COMPAT)
+#if defined(__LZCNT__)
+  // NOTE: LZCNT is a risky instruction; it is not supported on architectures
+  // before Haswell, yet it is encoded as 'rep bsr', which typically ignores
+  // invalid rep prefixes, and interprets it as the 'bsr' instruction, which
+  // returns the index of the value rather than the count, resulting in
+  // incorrect code. Only taken when compiling with LZCNT support enabled
+  // (previously tested the misspelled __LCZNT__ macro and was dead code).
+  return (int)__lzcnt64(n);
+#elif defined(__aarch64__) || defined(__powerpc64__)
+  // Empirically verified that __builtin_clzll(0) works as expected.
+  return (int)__builtin_clzll(n);
+#endif
+  // Handle 0 as a special case because __builtin_clzll(0) is undefined.
+  if (!n) return 64;
+  // Use __builtin_clzll, which uses the following instructions:
+  //  x86: bsr
+  //  PPC: cntlzd
+  //  WASM: i32.clz
+  //  RISC-V: __clzsi2 in GCC, splat out in clang
+  return (int)__builtin_clzll(n);
+#else
+#error No clz for this arch.
+#endif  // IREE_COMPILER_MSVC / IREE_COMPILER_GCC_COMPAT
+}
+
+// Returns the number of trailing zero bits in |n|.
+// NOTE(review): behavior for n == 0 differs by path — __builtin_ctz(0) is
+// undefined, _BitScanForward leaves |result| unset (here pre-initialized to
+// 0), and the portable fallback yields 31 — callers should not pass 0.
+static inline int iree_math_count_trailing_zeros_u32(uint32_t n) {
+#if defined(IREE_COMPILER_MSVC)
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  _BitScanForward(&result, n);
+  return (int)result;
+#elif defined(IREE_COMPILER_GCC_COMPAT)
+  return (int)__builtin_ctz(n);
+#else
+  // Portable fallback: isolate the lowest set bit (two's complement n & -n)
+  // and binary-search its position with alternating masks.
+  int c = 31;
+  n &= ~n + 1;
+  if (n & 0x0000FFFFu) c -= 16;
+  if (n & 0x00FF00FFu) c -= 8;
+  if (n & 0x0F0F0F0Fu) c -= 4;
+  if (n & 0x33333333u) c -= 2;
+  if (n & 0x55555555u) c -= 1;
+  return c;
+#endif  // IREE_COMPILER_MSVC / IREE_COMPILER_GCC_COMPAT
+}
+
+// Returns the number of trailing zero bits in |n|.
+// NOTE(review): as with the u32 variant, n == 0 behavior is path-dependent;
+// callers should not pass 0.
+static inline int iree_math_count_trailing_zeros_u64(uint64_t n) {
+#if defined(IREE_COMPILER_MSVC) && defined(IREE_PTR_SIZE_64)
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  _BitScanForward64(&result, n);
+  return (int)result;
+#elif defined(IREE_COMPILER_MSVC) && defined(IREE_PTR_SIZE_32)
+  // 32-bit MSVC has no _BitScanForward64; scan the low word and fall back to
+  // the high word when the low 32 bits are all zero.
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if ((uint32_t)(n) == 0) {
+    _BitScanForward(&result, n >> 32);
+    return result + 32;
+  }
+  _BitScanForward(&result, n);
+  return (int)result;
+#elif defined(IREE_COMPILER_GCC_COMPAT)
+  // Use __builtin_ctzll, the trailing-zero counterpart of the clz builtins
+  // used above.
+  return __builtin_ctzll(n);
+#else
+  // Portable fallback mirroring the u32 variant with 64-bit masks.
+  int c = 63;
+  n &= ~n + 1;
+  if (n & 0x00000000FFFFFFFFull) c -= 32;
+  if (n & 0x0000FFFF0000FFFFull) c -= 16;
+  if (n & 0x00FF00FF00FF00FFull) c -= 8;
+  if (n & 0x0F0F0F0F0F0F0F0Full) c -= 4;
+  if (n & 0x3333333333333333ull) c -= 2;
+  if (n & 0x5555555555555555ull) c -= 1;
+  return c;
+#endif  // IREE_COMPILER_MSVC / IREE_COMPILER_GCC_COMPAT
+}
+
+//==============================================================================
+// Population count
+//==============================================================================
+
+// Returns the number of 1 bits in a 32 bit value.
+// Branchless SWAR popcount:
+// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+static inline int iree_math_count_ones_u32(uint32_t n) {
+  n -= ((n >> 1) & 0x55555555u);  // 2-bit partial sums.
+  n = ((n >> 2) & 0x33333333u) + (n & 0x33333333u);  // 4-bit partial sums.
+  // Fold nibbles into bytes, then the multiply accumulates all byte counts
+  // into the top byte which is shifted down.
+  return (int)((((n + (n >> 4)) & 0x0F0F0F0Fu) * 0x01010101u) >> 24);
+}
+
+// Returns the number of 1 bits in a 64 bit value.
+// Counts each 32-bit half independently and sums the two results.
+static inline int iree_math_count_ones_u64(uint64_t n) {
+  const uint32_t high_half = (uint32_t)(n >> 32);
+  const uint32_t low_half = (uint32_t)(n & 0xFFFFFFFFu);
+  return iree_math_count_ones_u32(high_half) +
+         iree_math_count_ones_u32(low_half);
+}
+
+//==============================================================================
+// Rounding and alignment
+//==============================================================================
+// There are certain platforms - mostly those with poorer quality compilers or
+// more restricted instruction sets - where we want to avoid the clz path as
+// it is emulated and instead we use some bit-twiddling hacks. On other
+// platforms it's the opposite - they may emulate clz but doing so saves
+// dozens of bytes that otherwise would have been the shift/or tree.
+//
+// Which to choose is entirely determined by fiddling on godbolt for the
+// target platform: https://godbolt.org/z/h4vPzo
+
+// Rounds up the value to the nearest power of 2 (if not already a power of 2).
+// For 32-bit numbers this only supports values <= 2^31; higher will wrap.
+// NOTE: an input of 0 also returns 0 (n-- wraps to UINT32_MAX and the final
+// + 1 overflows back to 0).
+static inline uint32_t iree_math_round_up_to_pow2_u32(uint32_t n) {
+#if 0    // golf required; can be bloated
+  const uint32_t i = (n != 1);
+  return (1 + i) << ((iree_math_count_leading_zeros_u32(n - i) ^ 31));
+#elif 0  // golf required; can be bloated
+  return n == 1 ? 1u : 2u << ((iree_math_count_leading_zeros_u32(n - 1) ^ 31));
+#else
+  // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+  // Smear the highest set bit of n-1 into all lower positions, then add 1.
+  n--;
+  n |= n >> 1;
+  n |= n >> 2;
+  n |= n >> 4;
+  n |= n >> 8;
+  n |= n >> 16;
+  return n + 1;
+#endif  // 1
+}
+
+// Rounds up the value to the nearest power of 2 (if not already a power of 2).
+// For 64-bit numbers this only supports values <= 2^63; higher will wrap.
+// NOTE: an input of 0 also returns 0 (wraps through UINT64_MAX).
+static inline uint64_t iree_math_round_up_to_pow2_u64(uint64_t n) {
+#if 0    // golf required; can be bloated
+  const uint64_t i = (n != 1);
+  return (1 + i) << ((iree_math_count_leading_zeros_u64(n - i) ^ 63));
+#elif 0  // golf required; can be bloated
+  return n == 1 ? 1ull
+                : 2ull << ((iree_math_count_leading_zeros_u64(n - 1) ^ 63));
+#else
+  // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+  // Smear the highest set bit of n-1 into all lower positions, then add 1.
+  n--;
+  n |= n >> 1;
+  n |= n >> 2;
+  n |= n >> 4;
+  n |= n >> 8;
+  n |= n >> 16;
+  n |= n >> 32;
+  return n + 1;
+#endif  // 1
+}
+
+//==============================================================================
+// FP16 support
+//==============================================================================
+
+// Converts a 16-bit floating-point value to a 32-bit C `float`.
+//
+// NOTE: this implementation does not handle corner cases around NaN and such;
+// we can improve this implementation over time if it is used for such cases.
+// Specifically: f16 subnormals (exp == 0) are flushed to signed zero, and
+// f16 Inf/NaN (exp == 31) are rebiased like normal numbers and come out as
+// large finite values.
+static inline float iree_math_f16_to_f32(const uint16_t f16_value) {
+  const uint32_t sign = ((uint32_t)((f16_value & 0x8000u) >> 15)) << 31;
+  uint32_t exp = ((f16_value & 0x7C00u) >> 10);
+  uint32_t mantissa = 0;
+  if (exp > 0) {
+    // Rebias the exponent from f16 (bias 15) to f32 (bias 127) and widen the
+    // 10-bit mantissa into the 23-bit field.
+    exp = (exp + 127 - 15) << 23;
+    mantissa = ((uint32_t)(f16_value & 0x3FFu)) << (23 - 10);
+  }
+  const uint32_t u32_value = sign | exp | mantissa;
+  // Type-pun via memcpy to avoid strict-aliasing undefined behavior.
+  float f32_value;
+  memcpy(&f32_value, &u32_value, sizeof(f32_value));
+  return f32_value;
+}
+
+// Converts a 32-bit C `float` value to a 16-bit floating-point value.
+//
+// NOTE: this implementation does not handle corner cases around NaN and such;
+// we can improve this implementation over time if it is used for such cases.
+// The mantissa is truncated (round-toward-zero). Values whose rebiased
+// exponent exceeds 31 keep their truncated mantissa bits alongside exp = 31
+// (so e.g. FLT_MAX maps to 0x7FFF); values below the f16 normal range get a
+// zero exponent (truncated mantissa bits are retained).
+static inline uint16_t iree_math_f32_to_f16(const float f32_value) {
+  // Type-pun via memcpy to avoid strict-aliasing undefined behavior.
+  uint32_t u32_value;
+  memcpy(&u32_value, &f32_value, sizeof(u32_value));
+  const uint32_t sign = ((u32_value & 0x80000000u) >> 31) << 15;
+  const uint32_t mantissa = (u32_value & 0x007FFFFFu) >> (23 - 10);
+  // Rebias from f32 (bias 127) to f16 (bias 15); may land outside [0, 31].
+  int32_t exp = ((u32_value & 0x7F800000u) >> 23) - 127 + 15;
+  if (exp > 31) {
+    exp = 31 << 10;
+  } else if (exp < 0) {
+    exp = 0;
+  } else {
+    exp = exp << 10;
+  }
+  return (uint16_t)(sign | exp | mantissa);
+}
+
+#endif // IREE_BASE_INTERNAL_MATH_H_
diff --git a/runtime/src/iree/base/internal/math_test.cc b/runtime/src/iree/base/internal/math_test.cc
new file mode 100644
index 0000000..8984e54
--- /dev/null
+++ b/runtime/src/iree/base/internal/math_test.cc
@@ -0,0 +1,202 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/math.h"
+
+#include <cfloat>
+
+#include "iree/testing/gtest.h"
+
+namespace {
+
+//==============================================================================
+// Bitwise rotation (aka circular shifts)
+//==============================================================================
+
+// Pins rotate-left behavior including the c == 0 identity, the c == 64 wrap
+// (count masked to 6 bits), and the all-ones invariant.
+// (Previously several assertions were exact copy-paste duplicates; they have
+// been replaced with distinct cases.)
+TEST(BitwiseRotationTest, ROTL64) {
+  EXPECT_EQ(0ull, iree_math_rotl_u64(0ull, 0u));
+  EXPECT_EQ(0ull, iree_math_rotl_u64(0ull, 32u));
+  EXPECT_EQ(1ull, iree_math_rotl_u64(1ull, 0u));
+  EXPECT_EQ(1ull, iree_math_rotl_u64(1ull, 64u));  // c == 64 wraps to 0.
+
+  EXPECT_EQ(2ull, iree_math_rotl_u64(1ull, 1u));
+  EXPECT_EQ(0x8000000000000000ull, iree_math_rotl_u64(1ull, 63u));
+  EXPECT_EQ(UINT64_MAX, iree_math_rotl_u64(UINT64_MAX, 63u));
+  EXPECT_EQ(UINT64_MAX, iree_math_rotl_u64(UINT64_MAX, 64u));
+}
+
+// Pins rotate-right behavior including the c == 0 identity, the c == 64 wrap,
+// and bit wrap-around into the high positions.
+// (Previously several assertions were exact copy-paste duplicates; they have
+// been replaced with distinct cases.)
+TEST(BitwiseRotationTest, ROTR64) {
+  EXPECT_EQ(0ull, iree_math_rotr_u64(0ull, 0u));
+  EXPECT_EQ(0ull, iree_math_rotr_u64(0ull, 32u));
+  EXPECT_EQ(1ull, iree_math_rotr_u64(1ull, 0u));
+  EXPECT_EQ(1ull, iree_math_rotr_u64(1ull, 64u));  // c == 64 wraps to 0.
+
+  EXPECT_EQ(1ull, iree_math_rotr_u64(2ull, 1u));
+  EXPECT_EQ(0x8000000000000000ull, iree_math_rotr_u64(2ull, 2u));
+  EXPECT_EQ(0x8000000000000000ull, iree_math_rotr_u64(1ull, 1u));
+  EXPECT_EQ(0x4000000000000000ull, iree_math_rotr_u64(1ull, 2u));
+}
+
+//==============================================================================
+// Bit scanning/counting
+//==============================================================================
+
+// Checks the 0/all-ones extremes, then every single-bit value and the value
+// with that bit plus all lower bits set (same leading-zero count).
+TEST(BitwiseScansTest, CLZ32) {
+  EXPECT_EQ(32, iree_math_count_leading_zeros_u32(uint32_t{}));
+  EXPECT_EQ(0, iree_math_count_leading_zeros_u32(~uint32_t{}));
+  for (int index = 0; index < 32; index++) {
+    uint32_t x = 1u << index;
+    const int cnt = 31 - index;
+    ASSERT_EQ(cnt, iree_math_count_leading_zeros_u32(x)) << index;
+    ASSERT_EQ(cnt, iree_math_count_leading_zeros_u32(x + x - 1)) << index;
+  }
+}
+
+// 64-bit mirror of CLZ32 above.
+TEST(BitwiseScansTest, CLZ64) {
+  EXPECT_EQ(64, iree_math_count_leading_zeros_u64(uint64_t{}));
+  EXPECT_EQ(0, iree_math_count_leading_zeros_u64(~uint64_t{}));
+  for (int index = 0; index < 64; index++) {
+    uint64_t x = 1ull << index;
+    const int cnt = 63 - index;
+    ASSERT_EQ(cnt, iree_math_count_leading_zeros_u64(x)) << index;
+    ASSERT_EQ(cnt, iree_math_count_leading_zeros_u64(x + x - 1)) << index;
+  }
+}
+
+// Checks every single-bit value and the value with that bit plus all higher
+// bits set (same trailing-zero count). Zero input is deliberately untested
+// (undefined for some implementations).
+TEST(BitwiseScansTest, CTZ32) {
+  EXPECT_EQ(0, iree_math_count_trailing_zeros_u32(~uint32_t{}));
+  for (int index = 0; index < 32; index++) {
+    uint32_t x = static_cast<uint32_t>(1) << index;
+    const int cnt = index;
+    ASSERT_EQ(cnt, iree_math_count_trailing_zeros_u32(x)) << index;
+    ASSERT_EQ(cnt, iree_math_count_trailing_zeros_u32(~(x - 1))) << index;
+  }
+}
+
+// 64-bit mirror of CTZ32 above.
+TEST(BitwiseScansTest, CTZ64) {
+  EXPECT_EQ(0, iree_math_count_trailing_zeros_u64(~uint64_t{}));
+  for (int index = 0; index < 64; index++) {
+    uint64_t x = static_cast<uint64_t>(1) << index;
+    const int cnt = index;
+    ASSERT_EQ(cnt, iree_math_count_trailing_zeros_u64(x)) << index;
+    ASSERT_EQ(cnt, iree_math_count_trailing_zeros_u64(~(x - 1))) << index;
+  }
+}
+
+//==============================================================================
+// Population count
+//==============================================================================
+
+// Spot-checks 32-bit popcount: -15u is 0xFFFFFFF1 (29 ones), 341 is
+// 0b101010101 (5 ones).
+TEST(PopulationCountTest, Ones32) {
+  EXPECT_EQ(0, iree_math_count_ones_u32(0u));
+  EXPECT_EQ(1, iree_math_count_ones_u32(1u));
+  EXPECT_EQ(29, iree_math_count_ones_u32(-15u));
+  EXPECT_EQ(5, iree_math_count_ones_u32(341u));
+  EXPECT_EQ(32, iree_math_count_ones_u32(UINT32_MAX));
+  EXPECT_EQ(31, iree_math_count_ones_u32(UINT32_MAX - 1));
+}
+
+// 64-bit mirror of Ones32 above.
+TEST(PopulationCountTest, Ones64) {
+  EXPECT_EQ(0, iree_math_count_ones_u64(0ull));
+  EXPECT_EQ(1, iree_math_count_ones_u64(1ull));
+  EXPECT_EQ(61, iree_math_count_ones_u64(-15ull));
+  EXPECT_EQ(5, iree_math_count_ones_u64(341ull));
+  EXPECT_EQ(64, iree_math_count_ones_u64(UINT64_MAX));
+  EXPECT_EQ(63, iree_math_count_ones_u64(UINT64_MAX - 1ull));
+}
+
+//==============================================================================
+// Rounding and alignment
+//==============================================================================
+
+// Exercises identity on powers of 2, rounding up at boundaries, and the
+// documented wrap-to-0 behavior for inputs above 2^31 (and for 0 itself).
+TEST(RoundingTest, UpToNextPow232) {
+  constexpr uint32_t kUint16Max = UINT16_MAX;
+  constexpr uint32_t kUint32Max = UINT32_MAX;
+  EXPECT_EQ(0u, iree_math_round_up_to_pow2_u32(0u));
+  EXPECT_EQ(1u, iree_math_round_up_to_pow2_u32(1u));
+  EXPECT_EQ(2u, iree_math_round_up_to_pow2_u32(2u));
+  EXPECT_EQ(4u, iree_math_round_up_to_pow2_u32(3u));
+  EXPECT_EQ(8u, iree_math_round_up_to_pow2_u32(8u));
+  EXPECT_EQ(16u, iree_math_round_up_to_pow2_u32(9u));
+  EXPECT_EQ(kUint16Max + 1u, iree_math_round_up_to_pow2_u32(kUint16Max - 1u));
+  EXPECT_EQ(kUint16Max + 1u, iree_math_round_up_to_pow2_u32(kUint16Max));
+  EXPECT_EQ(kUint16Max + 1u, iree_math_round_up_to_pow2_u32(kUint16Max + 1u));
+  EXPECT_EQ(131072u, iree_math_round_up_to_pow2_u32(kUint16Max + 2u));
+  EXPECT_EQ(262144u, iree_math_round_up_to_pow2_u32(262144u - 1u));
+  EXPECT_EQ(0x80000000u, iree_math_round_up_to_pow2_u32(0x7FFFFFFFu));
+  EXPECT_EQ(0x80000000u, iree_math_round_up_to_pow2_u32(0x80000000u));
+
+  // NOTE: wrap to 0.
+  EXPECT_EQ(0u, iree_math_round_up_to_pow2_u32(0x80000001u));
+  EXPECT_EQ(0u, iree_math_round_up_to_pow2_u32(kUint32Max - 1u));
+  EXPECT_EQ(0u, iree_math_round_up_to_pow2_u32(kUint32Max));
+}
+
+// 64-bit mirror of UpToNextPow232, including wrap-to-0 above 2^63.
+TEST(RoundingTest, UpToNextPow264) {
+  constexpr uint64_t kUint16Max = UINT16_MAX;
+  constexpr uint64_t kUint64Max = UINT64_MAX;
+  EXPECT_EQ(0ull, iree_math_round_up_to_pow2_u64(0ull));
+  EXPECT_EQ(1ull, iree_math_round_up_to_pow2_u64(1ull));
+  EXPECT_EQ(2ull, iree_math_round_up_to_pow2_u64(2ull));
+  EXPECT_EQ(4ull, iree_math_round_up_to_pow2_u64(3ull));
+  EXPECT_EQ(8ull, iree_math_round_up_to_pow2_u64(8ull));
+  EXPECT_EQ(16ull, iree_math_round_up_to_pow2_u64(9ull));
+  EXPECT_EQ(kUint16Max + 1ull,
+            iree_math_round_up_to_pow2_u64(kUint16Max - 1ull));
+  EXPECT_EQ(kUint16Max + 1ull, iree_math_round_up_to_pow2_u64(kUint16Max));
+  EXPECT_EQ(kUint16Max + 1ull,
+            iree_math_round_up_to_pow2_u64(kUint16Max + 1ull));
+  EXPECT_EQ(131072ull, iree_math_round_up_to_pow2_u64(kUint16Max + 2ull));
+  EXPECT_EQ(0x100000000ull, iree_math_round_up_to_pow2_u64(0xFFFFFFFEull));
+  EXPECT_EQ(0x100000000ull, iree_math_round_up_to_pow2_u64(0xFFFFFFFFull));
+  EXPECT_EQ(0x80000000ull, iree_math_round_up_to_pow2_u64(0x7FFFFFFFull));
+  EXPECT_EQ(0x80000000ull, iree_math_round_up_to_pow2_u64(0x80000000ull));
+  EXPECT_EQ(0x100000000ull, iree_math_round_up_to_pow2_u64(0x80000001ull));
+
+  // NOTE: wrap to 0.
+  EXPECT_EQ(0ull, iree_math_round_up_to_pow2_u64(0x8000000000000001ull));
+  EXPECT_EQ(0ull, iree_math_round_up_to_pow2_u64(kUint64Max - 1ull));
+  EXPECT_EQ(0ull, iree_math_round_up_to_pow2_u64(kUint64Max));
+}
+
+//==============================================================================
+// FP16 support
+//==============================================================================
+
+// Pins the truncating f32->f16 conversion: in-range values, the clamp-on-
+// overflow behavior (FLT_MAX keeps truncated mantissa bits: 0x7fff), and
+// flush-to-signed-zero underflow.
+TEST(F16ConversionTest, F32ToF16) {
+  // Within range, normal truncation.
+  EXPECT_EQ(0x3400, iree_math_f32_to_f16(0.25f));
+  EXPECT_EQ(0xd646, iree_math_f32_to_f16(-100.375f));
+  // Overflow
+  EXPECT_EQ(0x7fff, iree_math_f32_to_f16(FLT_MAX));
+  EXPECT_EQ(0xffff, iree_math_f32_to_f16(-FLT_MAX));
+  // Underflow
+  EXPECT_EQ(0, iree_math_f32_to_f16(FLT_MIN));
+  EXPECT_EQ(0x8000, iree_math_f32_to_f16(-FLT_MIN));
+}
+
+// Round-trip f32 -> f16 -> f32: exact recovery for f16-representable values
+// and loose bounds for out-of-range inputs.
+TEST(F16ConversionTest, F32ToF16ToF32) {
+  constexpr float kF16Max = 65504.f;
+  constexpr float kF16Min = 0.0000610351563f;
+  // Within range, should just recover.
+  EXPECT_EQ(0.25f, iree_math_f16_to_f32(iree_math_f32_to_f16(0.25f)));
+  EXPECT_EQ(-100.375f, iree_math_f16_to_f32(iree_math_f32_to_f16(-100.375f)));
+  EXPECT_EQ(kF16Max, iree_math_f16_to_f32(iree_math_f32_to_f16(kF16Max)));
+  EXPECT_EQ(kF16Min, iree_math_f16_to_f32(iree_math_f32_to_f16(kF16Min)));
+  // Overflow
+  EXPECT_GE(FLT_MAX, iree_math_f16_to_f32(iree_math_f32_to_f16(FLT_MAX)));
+  EXPECT_LT(-FLT_MAX, iree_math_f16_to_f32(iree_math_f32_to_f16(-FLT_MAX)));
+  EXPECT_GT(kF16Max + 1.f,
+            iree_math_f16_to_f32(iree_math_f32_to_f16(kF16Max + 1.f)));
+  // Underflow
+  EXPECT_EQ(0.0f, iree_math_f16_to_f32(iree_math_f32_to_f16(FLT_MIN)));
+  EXPECT_EQ(0.0f, iree_math_f16_to_f32(iree_math_f32_to_f16(-FLT_MIN)));
+  EXPECT_EQ(0.0f,
+            iree_math_f16_to_f32(iree_math_f32_to_f16(kF16Min - kF16Min / 2)));
+}
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/prng.h b/runtime/src/iree/base/internal/prng.h
new file mode 100644
index 0000000..8a97df8
--- /dev/null
+++ b/runtime/src/iree/base/internal/prng.h
@@ -0,0 +1,205 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//==============================================================================
+//
+// Pseudo-random number generators (PRNGs): **NOT CRYPTOGRAPHICALLY SECURE**
+//
+// Only use these tiny little PRNGs to introduce a bit of randomnessish behavior
+// to things like balancing and backoff algorithms.
+//
+//==============================================================================
+
+#ifndef IREE_BASE_INTERNAL_PRNG_H_
+#define IREE_BASE_INTERNAL_PRNG_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/target_platform.h"
+
+#if defined(IREE_ARCH_ARM_64)
+#include <arm_neon.h>
+#endif // IREE_ARCH_ARM_64
+
+//==============================================================================
+// Pseudo-random number generators (PRNGs): **NOT CRYPTOGRAPHICALLY SECURE**
+//==============================================================================
+
+// A fixed-increment version of Java 8's SplittableRandom generator
+// See http://dx.doi.org/10.1145/2714064.2660195 and
+// http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
+//
+// SplitMix64 as recommended for use with xoroshiro by the authors:
+// http://prng.di.unimi.it/splitmix64.c
+// http://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64
+//
+// The entire state is a single 64-bit counter.
+typedef uint64_t iree_prng_splitmix64_state_t;
+
+// Initializes a SplitMix64 PRNG state vector; |out_state| is overwritten.
+// |seed| may be any 64-bit value.
+static inline void iree_prng_splitmix64_initialize(
+    uint64_t seed, iree_prng_splitmix64_state_t* out_state) {
+  // The state is simply the raw seed; all mixing happens in next().
+  *out_state = seed;
+}
+
+// Steps a SplitMix64 PRNG state vector and yields a value for use.
+// Constants are from the reference splitmix64.c implementation; the state
+// advances by the 64-bit golden-ratio increment and the output is a mixed
+// (avalanched) copy of it.
+static inline uint64_t iree_prng_splitmix64_next(
+    iree_prng_splitmix64_state_t* state) {
+  uint64_t z = (*state += 0x9E3779B97F4A7C15ull);
+  z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
+  z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
+  return z ^ (z >> 31);
+}
+
+// A small **pseudorandom** number generator (named after the operations used).
+// http://prng.di.unimi.it/
+//
+// 2x64 = 128 bits of state shared by the + and ** output variants below.
+typedef struct {
+  uint64_t value[2];
+} iree_prng_xoroshiro128_state_t;
+
+// Initializes a xoroshiro128+ PRNG state vector; |out_state| is overwritten.
+// |seed| may be any 64-bit value.
+static inline void iree_prng_xoroshiro128_initialize(
+    uint64_t seed, iree_prng_xoroshiro128_state_t* out_state) {
+  // The authors recommend using SplitMix64 to go from a single int seed
+  // into the two state values we need. It's critical that we don't use a
+  // xoroshiro128 for this as seeding a PRNG with the results of itself is...
+  // unsound.
+  iree_prng_splitmix64_state_t init_state;
+  iree_prng_splitmix64_initialize(seed, &init_state);
+  // NOTE: this previously stepped &seed directly, which produced identical
+  // values (the splitmix state is just the seed) but left |init_state| unused
+  // and mutated the parameter; step the dedicated state instead.
+  out_state->value[0] = iree_prng_splitmix64_next(&init_state);
+  out_state->value[1] = iree_prng_splitmix64_next(&init_state);
+
+  // A state of 0 will never produce anything but zeros so ensure that doesn't
+  // happen; of course, after running splitmix that should be closer to the
+  // side of never than not.
+  if (!out_state->value[0] && !out_state->value[1]) {
+    out_state->value[0] = 1;
+  }
+}
+
+// Steps a xoroshiro128 state vector and yields a value for use.
+// xoroshiro128+ variant: produces a 64-bit value whose low bits have reduced
+// quality.
+// This is the fastest variant but the lower 4 bits of the returned value may
+// not be sufficiently well-distributed. This is fine if the usage requires
+// fewer than 60 bits such as when sampling bools or array indices.
+// Note also that this works great for floating-point numbers where only 23 or
+// 53 bits are required to populate a mantissa and an additional step can be
+// used to generate the sign/exponent when required.
+//
+// footprint: 128-bits
+// period: 2^128 - 1
+// ns/64-bits: 0.72
+// cycles/byte: 0.29
+//
+// http://prng.di.unimi.it/xoroshiro128plus.c
+static inline uint64_t iree_prng_xoroshiro128plus_next_uint60(
+    iree_prng_xoroshiro128_state_t* state) {
+  uint64_t s0 = state->value[0];
+  uint64_t s1 = state->value[1];
+  const uint64_t result = s0 + s1;  // '+' output mix.
+  s1 ^= s0;
+  state->value[0] = iree_math_rotl_u64(s0, 24) ^ s1 ^ (s1 << 16);  // a, b
+  state->value[1] = iree_math_rotl_u64(s1, 37);                    // c
+  return result;
+}
+
+// Steps a xoroshiro128 state vector and yields a single boolean value for use.
+// Uses the topmost bit (bit 63), avoiding the weaker low bits.
+// See iree_prng_xoroshiro128plus_next_uint60 for details.
+static inline bool iree_prng_xoroshiro128plus_next_bool(
+    iree_prng_xoroshiro128_state_t* state) {
+  return (bool)(iree_prng_xoroshiro128plus_next_uint60(state) >> (64 - 1));
+}
+
+// Steps a xoroshiro128 state vector and yields a single uint8_t value for use.
+// Takes the top 8 bits of the output, avoiding the weaker low bits.
+// See iree_prng_xoroshiro128plus_next_uint60 for details.
+static inline uint8_t iree_prng_xoroshiro128plus_next_uint8(
+    iree_prng_xoroshiro128_state_t* state) {
+  return (uint8_t)(iree_prng_xoroshiro128plus_next_uint60(state) >> (64 - 8));
+}
+
+// Steps a xoroshiro128 state vector and yields a single uint32_t value for
+// use. Takes the top 32 bits of the output, avoiding the weaker low bits.
+// See iree_prng_xoroshiro128plus_next_uint60 for details.
+static inline uint32_t iree_prng_xoroshiro128plus_next_uint32(
+    iree_prng_xoroshiro128_state_t* state) {
+  return (uint32_t)(iree_prng_xoroshiro128plus_next_uint60(state) >> (64 - 32));
+}
+
+// Steps a xoroshiro128 state vector and yields a value for use.
+// xoroshiro128** variant: produces a 64-bit value with full-width quality.
+// Prefer this to xoroshiro128+ when good distribution over the integer range
+// is required; see xoroshiro128+ for details of its issues.
+//
+// footprint: 128-bits
+// period: 2^128 - 1
+// ns/64-bits: 0.93
+// cycles/byte: 0.42
+//
+// http://prng.di.unimi.it/xoroshiro128starstar.c
+static inline uint64_t iree_prng_xoroshiro128starstar_next_uint64(
+    iree_prng_xoroshiro128_state_t* state) {
+  uint64_t s0 = state->value[0];
+  uint64_t s1 = state->value[1];
+  const uint64_t result = iree_math_rotl_u64(s0 * 5, 7) * 9;  // '**' mix.
+  s1 ^= s0;
+  state->value[0] = iree_math_rotl_u64(s0, 24) ^ s1 ^ (s1 << 16);  // a, b
+  state->value[1] = iree_math_rotl_u64(s1, 37);                    // c
+  return result;
+}
+
+// MiniLcg by @bjacob: A shot at the cheapest possible PRNG on ARM NEON
+// https://gist.github.com/bjacob/7d635b91acd02559d73a6d159fe9cfbe
+// I have no idea what the entropy characteristics of it are but it's really
+// fast and in a lot of places that's all we need. For example, whatever number
+// we generate when doing worker thread selection is going to get AND'ed with
+// some other bitmasks by the caller -- and once you do that to a random number
+// you've pretty much admitted it's ok to not be so strong and may as well
+// capitalize on it!
+//
+// 16 independent 8-bit LCG lanes advanced in a single batch (one NEON op on
+// ARM64); |remaining| tracks how many lanes of the batch are still unread.
+typedef iree_alignas(iree_max_align_t) struct {
+  uint8_t value[16];  // first to ensure alignment
+  int8_t remaining;   // number of remaining valid values in the state
+} iree_prng_minilcg128_state_t;
+
+// Per-lane LCG multiplier/increment constants for seeding and stepping.
+#define IREE_PRNG_MINILCG_INIT_MUL_CONSTANT 13
+#define IREE_PRNG_MINILCG_INIT_ADD_CONSTANT 47
+#define IREE_PRNG_MINILCG_NEXT_MUL_CONSTANT 37
+#define IREE_PRNG_MINILCG_NEXT_ADD_CONSTANT 47
+
+// Initializes a MiniLcg PRNG state vector; |out_state| is overwritten.
+// |seed| may be any 8-bit value.
+static inline void iree_prng_minilcg128_initialize(
+    uint64_t seed, iree_prng_minilcg128_state_t* out_state) {
+  // Fold the seed to 8 bits (XORing with the same golden-ratio constant used
+  // by SplitMix64 above) and derive 16 distinct per-lane seeds via the INIT
+  // LCG.
+  uint8_t value = (seed ^ 11400714819323198485ull) & 0xFF;
+  for (size_t i = 0; i < 16; ++i) {
+    out_state->value[i] = value;
+    value = value * IREE_PRNG_MINILCG_INIT_MUL_CONSTANT +
+            IREE_PRNG_MINILCG_INIT_ADD_CONSTANT;
+  }
+  // All 16 seeded lanes are available for consumption.
+  out_state->remaining = 16;
+}
+
+// Returns the next 8-bit value, consuming lanes value[0]..value[15] in order
+// and refilling all 16 lanes in one batch when the buffer is exhausted.
+static inline uint8_t iree_prng_minilcg128_next_uint8(
+    iree_prng_minilcg128_state_t* state) {
+  if (IREE_UNLIKELY(--state->remaining < 0)) {
+#if defined(IREE_ARCH_ARM_64)
+    // Advance all 16 LCG lanes with a single NEON multiply-accumulate.
+    uint8x16_t kmul = vdupq_n_u8(IREE_PRNG_MINILCG_NEXT_MUL_CONSTANT);
+    uint8x16_t kadd = vdupq_n_u8(IREE_PRNG_MINILCG_NEXT_ADD_CONSTANT);
+    vst1q_u8(state->value, vmlaq_u8(kadd, kmul, vld1q_u8(state->value)));
+#else
+    // Scalar equivalent of the NEON batch step above.
+    for (size_t i = 0; i < 16; ++i) {
+      state->value[i] = state->value[i] * IREE_PRNG_MINILCG_NEXT_MUL_CONSTANT +
+                        IREE_PRNG_MINILCG_NEXT_ADD_CONSTANT;
+    }
+#endif  // IREE_ARCH_ARM_64
+    // Restart consumption at lane 0 (this call consumes it below).
+    state->remaining = 15;
+  }
+  return state->value[16 - state->remaining - 1];
+}
+
+#endif // IREE_BASE_INTERNAL_PRNG_H_
diff --git a/runtime/src/iree/base/internal/prng_test.cc b/runtime/src/iree/base/internal/prng_test.cc
new file mode 100644
index 0000000..95adb12
--- /dev/null
+++ b/runtime/src/iree/base/internal/prng_test.cc
@@ -0,0 +1,91 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//==============================================================================
+// Pseudo-random number generators (PRNGs): **NOT CRYPTOGRAPHICALLY SECURE**
+//==============================================================================
+// NOTE: we leave the real testing to the authors; this just ensures we aren't
+// `return 4;`ing it or ignoring the seed.
+
+#include "iree/base/internal/prng.h"
+
+#include "iree/testing/gtest.h"
+
+namespace {
+
+// Pins the first two outputs for representative seeds (0, 1, all-ones) to
+// catch regressions in the mixing constants or seed handling.
+TEST(PRNG, SplitMix64) {
+  iree_prng_splitmix64_state_t state;
+
+  iree_prng_splitmix64_initialize(/*seed=*/0ull, &state);
+  EXPECT_EQ(16294208416658607535ull, iree_prng_splitmix64_next(&state));
+  EXPECT_EQ(7960286522194355700ull, iree_prng_splitmix64_next(&state));
+
+  iree_prng_splitmix64_initialize(/*seed=*/1ull, &state);
+  EXPECT_EQ(10451216379200822465ull, iree_prng_splitmix64_next(&state));
+  EXPECT_EQ(13757245211066428519ull, iree_prng_splitmix64_next(&state));
+
+  iree_prng_splitmix64_initialize(/*seed=*/UINT64_MAX, &state);
+  EXPECT_EQ(16490336266968443936ull, iree_prng_splitmix64_next(&state));
+  EXPECT_EQ(16834447057089888969ull, iree_prng_splitmix64_next(&state));
+}
+
+// Pins one value from each output variant (+/bool/u8/u32/**) per seed; the
+// expected values depend on both the seeding path and the step functions.
+TEST(PRNG, Xoroshiro128) {
+  iree_prng_xoroshiro128_state_t state;
+
+  iree_prng_xoroshiro128_initialize(/*seed=*/0ull, &state);
+  EXPECT_EQ(5807750865143411619ull,
+            iree_prng_xoroshiro128plus_next_uint60(&state));
+  EXPECT_TRUE(iree_prng_xoroshiro128plus_next_bool(&state));
+  EXPECT_EQ(218u, iree_prng_xoroshiro128plus_next_uint8(&state));
+  EXPECT_EQ(1647201753u, iree_prng_xoroshiro128plus_next_uint32(&state));
+  EXPECT_EQ(7260361800523965311ull,
+            iree_prng_xoroshiro128starstar_next_uint64(&state));
+
+  iree_prng_xoroshiro128_initialize(/*seed=*/1ull, &state);
+  EXPECT_EQ(5761717516557699368ull,
+            iree_prng_xoroshiro128plus_next_uint60(&state));
+  EXPECT_TRUE(iree_prng_xoroshiro128plus_next_bool(&state));
+  EXPECT_EQ(103u, iree_prng_xoroshiro128plus_next_uint8(&state));
+  EXPECT_EQ(2242241045u, iree_prng_xoroshiro128plus_next_uint32(&state));
+  EXPECT_EQ(661144386810419178ull,
+            iree_prng_xoroshiro128starstar_next_uint64(&state));
+
+  iree_prng_xoroshiro128_initialize(/*seed=*/UINT64_MAX, &state);
+  EXPECT_EQ(14878039250348781289ull,
+            iree_prng_xoroshiro128plus_next_uint60(&state));
+  EXPECT_FALSE(iree_prng_xoroshiro128plus_next_bool(&state));
+  EXPECT_EQ(137u, iree_prng_xoroshiro128plus_next_uint8(&state));
+  EXPECT_EQ(2111322015u, iree_prng_xoroshiro128plus_next_uint32(&state));
+  EXPECT_EQ(138107609852220106ull,
+            iree_prng_xoroshiro128starstar_next_uint64(&state));
+}
+
+// Pins the first output and the output after 100 further steps per seed,
+// exercising several of the 16-lane batch refills.
+TEST(PRNG, MiniLcg128) {
+  iree_prng_minilcg128_state_t state;
+
+  iree_prng_minilcg128_initialize(/*seed=*/0ull, &state);
+  EXPECT_EQ(21u, iree_prng_minilcg128_next_uint8(&state));
+  for (int i = 0; i < 100; ++i) {
+    iree_prng_minilcg128_next_uint8(&state);
+  }
+  EXPECT_EQ(18u, iree_prng_minilcg128_next_uint8(&state));
+
+  iree_prng_minilcg128_initialize(/*seed=*/1ull, &state);
+  EXPECT_EQ(20u, iree_prng_minilcg128_next_uint8(&state));
+  for (int i = 0; i < 100; ++i) {
+    iree_prng_minilcg128_next_uint8(&state);
+  }
+  EXPECT_EQ(13u, iree_prng_minilcg128_next_uint8(&state));
+
+  iree_prng_minilcg128_initialize(/*seed=*/UINT64_MAX, &state);
+  EXPECT_EQ(234u, iree_prng_minilcg128_next_uint8(&state));
+  for (int i = 0; i < 100; ++i) {
+    iree_prng_minilcg128_next_uint8(&state);
+  }
+  EXPECT_EQ(59u, iree_prng_minilcg128_next_uint8(&state));
+}
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/span.h b/runtime/src/iree/base/internal/span.h
new file mode 100644
index 0000000..82fd91c
--- /dev/null
+++ b/runtime/src/iree/base/internal/span.h
@@ -0,0 +1,187 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_SPAN_H_
+#define IREE_BASE_INTERNAL_SPAN_H_
+#ifdef __cplusplus
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>  // std::abort, used by span::at/first/last/subspan below
+#include <initializer_list>
+#include <iterator>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+// std::span is available starting in C++20.
+// Prior to that we fall back to our simplified implementation below.
+#if defined(__has_include)
+#if __has_include(<span>) && __cplusplus >= 202002L
+#define IREE_HAVE_STD_SPAN 1
+#include <span>
+#endif // __has_include(<span>)
+#endif // __has_include
+
+#ifndef IREE_HAVE_STD_SPAN
+#include <limits>
+#endif
+
+namespace iree {
+
+#if defined(IREE_HAVE_STD_SPAN)
+
+// Alias. Once we bump up our minimum C++ version we can drop this entire file.
+template <typename T>
+using span = std::span<T>;
+
+#else
+
+constexpr std::size_t dynamic_extent = std::numeric_limits<std::size_t>::max();
+
+// A pared down version of std::span doing just enough for our uses in IREE.
+// Most of the IREE code started using absl::Span which while close to std::span
+// has some additional functionality of its own and is missing some from std.
+// The benefit here is that means we only need to implement the intersection of
+// the two as none of our code uses those newer std features.
+//
+// https://en.cppreference.com/w/cpp/container/span/subspan
+template <typename T>
+class span {
+ private:
+  template <typename V>
+  using remove_cv_t = typename std::remove_cv<V>::type;
+  template <typename V>
+  using decay_t = typename std::decay<V>::type;
+
+  // Returns a pointer to the first element of |c|. The char/int tag ranks
+  // the overloads: GetDataImpl(c, 0) is an exact match for the std::string
+  // overload (int tag), where &s[0] yields a mutable char* even on
+  // pre-C++17 libraries whose data() is const-only; every other container
+  // falls back to its data() member (char tag, via int->char conversion).
+  template <typename C>
+  static constexpr auto GetDataImpl(C& c, char) noexcept -> decltype(c.data()) {
+    return c.data();
+  }
+  static inline char* GetDataImpl(std::string& s, int) noexcept {
+    return &s[0];
+  }
+  template <typename C>
+  static constexpr auto GetData(C& c) noexcept -> decltype(GetDataImpl(c, 0)) {
+    return GetDataImpl(c, 0);
+  }
+
+  // SFINAE detector: true when C has an integral size() member.
+  template <typename C>
+  using HasSize =
+      std::is_integral<decay_t<decltype(std::declval<C&>().size())> >;
+
+  // SFINAE detector: true when C's element pointer converts to V*.
+  template <typename V, typename C>
+  using HasData =
+      std::is_convertible<decay_t<decltype(GetData(std::declval<C&>()))>*,
+                          V* const*>;
+
+  template <typename C>
+  using EnableIfConvertibleFrom =
+      typename std::enable_if<HasData<T, C>::value && HasSize<C>::value>::type;
+
+  // Selects between the const-container and mutable-container constructors
+  // below based on the const-ness of T.
+  template <typename U>
+  using EnableIfConstView =
+      typename std::enable_if<std::is_const<T>::value, U>::type;
+
+  template <typename U>
+  using EnableIfMutableView =
+      typename std::enable_if<!std::is_const<T>::value, U>::type;
+
+ public:
+  using value_type = remove_cv_t<T>;
+  using pointer = T*;
+  using const_pointer = const T*;
+  using reference = T&;
+  using const_reference = const T&;
+  using iterator = pointer;
+  using const_iterator = const_pointer;
+  using reverse_iterator = std::reverse_iterator<iterator>;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+
+  constexpr span() noexcept : span(nullptr, 0) {}
+  constexpr span(pointer array, size_type length) noexcept
+      : ptr_(array), len_(length) {}
+
+  // Implicit conversion from a C array; N is deduced at compile time.
+  template <size_type N>
+  constexpr span(T (&a)[N]) noexcept : span(a, N) {}
+
+  // Mutable container (e.g. std::vector<T>) -> span<T>. Explicit so a
+  // mutable view is never formed by accident.
+  template <typename V, typename = EnableIfConvertibleFrom<V>,
+            typename = EnableIfMutableView<V> >
+  explicit span(V& v) noexcept : span(GetData(v), v.size()) {}
+
+  // Const container -> span<const T>; implicit.
+  template <typename V, typename = EnableIfConvertibleFrom<V>,
+            typename = EnableIfConstView<V> >
+  constexpr span(const V& v) noexcept : span(GetData(v), v.size()) {}
+
+  // Only available on span<const T>. NOTE: the initializer_list's backing
+  // array only lives for the enclosing full expression.
+  template <typename LazyT = T, typename = EnableIfConstView<LazyT> >
+  span(std::initializer_list<value_type> v) noexcept
+      : span(v.begin(), v.size()) {}
+
+  constexpr pointer data() const noexcept { return ptr_; }
+
+  constexpr size_type size() const noexcept { return len_; }
+
+  constexpr size_type length() const noexcept { return size(); }
+
+  constexpr bool empty() const noexcept { return size() == 0; }
+
+  constexpr reference operator[](size_type i) const noexcept {
+    // MSVC 2015 accepts this as constexpr, but not ptr_[i]
+    assert(i < size());
+    return *(data() + i);
+  }
+
+  // Bounds-checked access; aborts (rather than throwing) when out of range.
+  constexpr reference at(size_type i) const {
+    return i < size() ? *(data() + i) : (std::abort(), *(data() + i));
+  }
+
+  constexpr reference front() const noexcept {
+    assert(size() > 0);
+    return *data();
+  }
+  constexpr reference back() const noexcept {
+    assert(size() > 0);
+    return *(data() + size() - 1);
+  }
+
+  constexpr iterator begin() const noexcept { return data(); }
+  constexpr iterator end() const noexcept { return data() + size(); }
+
+  constexpr reverse_iterator rbegin() const noexcept {
+    return reverse_iterator(end());
+  }
+  constexpr reverse_iterator rend() const noexcept {
+    return reverse_iterator(begin());
+  }
+
+  // Returns [pos, pos+len); |len| is clamped to the remaining size and
+  // an out-of-range |pos| aborts.
+  constexpr span subspan(size_type pos = 0,
+                         size_type len = iree::dynamic_extent) const {
+    return (pos <= size()) ? span(data() + pos, std::min(size() - pos, len))
+                           : (std::abort(), span());
+  }
+
+  // Returns the first |len| elements; aborts if len > size().
+  constexpr span first(size_type len) const {
+    return (len <= size()) ? span(data(), len) : (std::abort(), span());
+  }
+
+  // Returns the last |len| elements; aborts if len > size().
+  constexpr span last(size_type len) const {
+    return (len <= size()) ? span(size() - len + data(), len)
+                           : (std::abort(), span());
+  }
+
+ private:
+  pointer ptr_;
+  size_type len_;
+};
+
+#endif // IREE_HAVE_STD_SPAN
+
+} // namespace iree
+
+#endif // __cplusplus
+#endif // IREE_BASE_INTERNAL_SPAN_H_
diff --git a/runtime/src/iree/base/internal/synchronization.c b/runtime/src/iree/base/internal/synchronization.c
new file mode 100644
index 0000000..936238c
--- /dev/null
+++ b/runtime/src/iree/base/internal/synchronization.c
@@ -0,0 +1,778 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/synchronization.h"
+
+#include <assert.h>
+#include <string.h>
+
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+// Disabled.
+
+#elif defined(IREE_PLATFORM_EMSCRIPTEN)
+
+#include <emscripten/threading.h>
+#include <errno.h>
+
+#elif defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_LINUX)
+
+#include <errno.h>
+#include <linux/futex.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+// Oh Android...
+#ifndef SYS_futex
+#define SYS_futex __NR_futex
+#endif // !SYS_futex
+#ifndef FUTEX_PRIVATE_FLAG
+#define FUTEX_PRIVATE_FLAG 128
+#endif // !FUTEX_PRIVATE_FLAG
+
+#endif // IREE_PLATFORM_*
+
+#if defined(NDEBUG)
+#define SYNC_ASSERT(x) (void)(x)
+#else
+#define SYNC_ASSERT(x) assert(x)
+#endif // NDEBUG
+
+// Tag functions in .c files with this to indicate that thread safety analysis
+// warnings should not show. This is useful on our implementation functions as
+// clang cannot reason about lock-free magic.
+#define IREE_DISABLE_THREAD_SAFETY_ANALYSIS \
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis)
+
+//==============================================================================
+// Cross-platform futex mappings (where supported)
+//==============================================================================
+
+#if defined(IREE_PLATFORM_HAS_FUTEX)
+
+// Waits in the OS for the value at the specified |address| to change.
+// If the contents of |address| do not match |expected_value| the wait will
+// fail and return IREE_STATUS_UNAVAILABLE and should be retried.
+//
+// |timeout_ms| can be either IREE_INFINITE_TIMEOUT_MS to wait forever or a
+// relative number of milliseconds to wait prior to returning early with
+// IREE_STATUS_DEADLINE_EXCEEDED.
+static inline iree_status_code_t iree_futex_wait(void* address,
+ uint32_t expected_value,
+ uint32_t timeout_ms);
+
+// Wakes at most |count| threads waiting for the |address| to change.
+// Use IREE_ALL_WAITERS to wake all waiters. Which waiters are woken is
+// undefined and it is not guaranteed that higher priority waiters will be woken
+// over lower priority waiters.
+static inline void iree_futex_wake(void* address, int32_t count);
+
+#if defined(IREE_PLATFORM_EMSCRIPTEN)
+
+static inline iree_status_code_t iree_futex_wait(void* address,
+                                                 uint32_t expected_value,
+                                                 uint32_t timeout_ms) {
+  int rc = emscripten_futex_wait(address, expected_value, (double)timeout_ms);
+  // NOTE: the success case is the switch default (listed first); the
+  // emscripten API reports failures as negative errno values.
+  switch (rc) {
+    default:
+      return IREE_STATUS_OK;
+    case -ETIMEDOUT:
+      return IREE_STATUS_DEADLINE_EXCEEDED;
+    case -EWOULDBLOCK:
+      return IREE_STATUS_UNAVAILABLE;
+  }
+}
+
+static inline void iree_futex_wake(void* address, int32_t count) {
+ emscripten_futex_wake(address, count);
+}
+
+#elif defined(IREE_PLATFORM_WINDOWS)
+
+#pragma comment(lib, "Synchronization.lib")
+
+static inline iree_status_code_t iree_futex_wait(void* address,
+                                                 uint32_t expected_value,
+                                                 uint32_t timeout_ms) {
+  // IREE_INFINITE_TIMEOUT_MS is passed straight through; WaitOnAddress
+  // treats it as an infinite wait.
+  if (IREE_LIKELY(WaitOnAddress(address, &expected_value,
+                                sizeof(expected_value), timeout_ms) == TRUE)) {
+    return IREE_STATUS_OK;
+  }
+  if (GetLastError() == ERROR_TIMEOUT) {
+    return IREE_STATUS_DEADLINE_EXCEEDED;
+  }
+  return IREE_STATUS_UNAVAILABLE;
+}
+
+static inline void iree_futex_wake(void* address, int32_t count) {
+  if (count == INT32_MAX) {
+    // Wake-all fast path: one call instead of INT32_MAX singles.
+    WakeByAddressAll(address);
+    return;
+  }
+  for (; count > 0; --count) {
+    WakeByAddressSingle(address);
+  }
+}
+
+#elif defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_LINUX)
+
+static inline iree_status_code_t iree_futex_wait(void* address,
+                                                 uint32_t expected_value,
+                                                 uint32_t timeout_ms) {
+  // FUTEX_WAIT takes a *relative* timeout; IREE_INFINITE_TIMEOUT_MS maps
+  // to a NULL timespec (wait forever).
+  struct timespec timeout = {
+      .tv_sec = timeout_ms / 1000,
+      .tv_nsec = (timeout_ms % 1000) * 1000000,
+  };
+  int rc = syscall(
+      SYS_futex, address, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected_value,
+      timeout_ms == IREE_INFINITE_TIMEOUT_MS ? NULL : &timeout, NULL, 0);
+  // NOTE(review): EAGAIN (value no longer matched |expected_value|) is
+  // mapped to OK here rather than UNAVAILABLE as the generic contract above
+  // states; callers such as iree_notification_commit_wait treat non-OK as
+  // failure and would otherwise report spurious timeouts. Confirm intent.
+  if (IREE_LIKELY(rc == 0) || errno == EAGAIN) {
+    return IREE_STATUS_OK;
+  } else if (errno == ETIMEDOUT) {
+    return IREE_STATUS_DEADLINE_EXCEEDED;
+  }
+  return IREE_STATUS_UNAVAILABLE;
+}
+
+static inline void iree_futex_wake(void* address, int32_t count) {
+  // Fire-and-forget: the return value (number of waiters woken) is unused.
+  syscall(SYS_futex, address, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, count, NULL,
+          NULL, 0);
+}
+
+#endif // IREE_PLATFORM_*
+
+#endif // IREE_PLATFORM_HAS_FUTEX
+
+//==============================================================================
+// iree_mutex_t
+//==============================================================================
+
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+#define iree_mutex_impl_initialize(mutex)
+#define iree_mutex_impl_deinitialize(mutex)
+#define iree_mutex_impl_lock(mutex)
+#define iree_mutex_impl_try_lock(mutex) true
+#define iree_mutex_impl_unlock(mutex)
+
+#elif defined(IREE_PLATFORM_WINDOWS) && defined(IREE_MUTEX_USE_WIN32_SRW)
+
+// Win32 Slim Reader/Writer (SRW) Lock (same as std::mutex)
+#define iree_mutex_impl_initialize(mutex) InitializeSRWLock(&(mutex)->value)
+#define iree_mutex_impl_deinitialize(mutex)
+#define iree_mutex_impl_lock(mutex) AcquireSRWLockExclusive(&(mutex)->value)
+#define iree_mutex_impl_try_lock(mutex) \
+ (TryAcquireSRWLockExclusive(&(mutex)->value) == TRUE)
+#define iree_mutex_impl_unlock(mutex) ReleaseSRWLockExclusive(&(mutex)->value)
+
+#elif defined(IREE_PLATFORM_WINDOWS)
+
+// Win32 CRITICAL_SECTION
+#define IREE_WIN32_CRITICAL_SECTION_FLAG_DYNAMIC_SPIN 0x02000000
+#define iree_mutex_impl_initialize(mutex) \
+ InitializeCriticalSectionEx(&(mutex)->value, 4000, \
+ IREE_WIN32_CRITICAL_SECTION_FLAG_DYNAMIC_SPIN)
+#define iree_mutex_impl_deinitialize(mutex) \
+ DeleteCriticalSection(&(mutex)->value)
+#define iree_mutex_impl_lock(mutex) EnterCriticalSection(&(mutex)->value)
+#define iree_mutex_impl_try_lock(mutex) \
+ (TryEnterCriticalSection(&(mutex)->value) == TRUE)
+#define iree_mutex_impl_unlock(mutex) LeaveCriticalSection(&(mutex)->value)
+
+#else
+
+// pthreads pthread_mutex_t
+#define iree_mutex_impl_initialize(mutex) \
+ pthread_mutex_init(&(mutex)->value, NULL)
+#define iree_mutex_impl_deinitialize(mutex) \
+ pthread_mutex_destroy(&(mutex)->value)
+#define iree_mutex_impl_lock(mutex) pthread_mutex_lock(&(mutex)->value)
+#define iree_mutex_impl_try_lock(mutex) \
+ (pthread_mutex_trylock(&(mutex)->value) == 0)
+#define iree_mutex_impl_unlock(mutex) pthread_mutex_unlock(&(mutex)->value)
+
+#endif // IREE_PLATFORM_*
+
+#if (IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_SLOW_LOCKS)
+
+// NOTE: the tracy mutex tracing code takes locks itself (which makes it slower
+// and may cause deadlocks).
+
+// Traced fat mutex: each operation brackets the platform lock with tracy
+// mutex events keyed by |lock_id|.
+
+void iree_mutex_initialize_impl(const iree_tracing_location_t* src_loc,
+                                iree_mutex_t* out_mutex) {
+  memset(out_mutex, 0, sizeof(*out_mutex));
+  // Announce first so |lock_id| is assigned before any lock events fire.
+  iree_tracing_mutex_announce(src_loc, &out_mutex->lock_id);
+  iree_mutex_impl_initialize(out_mutex);
+}
+
+void iree_mutex_deinitialize(iree_mutex_t* mutex) {
+  iree_mutex_impl_deinitialize(mutex);
+  iree_tracing_mutex_terminate(mutex->lock_id);
+  memset(mutex, 0, sizeof(*mutex));
+}
+
+void iree_mutex_lock(iree_mutex_t* mutex) IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  // before/after events let tracy attribute time spent blocked on the lock.
+  iree_tracing_mutex_before_lock(mutex->lock_id);
+  iree_mutex_impl_lock(mutex);
+  iree_tracing_mutex_after_lock(mutex->lock_id);
+}
+
+bool iree_mutex_try_lock(iree_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  bool was_acquired = iree_mutex_impl_try_lock(mutex);
+  iree_tracing_mutex_after_try_lock(mutex->lock_id, was_acquired);
+  return was_acquired;
+}
+
+void iree_mutex_unlock(iree_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_impl_unlock(mutex);
+  iree_tracing_mutex_after_unlock(mutex->lock_id);
+}
+
+#else
+
+// Untraced fat mutex: thin wrappers over the platform iree_mutex_impl_*
+// macros selected above (SRW / CRITICAL_SECTION / pthread_mutex_t).
+
+void iree_mutex_initialize(iree_mutex_t* out_mutex) {
+  memset(out_mutex, 0, sizeof(*out_mutex));
+  iree_mutex_impl_initialize(out_mutex);
+}
+
+void iree_mutex_deinitialize(iree_mutex_t* mutex) {
+  iree_mutex_impl_deinitialize(mutex);
+  // Scrub so use-after-deinit is more likely to fail loudly.
+  memset(mutex, 0, sizeof(*mutex));
+}
+
+void iree_mutex_lock(iree_mutex_t* mutex) IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_impl_lock(mutex);
+}
+
+bool iree_mutex_try_lock(iree_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  return iree_mutex_impl_try_lock(mutex);
+}
+
+void iree_mutex_unlock(iree_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_impl_unlock(mutex);
+}
+
+#endif // IREE_TRACING_FEATURE_SLOW_LOCKS
+
+//==============================================================================
+// iree_slim_mutex_t
+//==============================================================================
+
+#if (IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FAST_LOCKS)
+
+// Turn fast locks into slow locks.
+// This lets us just reuse that code at the cost of obscuring our lock
+// performance; but at the time you are recording 2+ tracy messages per lock use
+// there's not much interesting to gain from that level of granularity anyway.
+// If these start showing up in traces it means that the higher-level algorithm
+// is taking too many locks and not that this taking time is the core issue.
+
+// With fast-lock tracing enabled, slim mutexes delegate to the traced fat
+// iree_mutex_t stored in |impl| (see section comment above).
+
+void iree_slim_mutex_initialize_impl(const iree_tracing_location_t* src_loc,
+                                     iree_slim_mutex_t* out_mutex) {
+  iree_mutex_initialize_impl(src_loc, &out_mutex->impl);
+}
+
+void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex) {
+  iree_mutex_deinitialize(&mutex->impl);
+}
+
+void iree_slim_mutex_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_lock(&mutex->impl);
+}
+
+bool iree_slim_mutex_try_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  return iree_mutex_try_lock(&mutex->impl);
+}
+
+void iree_slim_mutex_unlock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_unlock(&mutex->impl);
+}
+
+#else
+
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+// Synchronization compiled out: there is exactly one thread by contract, so
+// every operation is a no-op.
+
+void iree_slim_mutex_initialize(iree_slim_mutex_t* out_mutex) {}
+
+void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex) {}
+
+void iree_slim_mutex_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {}
+
+bool iree_slim_mutex_try_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  // With no other threads there can be no contention: a try-lock always
+  // succeeds. (Calling into iree_mutex_try_lock here, as previously written,
+  // would touch memory that initialize - a no-op in this mode - never set up.)
+  return true;
+}
+
+void iree_slim_mutex_unlock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {}
+
+#elif defined(IREE_PLATFORM_APPLE)
+
+// Apple: os_unfair_lock is a 32-bit unfair lock with kernel-assisted waiting.
+
+void iree_slim_mutex_initialize(iree_slim_mutex_t* out_mutex) {
+  out_mutex->value = OS_UNFAIR_LOCK_INIT;
+}
+
+void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex) {
+  // No teardown required; just assert the caller is not destroying a lock
+  // they still hold.
+  os_unfair_lock_assert_not_owner(&mutex->value);
+}
+
+void iree_slim_mutex_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  os_unfair_lock_lock(&mutex->value);
+}
+
+bool iree_slim_mutex_try_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  return os_unfair_lock_trylock(&mutex->value);
+}
+
+void iree_slim_mutex_unlock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  os_unfair_lock_unlock(&mutex->value);
+}
+
+#elif defined(IREE_PLATFORM_WINDOWS) && defined(IREE_MUTEX_USE_WIN32_SRW)
+
+// The SRW on Windows is pointer-sized and slightly better than what we emulate
+// with the futex so let's just use that.
+
+void iree_slim_mutex_initialize(iree_slim_mutex_t* out_mutex) {
+  iree_mutex_impl_initialize(out_mutex);
+}
+
+void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex) {
+  // SRW locks need no destruction; this is a no-op macro on this path.
+  iree_mutex_impl_deinitialize(mutex);
+}
+
+void iree_slim_mutex_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_impl_lock(mutex);
+}
+
+bool iree_slim_mutex_try_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  return iree_mutex_impl_try_lock(mutex);
+}
+
+void iree_slim_mutex_unlock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_impl_unlock(mutex);
+}
+
+#elif defined(IREE_PLATFORM_HAS_FUTEX)
+
+// This implementation is a combo of several sources:
+//
+// Basics of Futexes by Eli Bendersky:
+// https://eli.thegreenplace.net/2018/basics-of-futexes/
+//
+// Futex based locks for C11’s generic atomics by Jens Gustedt:
+// https://hal.inria.fr/hal-01236734/document
+//
+// Mutexes and Condition Variables using Futexes:
+// http://locklessinc.com/articles/mutex_cv_futex/
+//
+// The high bit of the atomic value indicates whether the lock is held; each
+// thread tries to transition the bit from 0->1 to acquire the lock and 1->0 to
+// release it. The lower bits of the value are whether there are any interested
+// waiters. We track these waiters so that we know when we can avoid performing
+// the futex wake syscall.
+
+#define iree_slim_mutex_value(value) (0x80000000u | (value))
+#define iree_slim_mutex_is_locked(value) (0x80000000u & (value))
+
+void iree_slim_mutex_initialize(iree_slim_mutex_t* out_mutex) {
+  // Zero == unlocked with no waiters (see bit layout comment above).
+  memset(out_mutex, 0, sizeof(*out_mutex));
+}
+
+void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex) {
+  // Assert unlocked (callers must ensure the mutex is no longer in use).
+  SYNC_ASSERT(
+      iree_atomic_load_int32(&mutex->value, iree_memory_order_seq_cst) == 0);
+}
+
+// Acquires the lock, spinning briefly before parking on the futex.
+// |mutex->value| layout (per the macros above): bit 31 = locked,
+// low bits = count of interested waiters. |value| below always holds the
+// most recently observed state.
+void iree_slim_mutex_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  // Try first to acquire the lock from an unlocked state.
+  // Note that the weak form can fail spuriously. That's fine, as the perf
+  // benefit in the uncontended cases is worth the additional loop below that
+  // will correctly handle any such failures in contended cases.
+  int32_t value = 0;
+  if (iree_atomic_compare_exchange_weak_int32(
+          &mutex->value, &value, iree_slim_mutex_value(1),
+          iree_memory_order_acquire, iree_memory_order_relaxed)) {
+    // Successfully took the lock and there were no other waiters.
+    return;
+  }
+
+  // Increment the count bits to indicate that we want the lock and are willing
+  // to wait for it to be available. Note that between the CAS above and this
+  // the lock could have been made available and we want to ensure we don't
+  // change the lock bit.
+  value =
+      iree_atomic_fetch_add_int32(&mutex->value, 1, iree_memory_order_relaxed) +
+      1;
+
+  while (true) {
+    // While the lock is available: try to acquire it for this thread.
+    while (!iree_slim_mutex_is_locked(value)) {
+      if (iree_atomic_compare_exchange_weak_int32(
+              &mutex->value, &value, iree_slim_mutex_value(value),
+              iree_memory_order_acquire, iree_memory_order_relaxed)) {
+        // Successfully took the lock.
+        return;
+      }
+
+      // Spin a small amount to give us a tiny chance of falling through to the
+      // wait. We can tune this value based on likely contention, however 10-60
+      // is the recommended value and we should keep it in that order of
+      // magnitude. A way to think of this is "how many spins would we have to
+      // do to equal one call to iree_futex_wait" - if it's faster just to do
+      // a futex wait then we shouldn't be spinning!
+      // TODO(benvanik): measure on real workload on ARM; maybe remove entirely.
+      int spin_count = 100;
+      for (int i = 0; i < spin_count && iree_slim_mutex_is_locked(value); ++i) {
+        value =
+            iree_atomic_load_int32(&mutex->value, iree_memory_order_relaxed);
+      }
+    }
+
+    // While the lock is unavailable: wait for it to become available.
+    while (iree_slim_mutex_is_locked(value)) {
+      // NOTE: we don't care about wait failure here as we are going to loop
+      // and check again anyway.
+      iree_futex_wait(&mutex->value, value, IREE_INFINITE_TIMEOUT_MS);
+      value = iree_atomic_load_int32(&mutex->value, iree_memory_order_relaxed);
+    }
+  }
+}
+
+bool iree_slim_mutex_try_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  // Attempt to acquire the lock from an unlocked state.
+  // We don't care if this fails spuriously as that's the whole point of a try.
+  int32_t value = 0;
+  return iree_atomic_compare_exchange_weak_int32(
+      &mutex->value, &value, iree_slim_mutex_value(1),
+      iree_memory_order_acquire, iree_memory_order_relaxed);
+}
+
+void iree_slim_mutex_unlock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  // Transition 1->0 (unlocking with no waiters) or 2->1 (with waiters).
+  // The subtraction clears the lock bit and one waiter slot in a single RMW;
+  // release ordering publishes the critical section to the next owner.
+  if (iree_atomic_fetch_sub_int32(&mutex->value, iree_slim_mutex_value(1),
+                                  iree_memory_order_release) !=
+      iree_slim_mutex_value(1)) {
+    // One (or more) waiters; wake a single one to avoid a thundering herd of
+    // multiple threads all waking and trying to grab the lock (as only one will
+    // win).
+    //
+    // Note that futexes (futeces? futices? futii?) are unfair and what thread
+    // gets woken is undefined (not FIFO on waiters).
+    iree_futex_wake(&mutex->value, 1);
+  }
+}
+
+#else
+
+// Pass-through to iree_mutex_t as a fallback for platforms without a futex we
+// can use to implement a slim lock. Note that since we are reusing iree_mutex_t
+// when tracing all slim mutexes will be traced along with the fat mutexes.
+
+void iree_slim_mutex_initialize(iree_slim_mutex_t* out_mutex) {
+  iree_mutex_initialize(&out_mutex->impl);
+}
+
+void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex) {
+  iree_mutex_deinitialize(&mutex->impl);
+}
+
+void iree_slim_mutex_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_lock(&mutex->impl);
+}
+
+bool iree_slim_mutex_try_lock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  return iree_mutex_try_lock(&mutex->impl);
+}
+
+void iree_slim_mutex_unlock(iree_slim_mutex_t* mutex)
+    IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
+  iree_mutex_unlock(&mutex->impl);
+}
+
+#endif // IREE_PLATFORM_*
+
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_SLOW_LOCKS
+
+//==============================================================================
+// iree_notification_t
+//==============================================================================
+
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+// No-op implementation that is only used when there is guaranteed to be one
+// thread at a time touching IREE-related code. It is unsafe to use in any
+// situation where either IREE or a user of IREE has multiple threads!
+
+void iree_notification_initialize(iree_notification_t* out_notification) {
+  memset(out_notification, 0, sizeof(*out_notification));
+}
+
+void iree_notification_deinitialize(iree_notification_t* notification) {}
+
+void iree_notification_post(iree_notification_t* notification, int32_t count) {}
+
+iree_wait_token_t iree_notification_prepare_wait(
+    iree_notification_t* notification) {
+  // Wait tokens are meaningless with synchronization disabled; return a dummy.
+  return (iree_wait_token_t)0;
+}
+
+bool iree_notification_commit_wait(iree_notification_t* notification,
+                                   iree_wait_token_t wait_token,
+                                   iree_time_t deadline_ns) {
+  // Single-threaded: report "notified" immediately rather than blocking.
+  return true;
+}
+
+void iree_notification_cancel_wait(iree_notification_t* notification) {}
+
+#elif !defined(IREE_PLATFORM_HAS_FUTEX)
+
+// Emulation of a lock-free futex-backed notification using pthreads.
+// This is a normal cond-var-like usage with support for our prepare/cancel API
+// so that users can still perform their own wait logic.
+
+void iree_notification_initialize(iree_notification_t* out_notification) {
+  memset(out_notification, 0, sizeof(*out_notification));
+  pthread_mutex_init(&out_notification->mutex, NULL);
+  pthread_cond_init(&out_notification->cond, NULL);
+}
+
+void iree_notification_deinitialize(iree_notification_t* notification) {
+  // Assert no more waiters (callers must tear down waiters first).
+  // NOTE: '&notification' restored here; the source had been corrupted by an
+  // HTML-entity pass that turned '&not...' into the '¬' character.
+  pthread_mutex_lock(&notification->mutex);
+  SYNC_ASSERT(notification->waiters == 0);
+  pthread_cond_destroy(&notification->cond);
+  pthread_mutex_unlock(&notification->mutex);
+  pthread_mutex_destroy(&notification->mutex);
+}
+
+// Advances the epoch and wakes up to |count| waiters (IREE_ALL_WAITERS for
+// all). Fixed: restores '&notification' where mojibake ('¬ification') had
+// corrupted the address-of expressions.
+void iree_notification_post(iree_notification_t* notification, int32_t count) {
+  pthread_mutex_lock(&notification->mutex);
+  ++notification->epoch;
+  if (notification->waiters > 0) {
+    // NOTE: we only do the signal if we have waiters - this avoids a syscall
+    // in cases where no one is actively listening.
+    if (count == IREE_ALL_WAITERS) {
+      pthread_cond_broadcast(&notification->cond);
+    } else {
+      for (int32_t i = 0; i < count; ++i) {
+        pthread_cond_signal(&notification->cond);
+      }
+    }
+  }
+  pthread_mutex_unlock(&notification->mutex);
+}
+
+// Registers the caller as a waiter and captures the current epoch; a
+// subsequent post bumps the epoch, which commit_wait below detects.
+// Fixed: restores '&notification' (was mojibake '¬ification').
+iree_wait_token_t iree_notification_prepare_wait(
+    iree_notification_t* notification) {
+  pthread_mutex_lock(&notification->mutex);
+  iree_wait_token_t epoch = notification->epoch;
+  ++notification->waiters;
+  pthread_mutex_unlock(&notification->mutex);
+  return epoch;
+}
+
+// Blocks until a post advances the epoch past |wait_token| or |deadline_ns|
+// passes; returns false on timeout/failure. Fixed: restores '&notification'
+// (was mojibake '¬ification') throughout.
+bool iree_notification_commit_wait(iree_notification_t* notification,
+                                   iree_wait_token_t wait_token,
+                                   iree_time_t deadline_ns) {
+  // NOTE(review): pthread_cond_timedwait measures the absolute timespec
+  // against CLOCK_REALTIME by default; assumes deadline_ns uses the same
+  // epoch - TODO confirm against the iree_time_t definition.
+  struct timespec abs_ts = {
+      .tv_sec = (time_t)(deadline_ns / 1000000000ull),
+      .tv_nsec = (long)(deadline_ns % 1000000000ull),
+  };
+
+  pthread_mutex_lock(&notification->mutex);
+
+  // Spin until notified and the epoch increments from what we captured during
+  // iree_notification_prepare_wait.
+  bool result = true;
+  while (notification->epoch == wait_token) {
+    int ret = pthread_cond_timedwait(&notification->cond, &notification->mutex,
+                                     &abs_ts);
+    if (ret != 0) {
+      // Wait failed (timeout/etc); cancel the wait.
+      // This may happen in spurious wakes but that's fine - the caller is
+      // designed to handle looping again and may want the chance to do some
+      // bookkeeping while it has the thread.
+      result = false;
+      break;
+    }
+  }
+
+  // Remove us from the waiter list - the caller will need to reacquire a wait
+  // token if it wants to wait again.
+  SYNC_ASSERT(notification->waiters > 0);
+  --notification->waiters;
+
+  pthread_mutex_unlock(&notification->mutex);
+
+  return result;
+}
+
+// Unregisters a waiter that decided not to wait after prepare_wait.
+// Fixed: restores '&notification' (was mojibake '¬ification').
+void iree_notification_cancel_wait(iree_notification_t* notification) {
+  pthread_mutex_lock(&notification->mutex);
+  SYNC_ASSERT(notification->waiters > 0);
+  --notification->waiters;
+  pthread_mutex_unlock(&notification->mutex);
+}
+
+#else
+
+// The 64-bit value used to atomically read-modify-write (RMW) the state is
+// split in two and treated as independent 32-bit ints:
+//
+// MSB (63) 32 LSB (0)
+// +-------------------------------------+-------------------------------------+
+// | epoch/notification count | waiter count |
+// +-------------------------------------+-------------------------------------+
+//
+// We use the epoch to wait/wake the futex (which is 32-bits), and as such when
+// we pass the value address to the futex APIs we need to ensure we are only
+// passing the most significant 32-bit value regardless of endianness.
+//
+// We use signed addition on the full 64-bit value to increment/decrement the
+// waiter count. This means that an add of -1ll will decrement the waiter count
+// and do nothing to the epoch count.
+#if defined(IREE_ENDIANNESS_LITTLE)
+#define IREE_NOTIFICATION_EPOCH_OFFSET (/*words=*/1)
+#else
+#define IREE_NOTIFICATION_EPOCH_OFFSET (/*words=*/0)
+#endif // IREE_ENDIANNESS_*
+#define iree_notification_epoch_address(notification) \
+ ((iree_atomic_int32_t*)(&(notification)->value) + \
+ IREE_NOTIFICATION_EPOCH_OFFSET)
+#define IREE_NOTIFICATION_WAITER_INC 0x0000000000000001ull
+#define IREE_NOTIFICATION_WAITER_DEC 0xFFFFFFFFFFFFFFFFull
+#define IREE_NOTIFICATION_WAITER_MASK 0x00000000FFFFFFFFull
+#define IREE_NOTIFICATION_EPOCH_SHIFT 32
+#define IREE_NOTIFICATION_EPOCH_INC \
+ (0x00000001ull << IREE_NOTIFICATION_EPOCH_SHIFT)
+
+void iree_notification_initialize(iree_notification_t* out_notification) {
+  // Zero value == epoch 0, no waiters (see bit layout comment above).
+  memset(out_notification, 0, sizeof(*out_notification));
+}
+
+void iree_notification_deinitialize(iree_notification_t* notification) {
+  // Assert no more waiters (callers must tear down waiters first).
+  // Fixed: restores '&notification' (was mojibake '¬ification').
+  SYNC_ASSERT((iree_atomic_load_int64(&notification->value,
+                                      iree_memory_order_seq_cst) &
+               IREE_NOTIFICATION_WAITER_MASK) == 0);
+}
+
+// Advances the epoch (upper 32 bits) and wakes up to |count| futex waiters.
+// Fixed: restores '&notification' (was mojibake '¬ification').
+void iree_notification_post(iree_notification_t* notification, int32_t count) {
+  uint64_t previous_value = iree_atomic_fetch_add_int64(
+      &notification->value, IREE_NOTIFICATION_EPOCH_INC,
+      iree_memory_order_acq_rel);
+  // Ensure we have at least one waiter; wake up to |count| of them.
+  if (IREE_UNLIKELY(previous_value & IREE_NOTIFICATION_WAITER_MASK)) {
+    iree_futex_wake(iree_notification_epoch_address(notification), count);
+  }
+}
+
+// Registers a waiter (low 32 bits) and returns the epoch (high 32 bits) to
+// compare against in commit_wait. Fixed: restores '&notification' (was
+// mojibake '¬ification').
+iree_wait_token_t iree_notification_prepare_wait(
+    iree_notification_t* notification) {
+  uint64_t previous_value = iree_atomic_fetch_add_int64(
+      &notification->value, IREE_NOTIFICATION_WAITER_INC,
+      iree_memory_order_acq_rel);
+  return (iree_wait_token_t)(previous_value >> IREE_NOTIFICATION_EPOCH_SHIFT);
+}
+
+// Parks on the futex until the epoch advances past |wait_token| or
+// |deadline_ns| passes; always unregisters the waiter before returning.
+// Fixed: restores '&notification' (was mojibake '¬ification') throughout.
+bool iree_notification_commit_wait(iree_notification_t* notification,
+                                   iree_wait_token_t wait_token,
+                                   iree_time_t deadline_ns) {
+  bool result = true;
+
+  // Spin until notified and the epoch increments from what we captured during
+  // iree_notification_prepare_wait.
+  while ((iree_atomic_load_int64(&notification->value,
+                                 iree_memory_order_acquire) >>
+          IREE_NOTIFICATION_EPOCH_SHIFT) == wait_token) {
+    // NOTE: we do an abs->rel conversion within the loop so that we can account
+    // for spurious wakes that may cause us to loop several times with waits of
+    // various time inbetween.
+    uint32_t timeout_ms = iree_absolute_deadline_to_timeout_ms(deadline_ns);
+    iree_status_code_t status_code = iree_futex_wait(
+        iree_notification_epoch_address(notification), wait_token, timeout_ms);
+    if (status_code != IREE_STATUS_OK) {
+      result = false;
+      break;
+    }
+  }
+
+  // TODO(benvanik): benchmark under real workloads.
+  // iree_memory_order_relaxed would suffice for correctness but the faster
+  // the waiter count gets to 0 the less likely we'll wake on the futex.
+  uint64_t previous_value = iree_atomic_fetch_add_int64(
+      &notification->value, IREE_NOTIFICATION_WAITER_DEC,
+      iree_memory_order_seq_cst);
+  SYNC_ASSERT((previous_value & IREE_NOTIFICATION_WAITER_MASK) != 0);
+
+  return result;
+}
+
+// Unregisters a waiter that decided not to wait after prepare_wait.
+// Fixed: restores '&notification' (was mojibake '¬ification').
+void iree_notification_cancel_wait(iree_notification_t* notification) {
+  // TODO(benvanik): benchmark under real workloads.
+  // iree_memory_order_relaxed would suffice for correctness but the faster
+  // the waiter count gets to 0 the less likely we'll wake on the futex.
+  uint64_t previous_value = iree_atomic_fetch_add_int64(
+      &notification->value, IREE_NOTIFICATION_WAITER_DEC,
+      iree_memory_order_seq_cst);
+  SYNC_ASSERT((previous_value & IREE_NOTIFICATION_WAITER_MASK) != 0);
+}
+
+#endif // DISABLED / HAS_FUTEX
+
+// Waits until |condition_fn| returns true, using the prepare/commit/cancel
+// protocol so posts that race the condition check are never lost.
+// Returns false if |timeout| elapses first.
+bool iree_notification_await(iree_notification_t* notification,
+                             iree_condition_fn_t condition_fn,
+                             void* condition_arg, iree_timeout_t timeout) {
+  if (IREE_LIKELY(condition_fn(condition_arg))) {
+    // Fast-path with condition already met.
+    return true;
+  }
+
+  // If a (silly) query then bail immediately after our first condition check.
+  // Otherwise we may have a real deadline and want it in absolute form so that
+  // we can easily handle spurious wakes.
+  if (iree_timeout_is_immediate(timeout)) return false;
+  const iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+  // Slow-path: try-wait until the condition is met.
+  while (true) {
+    // Registering as a waiter *before* re-checking the condition closes the
+    // race where a post lands between the check and the wait.
+    iree_wait_token_t wait_token = iree_notification_prepare_wait(notification);
+    if (condition_fn(condition_arg)) {
+      // Condition is now met; no need to wait on the futex.
+      iree_notification_cancel_wait(notification);
+      return true;
+    } else {
+      if (!iree_notification_commit_wait(notification, wait_token,
+                                         deadline_ns)) {
+        // Wait hit the deadline before we hit the condition.
+        return false;
+      }
+    }
+  }
+
+  // Unreachable (the loop above only exits via return); kept for compilers
+  // that warn about a missing return.
+  return true;
+}
diff --git a/runtime/src/iree/base/internal/synchronization.h b/runtime/src/iree/base/internal/synchronization.h
new file mode 100644
index 0000000..45f3f59
--- /dev/null
+++ b/runtime/src/iree/base/internal/synchronization.h
@@ -0,0 +1,398 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: the best kind of synchronization is no synchronization; always try to
+// design your algorithm so that you don't need anything from this file :)
+// See https://travisdowns.github.io/blog/2020/07/06/concurrency-costs.html
+
+#ifndef IREE_BASE_INTERNAL_SYNCHRONIZATION_H_
+#define IREE_BASE_INTERNAL_SYNCHRONIZATION_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
+// NOTE: clang cannot support thread annotations in C code due to some
+// representational bugs... which means that we can't use it here. Boo.
+// There's some workarounds I've seen but getting TSAN working would be much
+// easier as a starting point.
+#if 0 // defined(IREE_COMPILER_CLANG)
+#define IREE_THREAD_ANNOTATION_ATTRIBUTE(x) __attribute__((x))
+#else
+#define IREE_THREAD_ANNOTATION_ATTRIBUTE(x)
+#endif // IREE_COMPILER_CLANG
+
+#ifdef __cplusplus
+// Documents if a shared field or global variable needs to be protected by a
+// mutex. IREE_GUARDED_BY() allows the user to specify a particular mutex that
+// should be held when accessing the annotated variable.
+#define IREE_GUARDED_BY(x) IREE_THREAD_ANNOTATION_ATTRIBUTE(guarded_by(x))
+#else
+#define IREE_GUARDED_BY(x)
+#endif // __cplusplus
+
+#ifdef __cplusplus
+// Like IREE_GUARDED_BY but specifies that the contents of a pointer are guarded
+// by a mutex instead of the pointer itself.
+#define IREE_PTR_GUARDED_BY(x) \
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(pt_guarded_by(x))
+#else
+#define IREE_PTR_GUARDED_BY(x)
+#endif // __cplusplus
+
+// Allow users to fully disable all synchronization for systems that are known
+// to never need it. This removes our dependency on pthreads.
+#if !IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+// NOTE: we only support futex when not using tsan as we need to add annotations
+// for tsan to understand what we are doing.
+// https://github.com/llvm-mirror/compiler-rt/blob/master/include/sanitizer/tsan_interface.h
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_EMSCRIPTEN) || \
+ defined(IREE_PLATFORM_LINUX) || defined(IREE_PLATFORM_WINDOWS)
+#if !defined(IREE_SANITIZER_THREAD)
+#define IREE_PLATFORM_HAS_FUTEX 1
+#endif // !IREE_SANITIZER_THREAD
+#endif // IREE_PLATFORM_*
+
+#if defined(IREE_PLATFORM_APPLE)
+#include <os/lock.h>
+#endif // IREE_PLATFORM_APPLE
+
+#if !defined(IREE_PLATFORM_WINDOWS)
+#include <pthread.h>
+#endif // !IREE_PLATFORM_WINDOWS
+
+// We have the CRITICAL_SECTION path for now but Slim Reader/Writer lock (SRW)
+// is much better (and what std::mutex uses). SRW doesn't spin, though, and has
+// some other implications that don't quite line up with pthread_mutex_t on most
+// platforms. Once we have larger end-to-end benchmarks we should choose based
+// on workloads.
+// NOTE: defined unconditionally here but only consulted in combination with
+// defined(IREE_PLATFORM_WINDOWS) below, so it is harmless on other platforms.
+#define IREE_MUTEX_USE_WIN32_SRW 1
+
+#endif // !IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Sentinel |count| for iree_notification_post meaning "wake every waiter".
+#define IREE_ALL_WAITERS INT32_MAX
+// Sentinel millisecond timeout meaning "wait forever".
+#define IREE_INFINITE_TIMEOUT_MS UINT32_MAX
+
+//==============================================================================
+// iree_mutex_t
+//==============================================================================
+
+// A normal fat mutex (ala std::mutex).
+// This may be implemented as a slim mutex on certain platforms but in the worst
+// case will be the native platform primitive (like pthread_mutex_t) and as such
+// should not be embedded in structures meant to be kept small.
+//
+// Windows: Slim Reader/Writer (SRW) Locks
+// All others: pthread_mutex_t
+typedef struct iree_mutex_t IREE_THREAD_ANNOTATION_ATTRIBUTE(
+ capability("mutex")) {
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+ int reserved; // unused; keeps the struct non-empty for C compatibility
+#elif defined(IREE_PLATFORM_WINDOWS) && defined(IREE_MUTEX_USE_WIN32_SRW)
+ SRWLOCK value; // Win32 Slim Reader/Writer lock (used in exclusive mode)
+#elif defined(IREE_PLATFORM_WINDOWS)
+ CRITICAL_SECTION value; // Win32 fallback when SRW is disabled
+#else
+ pthread_mutex_t value; // POSIX default
+#endif // IREE_PLATFORM_*
+#if (IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_SLOW_LOCKS)
+ uint32_t lock_id; // identifier used by the tracing integration
+#endif // IREE_TRACING_FEATURE_SLOW_LOCKS
+} iree_mutex_t;
+
+#if (IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_SLOW_LOCKS)
+// Initializes |out_mutex| to the well-defined unlocked contents.
+// Must be called prior to using any other iree_mutex_* method.
+// NOTE(review): multi-statement macro that declares a static local; it can
+// only be used at statement scope and is not safe in an unbraced if/else.
+#define iree_mutex_initialize(out_mutex) \
+ static const iree_tracing_location_t TracyConcat( \
+ __tracy_source_location, __LINE__) = {NULL, __FUNCTION__, __FILE__, \
+ (uint32_t)__LINE__, 0}; \
+ iree_mutex_initialize_impl(&TracyConcat(__tracy_source_location, __LINE__), \
+ out_mutex);
+void iree_mutex_initialize_impl(const iree_tracing_location_t* src_loc,
+ iree_mutex_t* out_mutex);
+#else
+// Initializes |out_mutex| to the well-defined unlocked contents.
+// Must be called prior to using any other iree_mutex_* method.
+void iree_mutex_initialize(iree_mutex_t* out_mutex);
+#endif // IREE_TRACING_FEATURE_SLOW_LOCKS
+
+// Deinitializes |mutex| (after a prior call to iree_mutex_initialize).
+// The mutex must not be held by any thread.
+void iree_mutex_deinitialize(iree_mutex_t* mutex)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(locks_excluded(mutex));
+
+// Locks the |mutex| and returns when held by the caller.
+void iree_mutex_lock(iree_mutex_t* mutex)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(acquire_capability(mutex));
+
+// Tries to lock the |mutex| and returns true if the caller holds the lock.
+// Does not block when the lock is contended.
+bool iree_mutex_try_lock(iree_mutex_t* mutex)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(try_acquire_capability(true, mutex));
+
+// Unlocks the |mutex|, which must be held by the caller.
+void iree_mutex_unlock(iree_mutex_t* mutex)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(release_capability(mutex));
+
+//==============================================================================
+// iree_slim_mutex_t
+//==============================================================================
+
+// TODO(benvanik): instrument with tracy; need to capture source location on
+// init and add storage for ID.
+
+// A lightweight unfair lock.
+// Depending on platform this is significantly smaller than a mutex (4-8 bytes
+// vs 64+ bytes), can always be statically initialized/requires no allocations,
+// and performs the minimal amount of work possible while still playing nicely
+// with the OS thread scheduler.
+//
+// Unlike a full mutex these don't have the ability to be shared across
+// processes (not something we care about), don't have a way to define timeouts,
+// and have only a binary held/unheld state. They are often an order of
+// magnitude faster in uncontended/lightly-contended code and the same
+// performance in highly-contended code, though, so it's worth it for locks that
+// be guarding small data structures (queue pointers, etc) and touched from many
+// threads. Since they are so lightweight it's possible to embed them per-object
+// instead of per-manager and change from a single highly-contended lock to
+// thousands of almost completely uncontended slim locks.
+//
+// Though these locks support spinning they always have a fallback path that
+// ends up calling into the kernel to properly wait the thread. This is critical
+// to avoid pathological cases under contention and allowing for thread priority
+// inheritance when there are multiple threads competing that may otherwise be
+// scheduled in a potentially livelocking order.
+//
+// The "unfair" here comes from the fact that it's possible on certain platforms
+// for certain threads to never be able to acquire the lock in cases of
+// extremely high contention or widely disparate thread priority levels. This is
+// mitigated by ensuring only very small regions of code are guarded and that
+// there's enough work happening outside of the lock on any particular thread to
+// ensure that there's some chance of other threads being able to acquire it.
+//
+// MacOS/iOS: os_unfair_lock
+// Spins and after a short backoff drops to a futex-like behavior of waiting
+// in the kernel. Unfortunately real futexes aren't supported.
+// See:
+// https://developer.apple.com/documentation/os/synchronization
+// https://opensource.apple.com/source/libplatform/libplatform-125/src/os/lock.c.auto.html
+//
+// Emscripten: emscripten_futex_wait/emscripten_futex_wake
+// Spins and after a short backoff drops to a futex-like behavior of waiting
+// in the kernel.
+// See:
+// https://github.com/emscripten-core/emscripten/blob/b43474f55aeb49083b9df74fdd0e52ec8decf788/system/include/emscripten/threading.h#L114-L120
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Atomics/wait
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Atomics/notify
+//
+// Windows: WaitOnAddress/WakeByAddress*
+// Spins and after a short backoff drops to a futex and waits in the kernel.
+// See:
+// https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-waitonaddress
+// https://devblogs.microsoft.com/oldnewthing/20170601-00/?p=96265
+//
+// Linux/Android/others: futex
+// Spins and after a short backoff drops to a futex and waits in the kernel.
+// See:
+// http://locklessinc.com/articles/futex_cheat_sheet/
+// https://man7.org/linux/man-pages/man2/futex.2.html
+// https://eli.thegreenplace.net/2018/basics-of-futexes/
+// https://bartoszmilewski.com/2008/09/01/thin-lock-vs-futex/
+typedef struct iree_slim_mutex_t IREE_THREAD_ANNOTATION_ATTRIBUTE(
+ capability("mutex")) {
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+ int reserved; // unused; keeps the struct non-empty for C compatibility
+#elif (IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FAST_LOCKS)
+ iree_mutex_t impl; // re-route to slow mutex
+#elif defined(IREE_PLATFORM_APPLE)
+ os_unfair_lock value; // Darwin unfair lock (no public futex API)
+#elif defined(IREE_PLATFORM_WINDOWS) && defined(IREE_MUTEX_USE_WIN32_SRW)
+ SRWLOCK value; // Win32 Slim Reader/Writer lock (exclusive mode)
+#elif defined(IREE_PLATFORM_HAS_FUTEX)
+ iree_atomic_int32_t value; // raw futex word
+#else
+ iree_mutex_t impl; // fallback
+#endif // IREE_PLATFORM_*
+} iree_slim_mutex_t;
+
+#if (IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FAST_LOCKS)
+// Initializes |out_mutex| to the well-defined unlocked contents.
+// Must be called prior to using any other iree_slim_mutex_* method.
+// NOTE(review): multi-statement macro that declares a static local; it can
+// only be used at statement scope and is not safe in an unbraced if/else.
+#define iree_slim_mutex_initialize(out_mutex) \
+ static const iree_tracing_location_t TracyConcat( \
+ __tracy_source_location, __LINE__) = {NULL, __FUNCTION__, __FILE__, \
+ (uint32_t)__LINE__, 0}; \
+ iree_slim_mutex_initialize_impl( \
+ &TracyConcat(__tracy_source_location, __LINE__), out_mutex);
+void iree_slim_mutex_initialize_impl(const iree_tracing_location_t* src_loc,
+ iree_slim_mutex_t* out_mutex);
+#else
+// Initializes |out_mutex| to the well-defined unlocked contents.
+// Must be called prior to using any other iree_slim_mutex_* method.
+//
+// Though optional (static initialization is fine) this is required to support
+// lock tracing. Assume it's (mostly) free and always call it if possible. This
+// also allows us to swap in a non-slim lock for enhanced debugging if we run
+// into threading issues.
+void iree_slim_mutex_initialize(iree_slim_mutex_t* out_mutex);
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FAST_LOCKS
+
+// Deinitializes |mutex| (after a prior call to iree_slim_mutex_initialize).
+// The mutex must not be held by any thread.
+void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(locks_excluded(mutex));
+
+// Locks the |mutex| and returns when held by the caller.
+void iree_slim_mutex_lock(iree_slim_mutex_t* mutex)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(acquire_capability(mutex));
+
+// Tries to lock the |mutex| and returns true if the caller holds the lock.
+// Does not block when the lock is contended.
+bool iree_slim_mutex_try_lock(iree_slim_mutex_t* mutex)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(try_acquire_capability(true, mutex));
+
+// Unlocks the |mutex|, which must be held by the caller.
+void iree_slim_mutex_unlock(iree_slim_mutex_t* mutex)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(release_capability(mutex));
+
+//==============================================================================
+// iree_notification_t
+//==============================================================================
+
+// TODO(benvanik): add tracy support for watching the waits.
+
+// A lightweight wait-free cross-thread notification mechanism.
+// Classically called an 'event counter', these replace the use of condvars in
+// lock-free code where you wouldn't want to guard a lock-free data structure
+// with a lock.
+//
+// See:
+// http://www.1024cores.net/home/lock-free-algorithms/eventcounts
+// https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/299245
+// https://github.com/r10a/Event-Counts
+// https://github.com/facebook/folly/blob/master/folly/experimental/EventCount.h
+// https://github.com/concurrencykit/ck/blob/master/include/ck_ec.h
+typedef struct iree_notification_t {
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+ // Nothing required. Unused field to make compilers happy.
+ int reserved;
+#elif !defined(IREE_PLATFORM_HAS_FUTEX)
+ // No futex on darwin/when using TSAN, so use mutex/condvar instead.
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ uint32_t epoch; // generation counter bumped on each post
+ uint32_t waiters; // number of threads currently blocked on |cond|
+#else
+ // Packed epoch + waiter count manipulated atomically (futex word).
+ iree_atomic_int64_t value;
+#endif // IREE_PLATFORM_*
+} iree_notification_t;
+
+// Static initializer equivalent to iree_notification_initialize.
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+// NOTE(review): initializes the plain `int reserved` field with
+// IREE_ATOMIC_VAR_INIT — presumably that macro expands to a plain 0 so this
+// is well-formed; confirm against the atomics header.
+#define IREE_NOTIFICATION_INIT \
+ { IREE_ATOMIC_VAR_INIT(0) }
+#elif !defined(IREE_PLATFORM_HAS_FUTEX)
+#define IREE_NOTIFICATION_INIT \
+ { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0 }
+#else
+#define IREE_NOTIFICATION_INIT \
+ { IREE_ATOMIC_VAR_INIT(0) }
+#endif // notification type
+
+// Initializes a notification to no waiters and an initial epoch of 0.
+void iree_notification_initialize(iree_notification_t* out_notification);
+
+// Deinitializes |notification| (after a prior call to
+// iree_notification_initialize). No threads may be waiting on the notification.
+void iree_notification_deinitialize(iree_notification_t* notification);
+
+// Notifies up to |count| waiters of a change. Each waiter will wake and can
+// check to see if they need to do any additional work.
+// To notify all potential waiters pass IREE_ALL_WAITERS.
+//
+// Acts as (at least) a memory_order_release barrier:
+// A store operation with this memory order performs the release operation: no
+// reads or writes in the current thread can be reordered after this store.
+// All writes in the current thread are visible in other threads that acquire
+// the same atomic variable and writes that carry a dependency into the atomic
+// variable become visible in other threads that consume the same atomic.
+void iree_notification_post(iree_notification_t* notification, int32_t count);
+
+// Epoch snapshot returned by prepare_wait and consumed by commit_wait.
+typedef uint32_t iree_wait_token_t; // opaque
+
+// Prepares for a wait operation, returning a token that must be passed to
+// iree_notification_commit_wait to perform the actual wait.
+// Each prepare must be balanced by exactly one commit_wait or cancel_wait.
+//
+// Acts as a memory_order_acq_rel barrier:
+// A read-modify-write operation with this memory order is both an acquire
+// operation and a release operation. No memory reads or writes in the current
+// thread can be reordered before or after this store. All writes in other
+// threads that release the same atomic variable are visible before the
+// modification and the modification is visible in other threads that acquire
+// the same atomic variable.
+iree_wait_token_t iree_notification_prepare_wait(
+ iree_notification_t* notification);
+
+// Commits a pending wait operation when the caller has ensured it must wait.
+// Waiting will continue until a notification has been posted or |deadline_ns|
+// is reached. Returns false if the deadline is reached before a notification is
+// posted. |deadline_ns| is an absolute time (see iree_timeout_as_deadline_ns).
+//
+// Acts as (at least) a memory_order_acquire barrier:
+// A load operation with this memory order performs the acquire operation on
+// the affected memory location: no reads or writes in the current thread can
+// be reordered before this load. All writes in other threads that release the
+// same atomic variable are visible in the current thread.
+bool iree_notification_commit_wait(iree_notification_t* notification,
+ iree_wait_token_t wait_token,
+ iree_time_t deadline_ns);
+
+// Cancels a pending wait operation without blocking.
+//
+// Acts as (at least) a memory_order_relaxed barrier:
+// Relaxed operation: there are no synchronization or ordering constraints
+// imposed on other reads or writes, only this operation's atomicity is
+// guaranteed.
+void iree_notification_cancel_wait(iree_notification_t* notification);
+
+// Returns true if the condition is true.
+// |arg| is the |condition_arg| passed to the await function.
+// Implementations must ensure they are coherent with their state values.
+typedef bool (*iree_condition_fn_t)(void* arg);
+
+// Blocks and waits until |condition_fn| returns true. Other threads must modify
+// state checked by the |condition_fn| and post the notification.
+// Returns true if the condition is true before |timeout| is reached. If the
+// timeout is infinite then the return will always be true.
+//
+// Example:
+// thread 1:
+// bool check_flag_pred(void* arg) {
+// return iree_atomic_int32_load((iree_atomic_int32_t*)arg,
+// iree_memory_order_acquire) == 1;
+// }
+// iree_atomic_int32_t* flag = ...;
+// iree_notification_await(&notification, check_flag_pred, flag,
+// iree_infinite_timeout());
+// thread 2:
+// iree_atomic_int32_store(flag, 1, iree_memory_order_release);
+// iree_notification_post(&notification, IREE_ALL_WAITERS);
+bool iree_notification_await(iree_notification_t* notification,
+ iree_condition_fn_t condition_fn,
+ void* condition_arg, iree_timeout_t timeout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_SYNCHRONIZATION_H_
diff --git a/runtime/src/iree/base/internal/synchronization_benchmark.cc b/runtime/src/iree/base/internal/synchronization_benchmark.cc
new file mode 100644
index 0000000..9bdc13a
--- /dev/null
+++ b/runtime/src/iree/base/internal/synchronization_benchmark.cc
@@ -0,0 +1,256 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstddef>
+#include <mutex>
+
+#include "benchmark/benchmark.h"
+#include "iree/base/internal/synchronization.h"
+
+namespace {
+
+//==============================================================================
+// Inlined timing utils
+//==============================================================================
+
+// Burns CPU proportional to |count| by incrementing *|data|, defeating the
+// optimizer with DoNotOptimize so the loop is not elided.
+// NOTE(review): `i` is size_t while `count * 10` is int — a negative |count|
+// would wrap to a huge bound in the signed/unsigned comparison; all callers
+// here pass non-negative values.
+void SpinDelay(int count, int* data) {
+ // This emulates work we may be doing while holding the lock (like swapping
+ // around some pointers).
+ for (size_t i = 0; i < count * 10; ++i) {
+ ++(*data);
+ benchmark::DoNotOptimize(*data);
+ }
+}
+
+//==============================================================================
+// iree_mutex_t / iree_slim_mutex_t
+//==============================================================================
+
+// Measures raw lock/unlock throughput of iree_mutex_t with no work inside the
+// critical section. The mutex is a function-local static so all benchmark
+// threads contend on the same instance; it is intentionally leaked (never
+// deinitialized) for the process lifetime.
+void BM_Mutex(benchmark::State& state) {
+ static iree_mutex_t* mu = ([]() -> iree_mutex_t* {
+ auto mutex = new iree_mutex_t();
+ iree_mutex_initialize(mutex);
+ return mutex;
+ })();
+ for (auto _ : state) {
+ iree_mutex_lock(mu);
+ benchmark::DoNotOptimize(*mu);
+ iree_mutex_unlock(mu);
+ }
+}
+BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
+
+template <typename MutexType>
+class RaiiLocker;
+
+// RAII adapter exposing the iree_mutex_t C API to the templated benchmarks:
+// locks in the constructor, unlocks in the destructor. Thread-safety analysis
+// is disabled because the C annotations are not visible through the wrapper.
+template <>
+class RaiiLocker<iree_mutex_t> {
+ public:
+ static void Initialize(iree_mutex_t* out_mu) {
+ iree_mutex_initialize(out_mu);
+ }
+ static void Deinitialize(iree_mutex_t* mu) { iree_mutex_deinitialize(mu); }
+ explicit RaiiLocker(iree_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis)
+ : mu_(mu) {
+ iree_mutex_lock(mu_);
+ }
+ ~RaiiLocker() IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ iree_mutex_unlock(mu_);
+ }
+
+ private:
+ iree_mutex_t* mu_;
+};
+
+// RAII adapter for iree_slim_mutex_t; mirrors the iree_mutex_t specialization
+// above so the same benchmark templates can drive both lock types.
+template <>
+class RaiiLocker<iree_slim_mutex_t> {
+ public:
+ static void Initialize(iree_slim_mutex_t* out_mu) {
+ iree_slim_mutex_initialize(out_mu);
+ }
+ static void Deinitialize(iree_slim_mutex_t* mu) {
+ iree_slim_mutex_deinitialize(mu);
+ }
+ explicit RaiiLocker(iree_slim_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis)
+ : mu_(mu) {
+ iree_slim_mutex_lock(mu_);
+ }
+ ~RaiiLocker() IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ iree_slim_mutex_unlock(mu_);
+ }
+
+ private:
+ iree_slim_mutex_t* mu_;
+};
+
+// RAII adapter for std::mutex, used as the baseline implementation the IREE
+// lock types are benchmarked against. Initialize/Deinitialize are no-ops
+// because std::mutex manages its own lifetime.
+template <>
+class RaiiLocker<std::mutex> {
+ public:
+ static void Initialize(std::mutex* out_mu) {}
+ static void Deinitialize(std::mutex* mu) {}
+ explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
+ ~RaiiLocker() { mu_->unlock(); }
+
+ private:
+ std::mutex* mu_;
+};
+
+// Measures initialize+deinitialize cost of a stack-allocated mutex per
+// iteration (no locking); registered below for each mutex type.
+template <typename MutexType>
+void BM_CreateDelete(benchmark::State& state) {
+ for (auto _ : state) {
+ MutexType mu;
+ RaiiLocker<MutexType>::Initialize(&mu);
+ benchmark::DoNotOptimize(mu);
+ RaiiLocker<MutexType>::Deinitialize(&mu);
+ }
+}
+
+BENCHMARK_TEMPLATE(BM_CreateDelete, iree_mutex_t)->UseRealTime()->Threads(1);
+
+BENCHMARK_TEMPLATE(BM_CreateDelete, iree_slim_mutex_t)
+ ->UseRealTime()
+ ->Threads(1);
+
+BENCHMARK_TEMPLATE(BM_CreateDelete, std::mutex)->UseRealTime()->Threads(1);
+
+// Measures lock cost with a per-benchmark-instance mutex (each thread gets its
+// own |mu| local, so there is no cross-thread contention). state.range(0)
+// controls the amount of simulated work inside the critical section.
+// NOTE(review): |mu| is never Deinitialize()d — harmless for std::mutex but a
+// leak for iree_mutex_t if its deinit releases resources; benchmark-only code.
+template <typename MutexType>
+void BM_Uncontended(benchmark::State& state) {
+ MutexType mu;
+ RaiiLocker<MutexType>::Initialize(&mu);
+ int data = 0;
+ int local = 0;
+ for (auto _ : state) {
+ // Here we model both local work outside of the critical section as well as
+ // some work inside of the critical section. The idea is to capture some
+ // more or less realisitic contention levels.
+ // If contention is too low, the benchmark won't measure anything useful.
+ // If contention is unrealistically high, the benchmark will favor
+ // bad mutex implementations that block and otherwise distract threads
+ // from the mutex and shared state for as much as possible.
+ // To achieve this amount of local work is multiplied by number of threads
+ // to keep ratio between local work and critical section approximately
+ // equal regardless of number of threads.
+ SpinDelay(100 * state.threads(), &local);
+ RaiiLocker<MutexType> locker(&mu);
+ SpinDelay(static_cast<int>(state.range(0)), &data);
+ }
+}
+
+BENCHMARK_TEMPLATE(BM_Uncontended, iree_mutex_t)
+ ->UseRealTime()
+ ->Threads(1)
+ ->Arg(50)
+ ->Arg(200);
+
+BENCHMARK_TEMPLATE(BM_Uncontended, iree_slim_mutex_t)
+ ->UseRealTime()
+ ->Threads(1)
+ ->Arg(50)
+ ->Arg(200);
+
+BENCHMARK_TEMPLATE(BM_Uncontended, std::mutex)
+ ->UseRealTime()
+ ->Threads(1)
+ ->Arg(50)
+ ->Arg(200);
+
+// Measures lock cost under real contention: all benchmark threads share one
+// static |shared| mutex+counter (intentionally leaked for process lifetime).
+// state.range(0) controls the amount of simulated work inside the critical
+// section; local work outside it scales with thread count (see comments).
+template <typename MutexType>
+void BM_Contended(benchmark::State& state) {
+ struct Shared {
+ MutexType mu;
+ int data = 0;
+ Shared() { RaiiLocker<MutexType>::Initialize(&mu); }
+ };
+ static auto* shared = new Shared();
+ int local = 0;
+ for (auto _ : state) {
+ // Here we model both local work outside of the critical section as well as
+ // some work inside of the critical section. The idea is to capture some
+ // more or less realisitic contention levels.
+ // If contention is too low, the benchmark won't measure anything useful.
+ // If contention is unrealistically high, the benchmark will favor
+ // bad mutex implementations that block and otherwise distract threads
+ // from the mutex and shared state for as much as possible.
+ // To achieve this amount of local work is multiplied by number of threads
+ // to keep ratio between local work and critical section approximately
+ // equal regardless of number of threads.
+ SpinDelay(100 * state.threads(), &local);
+ RaiiLocker<MutexType> locker(&shared->mu);
+ SpinDelay(static_cast<int>(state.range(0)), &shared->data);
+ }
+}
+
+BENCHMARK_TEMPLATE(BM_Contended, iree_mutex_t)
+ ->UseRealTime()
+ // ThreadPerCpu poorly handles non-power-of-two CPU counts.
+ ->Threads(1)
+ ->Threads(2)
+ ->Threads(4)
+ ->Threads(6)
+ ->Threads(8)
+ ->Threads(12)
+ ->Threads(16)
+ ->Threads(24)
+ ->Threads(32)
+ ->Threads(48)
+ ->Threads(64)
+ ->Threads(96)
+ // Some empirically chosen amounts of work in critical section.
+ // 1 is low contention, 200 is high contention and few values in between.
+ ->Arg(50)
+ ->Arg(200);
+
+BENCHMARK_TEMPLATE(BM_Contended, iree_slim_mutex_t)
+ ->UseRealTime()
+ // ThreadPerCpu poorly handles non-power-of-two CPU counts.
+ ->Threads(1)
+ ->Threads(2)
+ ->Threads(4)
+ ->Threads(6)
+ ->Threads(8)
+ ->Threads(12)
+ ->Threads(16)
+ ->Threads(24)
+ ->Threads(32)
+ ->Threads(48)
+ ->Threads(64)
+ ->Threads(96)
+ // Some empirically chosen amounts of work in critical section.
+ // 1 is low contention, 200 is high contention and few values in between.
+ ->Arg(50)
+ ->Arg(200);
+
+BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
+ ->UseRealTime()
+ // ThreadPerCpu poorly handles non-power-of-two CPU counts.
+ ->Threads(1)
+ ->Threads(2)
+ ->Threads(4)
+ ->Threads(6)
+ ->Threads(8)
+ ->Threads(12)
+ ->Threads(16)
+ ->Threads(24)
+ ->Threads(32)
+ ->Threads(48)
+ ->Threads(64)
+ ->Threads(96)
+ // Some empirically chosen amounts of work in critical section.
+ // 1 is low contention, 200 is high contention and few values in between.
+ ->Arg(50)
+ ->Arg(200);
+
+//==============================================================================
+// iree_notification_t
+//==============================================================================
+
+// TODO(benvanik): benchmark this; it should in the worst case be as bad as
+// mutex/futex (as that's what is used), but at the moment we don't really
+// care beyond that.
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/synchronization_test.cc b/runtime/src/iree/base/internal/synchronization_test.cc
new file mode 100644
index 0000000..a44b994
--- /dev/null
+++ b/runtime/src/iree/base/internal/synchronization_test.cc
@@ -0,0 +1,218 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/synchronization.h"
+
+#include <thread>
+
+#include "iree/testing/gtest.h"
+
+namespace {
+
+//==============================================================================
+// Test utils
+//==============================================================================
+
+template <typename T>
+class Mutex;
+
+// Static-method shim exposing the iree_mutex_t C API through the templated
+// Mutex<T> interface used by the shared test bodies below. Thread-safety
+// analysis is disabled since the C annotations don't flow through the shim.
+template <>
+class Mutex<iree_mutex_t> {
+ public:
+ static void Initialize(iree_mutex_t* out_mu) {
+ iree_mutex_initialize(out_mu);
+ }
+ static void Deinitialize(iree_mutex_t* mu) { iree_mutex_deinitialize(mu); }
+ static void Lock(iree_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ iree_mutex_lock(mu);
+ }
+ static bool TryLock(iree_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ return iree_mutex_try_lock(mu);
+ }
+ static void Unlock(iree_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ iree_mutex_unlock(mu);
+ }
+};
+
+// Static-method shim for iree_slim_mutex_t; mirrors the iree_mutex_t
+// specialization above so the same test bodies exercise both lock types.
+template <>
+class Mutex<iree_slim_mutex_t> {
+ public:
+ static void Initialize(iree_slim_mutex_t* out_mu) {
+ iree_slim_mutex_initialize(out_mu);
+ }
+ static void Deinitialize(iree_slim_mutex_t* mu) {
+ iree_slim_mutex_deinitialize(mu);
+ }
+ static void Lock(iree_slim_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ iree_slim_mutex_lock(mu);
+ }
+ static bool TryLock(iree_slim_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ return iree_slim_mutex_try_lock(mu);
+ }
+ static void Unlock(iree_slim_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ iree_slim_mutex_unlock(mu);
+ }
+};
+
+// Tests that a mutex allows exclusive access to a region by touching it from
+// multiple threads.
+template <typename T>
+void TestMutexExclusiveAccess() {
+ // We'll increment the counter back and forth as we touch it from multiple
+ // threads.
+ int counter = 0;
+
+ T mu;
+ Mutex<T>::Initialize(&mu);
+
+ // Hold the lock at the start. The threads should block waiting for the lock
+ // to be released so they can take it.
+ ASSERT_EQ(0, counter);
+ Mutex<T>::Lock(&mu);
+
+ // Start up a thread to ++counter (it should block since we hold the lock).
+ std::thread th1([&]() {
+ Mutex<T>::Lock(&mu);
+ ++counter;
+ Mutex<T>::Unlock(&mu);
+ });
+
+ // Unlock and wait for the thread to acquire the lock and finish its work.
+ ASSERT_EQ(0, counter);
+ Mutex<T>::Unlock(&mu);
+ th1.join();
+
+ // Thread should have been able to increment the counter.
+ ASSERT_EQ(1, counter);
+
+ Mutex<T>::Deinitialize(&mu);
+}
+
+// Tests that try lock bails when the lock is held by another thread.
+// The main thread holds the lock for the entire lifetime of th1, so TryLock
+// in the thread must fail and the counter must remain unchanged.
+template <typename T>
+void TestMutexExclusiveAccessTryLock() {
+ int counter = 0;
+ T mu;
+ Mutex<T>::Initialize(&mu);
+
+ // Hold the lock at the start. The try lock should fail and the thread should
+ // exit without changing the counter value.
+ ASSERT_EQ(0, counter);
+ Mutex<T>::Lock(&mu);
+ std::thread th1([&]() {
+ if (Mutex<T>::TryLock(&mu)) {
+ ++counter;
+ Mutex<T>::Unlock(&mu);
+ }
+ });
+
+ // Wait for the thread to try (and fail).
+ th1.join();
+ Mutex<T>::Unlock(&mu);
+
+ // The thread should not have been able to change the counter.
+ ASSERT_EQ(0, counter);
+
+ Mutex<T>::Deinitialize(&mu);
+}
+
+//==============================================================================
+// iree_mutex_t
+//==============================================================================
+
+// Single-threaded smoke test: initialize, try-lock, lock/unlock, deinitialize.
+TEST(MutexTest, Lifetime) {
+ iree_mutex_t mutex;
+ iree_mutex_initialize(&mutex);
+ bool did_lock = iree_mutex_try_lock(&mutex);
+ EXPECT_TRUE(did_lock);
+ if (did_lock) iree_mutex_unlock(&mutex);
+ iree_mutex_lock(&mutex);
+ iree_mutex_unlock(&mutex);
+ iree_mutex_deinitialize(&mutex);
+}
+
+// Instantiations of the shared test bodies above for iree_mutex_t.
+TEST(MutexTest, ExclusiveAccess) { TestMutexExclusiveAccess<iree_mutex_t>(); }
+
+TEST(MutexTest, ExclusiveAccessTryLock) {
+ TestMutexExclusiveAccessTryLock<iree_mutex_t>();
+}
+
+//==============================================================================
+// iree_slim_mutex_t
+//==============================================================================
+
+// Single-threaded smoke test: initialize, try-lock, lock/unlock, deinitialize.
+TEST(SlimMutexTest, Lifetime) {
+ iree_slim_mutex_t mutex;
+ iree_slim_mutex_initialize(&mutex);
+ bool did_lock = iree_slim_mutex_try_lock(&mutex);
+ EXPECT_TRUE(did_lock);
+ if (did_lock) iree_slim_mutex_unlock(&mutex);
+ iree_slim_mutex_lock(&mutex);
+ iree_slim_mutex_unlock(&mutex);
+ iree_slim_mutex_deinitialize(&mutex);
+}
+
+// Instantiations of the shared test bodies above for iree_slim_mutex_t.
+TEST(SlimMutexTest, ExclusiveAccess) {
+ TestMutexExclusiveAccess<iree_slim_mutex_t>();
+}
+
+TEST(SlimMutexTest, ExclusiveAccessTryLock) {
+ TestMutexExclusiveAccessTryLock<iree_slim_mutex_t>();
+}
+
+//==============================================================================
+// iree_notification_t
+//==============================================================================
+
+// Tested implicitly in threading_test.cc.
+
+// An immediate timeout with a never-true condition must return false without
+// blocking (the await should bail right after the first condition check).
+// The 50ms bound is generous slop for slow CI machines.
+TEST(NotificationTest, TimeoutImmediate) {
+  iree_notification_t notification;
+  iree_notification_initialize(&notification);
+
+  iree_time_t start_ns = iree_time_now();
+
+  EXPECT_FALSE(iree_notification_await(
+      &notification,
+      +[](void* entry_arg) -> bool {
+        return false;  // condition is never true
+      },
+      NULL, iree_immediate_timeout()));
+
+  iree_duration_t delta_ns = iree_time_now() - start_ns;
+  iree_duration_t delta_ms = delta_ns / 1000000;
+  EXPECT_LT(delta_ms, 50);  // slop
+
+  iree_notification_deinitialize(&notification);
+}
+
+// A 100ms timeout with a never-true condition must return false after
+// actually waiting; we only assert a >=50ms lower bound to tolerate
+// scheduler slop on loaded CI machines.
+TEST(NotificationTest, Timeout) {
+  iree_notification_t notification;
+  iree_notification_initialize(&notification);
+
+  iree_time_t start_ns = iree_time_now();
+
+  EXPECT_FALSE(iree_notification_await(
+      &notification,
+      +[](void* entry_arg) -> bool {
+        return false;  // condition is never true
+      },
+      NULL, iree_make_timeout_ms(100)));
+
+  iree_duration_t delta_ns = iree_time_now() - start_ns;
+  iree_duration_t delta_ms = delta_ns / 1000000;
+  EXPECT_GE(delta_ms, 50);  // slop
+
+  iree_notification_deinitialize(&notification);
+}
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/threading.c b/runtime/src/iree/base/internal/threading.c
new file mode 100644
index 0000000..699941d
--- /dev/null
+++ b/runtime/src/iree/base/internal/threading.c
@@ -0,0 +1,167 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/threading.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/threading_impl.h"
+
+// Portable strncpy_s shim: copies at most min(count, strlen(src)) characters
+// of src into dest and always NUL-terminates. Returns 0 on success, EINVAL on
+// NULL/zero-size arguments, or ERANGE when dest is too small.
+int iree_strncpy_s(char* IREE_RESTRICT dest, size_t destsz,
+                   const char* IREE_RESTRICT src, size_t count) {
+#if defined(IREE_COMPILER_MSVC) || defined(__STDC_LIB_EXT1__)
+  // Use the real Annex K implementation when the toolchain provides one.
+  return strncpy_s(dest, destsz, src, count);
+#else
+  if (!src || !dest || !destsz) return EINVAL;
+  // strnlen bounds the scan so unterminated src buffers are never overrun.
+  size_t src_len = strnlen(src, destsz);
+  if (count >= destsz && destsz <= src_len) return ERANGE;
+  if (src_len > count) src_len = count;
+  while (*src != 0 && src_len > 0) {
+    *(dest++) = *(src++);
+    --src_len;
+  }
+  // Always NUL-terminate; src_len was clamped above so this stays in bounds.
+  *dest = 0;
+  return 0;
+#endif  // IREE_COMPILER_MSVC || __STDC_LIB_EXT1__
+}
+
+//==============================================================================
+// iree_thread_affinity_t
+//==============================================================================
+
+// TODO(benvanik): add more helpers and possibly move cpuinfo usage into here.
+
+// Sets |out_thread_affinity| to the "any processor" affinity: all-zero bits,
+// which leaves |specified| unset so platform backends skip pinning entirely.
+void iree_thread_affinity_set_any(iree_thread_affinity_t* out_thread_affinity) {
+  memset(out_thread_affinity, 0x00, sizeof(*out_thread_affinity));
+}
+
+//==============================================================================
+// iree_thread_override_list_t
+//==============================================================================
+// This is shared by multiple platform implementations and gets stripped in LTO
+// when unused.
+
+// A single active priority override; doubly-linked into its owning list so
+// that removal from the middle of the list is O(1).
+struct iree_thread_override_t {
+  iree_thread_override_list_t* list;  // owning list (used on unlink/free)
+  iree_thread_override_t* next;       // next override in list (or NULL)
+  iree_thread_override_t* prev;       // previous override in list (or NULL)
+  iree_thread_t* thread;              // thread whose priority is overridden
+  iree_thread_priority_class_t priority_class;  // requested minimum class
+};
+
+// Initializes |out_list| with |base_priority_class| as the floor priority and
+// |set_priority_fn| as the callback used to apply effective class changes.
+void iree_thread_override_list_initialize(
+    iree_thread_set_priority_fn_t set_priority_fn,
+    iree_thread_priority_class_t base_priority_class,
+    iree_allocator_t allocator, iree_thread_override_list_t* out_list) {
+  memset(out_list, 0, sizeof(*out_list));
+  out_list->set_priority_fn = set_priority_fn;
+  out_list->base_priority_class = base_priority_class;
+  out_list->allocator = allocator;
+  iree_slim_mutex_initialize(&out_list->mutex);
+  out_list->current_priority_class = base_priority_class;
+}
+
+// Deinitializes the list; all overrides must already have been removed.
+void iree_thread_override_list_deinitialize(iree_thread_override_list_t* list) {
+#if !defined(NDEBUG)
+  // Assert that all overrides have been removed (and properly freed).
+  iree_slim_mutex_lock(&list->mutex);
+  assert(!list->head);
+  iree_slim_mutex_unlock(&list->mutex);
+#endif  // !NDEBUG
+
+  iree_slim_mutex_deinitialize(&list->mutex);
+}
+
+// Updates the priority class of the thread to the maximum across all overrides
+// and the base thread priority class.
+//
+// NOTE: assumes the lock is held so the list can be safely walked.
+static void iree_thread_override_list_update_priority_class(
+    iree_thread_override_list_t* list, iree_thread_t* thread) {
+  // Compute the new maximum priority class with the override now added.
+  iree_thread_priority_class_t max_priority_class = list->base_priority_class;
+  for (iree_thread_override_t* override = list->head; override != NULL;
+       override = override->next) {
+    max_priority_class = iree_max(max_priority_class, override->priority_class);
+  }
+  // Only hit the OS below if the effective class actually changed.
+  bool needs_update = max_priority_class != list->current_priority_class;
+  list->current_priority_class = max_priority_class;
+
+  // Change priority if needed (this way we are avoiding syscalls if we get a
+  // wave of overrides at the same priority class).
+  //
+  // NOTE: we do this inside the lock so that we don't lose priorities. It'd be
+  // nice to do this outside the lock if we could so we aren't holding it during
+  // a syscall. Overrides should (hopefully) be infrequent enough that this is
+  // rarely called.
+  if (needs_update) {
+    list->set_priority_fn(thread, max_priority_class);
+  }
+}
+
+// Adds an override of |priority_class| for |thread| to |list| and applies it.
+// Returns NULL if allocation fails (the override request is silently dropped).
+iree_thread_override_t* iree_thread_override_list_add(
+    iree_thread_override_list_t* list, iree_thread_t* thread,
+    iree_thread_priority_class_t priority_class) {
+  // Allocate the override struct we'll pass back to the caller.
+  iree_thread_override_t* override = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      list->allocator, sizeof(*override), (void**)&override);
+  if (IREE_UNLIKELY(!iree_status_is_ok(iree_status_consume_code(status)))) {
+    return NULL;
+  }
+  override->list = list;
+  override->next = NULL;
+  override->prev = NULL;
+  override->thread = thread;
+  override->priority_class = priority_class;
+
+  iree_slim_mutex_lock(&list->mutex);
+
+  // Add the override to the list (push front).
+  override->next = list->head;
+  if (list->head) {
+    list->head->prev = override;
+  }
+  list->head = override;
+
+  // Update and change priority if needed.
+  // NOTE: the lock must be held.
+  iree_thread_override_list_update_priority_class(list, thread);
+
+  iree_slim_mutex_unlock(&list->mutex);
+
+  return override;
+}
+
+// Unlinks |override| from its owning list, recomputes and applies the thread's
+// effective priority class, and deallocates the override.
+void iree_thread_override_remove_self(iree_thread_override_t* override) {
+  iree_thread_override_list_t* list = override->list;
+  iree_slim_mutex_lock(&list->mutex);
+
+  // Remove the override from the list.
+  if (override->prev) {
+    override->prev->next = override->next;
+  }
+  if (override->next) {
+    override->next->prev = override->prev;
+  }
+  if (list->head == override) {
+    list->head = override->next;
+  }
+
+  // Update and change priority if needed.
+  // NOTE: the lock must be held.
+  iree_thread_t* thread = override->thread;
+  iree_thread_override_list_update_priority_class(list, thread);
+
+  iree_slim_mutex_unlock(&list->mutex);
+
+  // Deallocate the override outside of the lock as no one should be using it
+  // anymore.
+  iree_allocator_free(list->allocator, override);
+}
diff --git a/runtime/src/iree/base/internal/threading.h b/runtime/src/iree/base/internal/threading.h
new file mode 100644
index 0000000..1518fd0
--- /dev/null
+++ b/runtime/src/iree/base/internal/threading.h
@@ -0,0 +1,179 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_THREADING_H_
+#define IREE_BASE_INTERNAL_THREADING_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//==============================================================================
+// iree_thread_t
+//==============================================================================
+
+typedef struct iree_thread_t iree_thread_t;
+
+// Specifies a thread's priority class.
+// These translate roughly to the same thing across all platforms, though they
+// are just a hint and the schedulers on various platforms may behave very
+// differently. When in doubt prefer to write code that works at the extremes
+// of the classes.
+// NOTE: values are ordered so numeric comparison (e.g. iree_max in the
+// override list) yields the higher-priority class; keep the -2..2 ordering.
+typedef enum iree_thread_priority_class_e {
+  // Lowest possible priority used for background/idle work.
+  // Maps to QOS_CLASS_BACKGROUND.
+  IREE_THREAD_PRIORITY_CLASS_LOWEST = -2,
+  // Low priority work but still something the user expects to complete soon.
+  // Maps to QOS_CLASS_UTILITY.
+  IREE_THREAD_PRIORITY_CLASS_LOW = -1,
+  // Normal/default priority for the system.
+  // Maps to QOS_CLASS_DEFAULT.
+  IREE_THREAD_PRIORITY_CLASS_NORMAL = 0,
+  // High priority work for operations the user is waiting on.
+  // Maps to QOS_CLASS_USER_INITIATED.
+  IREE_THREAD_PRIORITY_CLASS_HIGH = 1,
+  // Highest possible priority used for interactive work.
+  // Maps to QOS_CLASS_USER_INTERACTIVE.
+  IREE_THREAD_PRIORITY_CLASS_HIGHEST = 2,
+} iree_thread_priority_class_t;
+
+// Specifies the processor affinity for a particular thread.
+// Each platform handles this differently (if at all).
+//
+// macOS/iOS:
+// Only affinity tags are supported; the ID will be used by the kernel to
+// group threads that having matching values together and (hopefully) schedule
+// them on cores that may share some level of the cache hierarchy. The API is
+// effectively just asking nicely and hoping the kernel is on the same
+// wavelength.
+//
+// Linux/Android:
+// sched_setaffinity is used to pin the thread to the core with the given ID.
+// There are, naturally, issues on Android where if the governor has turned
+// off some cores (such as powering down big cores in an ARM big.LITTLE
+// configuration) the affinity request will be dropped on the floor even if
+// the cores are later enabled. This is one of the reasons why we note in
+// iree_thread_request_affinity that requests may need to be made at
+// ¯\_(ツ)_/¯ intervals. In the future we can try to hook into power
+// management infra to see if we can tell when we need to do this.
+//
+// Windows:
+// Stuff just works. Love it.
+typedef struct iree_thread_affinity_t {
+  uint32_t specified : 1;  // 0 = no affinity requested; backends early-out
+  uint32_t smt : 1;  // NOTE(review): presumably an SMT-sibling hint — not
+                     // consulted by the darwin/pthreads backends here; confirm
+  uint32_t group : 7;  // presumably a processor-group index — verify in users
+  uint32_t id : 23;  // core ID (Linux sched_setaffinity) / affinity tag (mac)
+} iree_thread_affinity_t;
+
+// Sets |thread_affinity| to match with any processor in the system.
+void iree_thread_affinity_set_any(iree_thread_affinity_t* out_thread_affinity);
+
+// Thread creation parameters.
+// All are optional and the entire struct can safely be zero-initialized.
+typedef struct iree_thread_create_params_t {
+  // Developer-visible name for the thread displayed in tooling.
+  // May be omitted for the system-default name (usually thread ID).
+  // Implementations copy (and may truncate) this into fixed-size storage so
+  // it need not outlive the creation call.
+  iree_string_view_t name;
+
+  // Stack size of the new thread, in bytes. If omitted a platform-defined
+  // default system stack size will be used.
+  size_t stack_size;
+
+  // Whether to create the thread in a suspended state. The thread will be
+  // initialized but not call the entry routine until it is resumed with
+  // iree_thread_resume. This can be useful to avoid a thundering herd upon
+  // creation of many threads.
+  bool create_suspended;
+
+  // Initial priority class.
+  // This may be changed later via iree_thread_priority_class_override_begin;
+  // see that for more information.
+  iree_thread_priority_class_t priority_class;
+
+  // Initial thread affinity.
+  // This may be changed later via iree_thread_request_affinity; see that for
+  // more information.
+  iree_thread_affinity_t initial_affinity;
+} iree_thread_create_params_t;
+
+typedef int (*iree_thread_entry_t)(void* entry_arg);
+
+// Creates a new thread and calls |entry| with |entry_arg|.
+// |params| can be used to specify additional thread creation parameters but can
+// also be zero-initialized to use defaults.
+//
+// The thread will be created and configured prior to returning from the
+// function. If the create_suspended parameter is set the thread will be
+// suspended and must be resumed with iree_thread_resume. Otherwise, the thread
+// may already be inside of the |entry| function by the time the function
+// returns.
+//
+// |entry_arg| lifetime is not managed and unless the caller is waiting for the
+// thread to start must not be stack-allocated.
+iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
+ iree_thread_create_params_t params,
+ iree_allocator_t allocator,
+ iree_thread_t** out_thread);
+
+// Retains the given |thread| for the caller.
+void iree_thread_retain(iree_thread_t* thread);
+
+// Releases the given |thread| from the caller.
+void iree_thread_release(iree_thread_t* thread);
+
+// Returns a platform-defined thread ID for the given |thread|.
+uintptr_t iree_thread_id(iree_thread_t* thread);
+
+typedef struct iree_thread_override_t iree_thread_override_t;
+
+// Begins overriding the priority class of the given |thread|.
+// The priority of the thread will be the max of the base priority and the
+// overridden priority. Callers must pass the returned override token to
+// iree_thread_override_end.
+iree_thread_override_t* iree_thread_priority_class_override_begin(
+ iree_thread_t* thread, iree_thread_priority_class_t priority_class);
+
+// Ends a priority class override that was began for a thread with
+// iree_thread_priority_class_override_begin.
+void iree_thread_override_end(iree_thread_override_t* override_token);
+
+// Updates the thread affinity of the given |thread|.
+// Affinities are not sticky and may need to be refreshed over time as CPUs are
+// enabled/disabled by the OS (such as power mode changes, governor adjustments,
+// etc). Users wanting to ensure threads have specific affinities may want to
+// request updates whenever new large amounts of work are about to be performed.
+//
+// NOTE: thread affinities are just a hint. The OS scheduler is free to do
+// whatever it wants up to and including entirely ignoring the specified
+// affinity. In many cases where cores are oversubscribed setting an affinity
+// mask can pessimize battery/thermals/performance as the OS will sometimes try
+// to shuffle around threads to disable physical cores/etc.
+//
+// Compatibility warning: Apple/darwin only support affinity groups, with each
+// unique affinity sharing time with all others of the same value. This means
+// that trying to get clever with several thread sets with overlapping
+// affinities will likely not work as expected. Try to stick with threads that
+// run only on a single processor.
+void iree_thread_request_affinity(iree_thread_t* thread,
+ iree_thread_affinity_t affinity);
+
+// Resumes |thread| if it was created suspended.
+// This has no effect if the thread is not suspended.
+void iree_thread_resume(iree_thread_t* thread);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_THREADING_H_
diff --git a/runtime/src/iree/base/internal/threading_darwin.c b/runtime/src/iree/base/internal/threading_darwin.c
new file mode 100644
index 0000000..7f3bd00
--- /dev/null
+++ b/runtime/src/iree/base/internal/threading_darwin.c
@@ -0,0 +1,250 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: must be first to ensure that we can define settings for all includes.
+#include "iree/base/internal/threading_impl.h"
+
+#if defined(IREE_PLATFORM_APPLE)
+
+#include <errno.h>
+#include <mach/mach.h>
+#include <mach/thread_act.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/threading.h"
+#include "iree/base/tracing.h"
+
+// Useful to see how pthreads is implemented on (old) darwin:
+// https://opensource.apple.com/source/Libc/Libc-825.40.1/pthreads/pthread.c.auto.html
+
+// Darwin thread state; shared between the creating thread and the new thread
+// via the atomic reference count (each party releases its own reference).
+struct iree_thread_t {
+  iree_atomic_ref_count_t ref_count;  // held by creator and by the thread
+  iree_allocator_t allocator;         // allocator that owns this struct
+
+  char name[16];          // truncated copy of params.name for debuggers
+  pthread_t handle;       // joinable pthread handle
+  mach_port_t mach_port;  // kernel port for thread_policy_set/thread_resume
+
+  iree_thread_entry_t entry;  // user entry; NULLed once consumed
+  void* entry_arg;
+
+  iree_atomic_int32_t is_suspended;  // 1 while created-suspended, unresumed
+};
+
+static qos_class_t iree_thread_qos_class_for_priority_class(
+    iree_thread_priority_class_t priority_class);
+
+// Names the current thread for debuggers and tracing; must run on the thread
+// being named (the darwin single-arg pthread_setname_np names the caller).
+static void iree_thread_set_name(const char* name) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  pthread_setname_np(name);
+  IREE_TRACE_SET_THREAD_NAME(name);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Thread entry trampoline running on the new thread: names the thread for
+// tooling, then tail-calls the user entry function.
+static void* iree_thread_start_routine(void* param) {
+  // NOTE: we own a reference to the thread handle so that the creation
+  // thread can't delete this out from under us.
+  iree_thread_t* thread = (iree_thread_t*)param;
+
+  // Set the thread name used by debuggers and tracy (which must be called on
+  // the thread).
+  iree_thread_set_name(thread->name);
+
+  // "Consume" the entry info so that we don't see it again (as we don't own
+  // its lifetime).
+  iree_thread_entry_t entry = thread->entry;
+  void* entry_arg = thread->entry_arg;
+  thread->entry = NULL;
+  thread->entry_arg = NULL;
+
+  // Release our ownership of the thread handle. If the creating thread doesn't
+  // want it this will free the memory and fully detach the thread.
+  iree_thread_release(thread);
+
+  // Call the user thread entry point function.
+  // Note that this can be a tail-call which saves a stack frame in all threads
+  // (which is really just to make call stacks in debuggers much cleaner).
+  return (void*)((uintptr_t)entry(entry_arg));
+}
+
+// Creates a darwin thread via pthreads — truly suspended when requested
+// (pthread_create_suspended_np) — and applies initial QoS/affinity params.
+iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
+                                 iree_thread_create_params_t params,
+                                 iree_allocator_t allocator,
+                                 iree_thread_t** out_thread) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Allocate our thread struct; we'll use it to shuttle params into the thread
+  // (including the user-specified entry_arg).
+  iree_thread_t* thread = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(allocator, sizeof(*thread), (void**)&thread);
+  if (!iree_status_is_ok(status)) {
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+  iree_atomic_ref_count_init(&thread->ref_count);
+  thread->allocator = allocator;
+  thread->entry = entry;
+  thread->entry_arg = entry_arg;
+  // Truncating copy; thread->name is a fixed 16-byte buffer.
+  iree_strncpy_s(thread->name, IREE_ARRAYSIZE(thread->name), params.name.data,
+                 iree_min(params.name.size, IREE_ARRAYSIZE(thread->name) - 1));
+  iree_atomic_store_int32(&thread->is_suspended,
+                          params.create_suspended ? 1 : 0,
+                          iree_memory_order_relaxed);
+
+  pthread_attr_t thread_attr;
+  pthread_attr_init(&thread_attr);
+  pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+  if (params.stack_size) {
+    pthread_attr_setstacksize(&thread_attr, params.stack_size);
+  }
+
+  // Ensure we start with the right QoS class.
+  qos_class_t qos_class =
+      iree_thread_qos_class_for_priority_class(params.priority_class);
+  pthread_attr_set_qos_class_np(&thread_attr, qos_class, 0);
+
+  // Retain the thread for the thread itself; this way if the caller immediately
+  // releases the iree_thread_t handle the thread won't explode.
+  iree_thread_retain(thread);
+  *out_thread = thread;
+
+  // Create the thread either suspended or running as the user requested.
+  int rc;
+  if (params.create_suspended) {
+    IREE_TRACE_ZONE_BEGIN_NAMED(z1, "pthread_create_suspended_np");
+    rc = pthread_create_suspended_np(&thread->handle, &thread_attr,
+                                     &iree_thread_start_routine, thread);
+    IREE_TRACE_ZONE_END(z1);
+  } else {
+    IREE_TRACE_ZONE_BEGIN_NAMED(z1, "pthread_create");
+    rc = pthread_create(&thread->handle, &thread_attr,
+                        &iree_thread_start_routine, thread);
+    IREE_TRACE_ZONE_END(z1);
+  }
+  pthread_attr_destroy(&thread_attr);
+  if (rc != 0) {
+    // Creation failed: drop both references so the struct is freed.
+    iree_thread_release(thread);  // for self
+    iree_thread_release(thread);  // for caller
+    *out_thread = NULL;
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_INTERNAL,
+                            "thread creation failed with %d", rc);
+  }
+
+  // The mach port is needed for affinity changes and resume below.
+  thread->mach_port = pthread_mach_thread_np(thread->handle);
+  if (params.initial_affinity.specified) {
+    iree_thread_request_affinity(thread, params.initial_affinity);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Joins and deallocates the thread; called when the last reference drops.
+// Resumes first so a still-suspended thread can run to completion and join.
+static void iree_thread_delete(iree_thread_t* thread) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_thread_resume(thread);
+  pthread_join(thread->handle, NULL);
+
+  iree_allocator_free(thread->allocator, thread);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_thread_retain(iree_thread_t* thread) {
+  if (thread) {
+    iree_atomic_ref_count_inc(&thread->ref_count);
+  }
+}
+
+void iree_thread_release(iree_thread_t* thread) {
+  // Deletes when this was the last remaining reference.
+  if (thread && iree_atomic_ref_count_dec(&thread->ref_count) == 1) {
+    iree_thread_delete(thread);
+  }
+}
+
+uintptr_t iree_thread_id(iree_thread_t* thread) {
+  return (uintptr_t)thread->handle;
+}
+
+// Maps an IREE iree_thread_priority_class_t value to a QoS type.
+// https://developer.apple.com/library/archive/documentation/Performance/Conceptual/EnergyGuide-iOS/PrioritizeWorkWithQoS.html
+static qos_class_t iree_thread_qos_class_for_priority_class(
+    iree_thread_priority_class_t priority_class) {
+  switch (priority_class) {
+    case IREE_THREAD_PRIORITY_CLASS_LOWEST:
+      return QOS_CLASS_BACKGROUND;
+    case IREE_THREAD_PRIORITY_CLASS_LOW:
+      return QOS_CLASS_UTILITY;
+    // default is placed here deliberately so unknown values fall through to
+    // the NORMAL mapping.
+    default:
+    case IREE_THREAD_PRIORITY_CLASS_NORMAL:
+      return QOS_CLASS_DEFAULT;
+    case IREE_THREAD_PRIORITY_CLASS_HIGH:
+      return QOS_CLASS_USER_INITIATED;
+    case IREE_THREAD_PRIORITY_CLASS_HIGHEST:
+      return QOS_CLASS_USER_INTERACTIVE;
+  }
+}
+
+// Raises the thread's QoS floor via the darwin QoS override mechanism; the
+// opaque pthread_override_t doubles as the returned override token.
+iree_thread_override_t* iree_thread_priority_class_override_begin(
+    iree_thread_t* thread, iree_thread_priority_class_t priority_class) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  qos_class_t qos_class =
+      iree_thread_qos_class_for_priority_class(priority_class);
+  pthread_override_t override =
+      pthread_override_qos_class_start_np(thread->handle, qos_class, 0);
+
+  IREE_TRACE_ZONE_END(z0);
+  return (iree_thread_override_t*)override;
+}
+
+// Ends a QoS override previously returned from ..._override_begin.
+void iree_thread_override_end(iree_thread_override_t* override) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  pthread_override_qos_class_end_np((pthread_override_t)override);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Applies an affinity-set policy tag to the thread's mach port; no-op when no
+// affinity was specified.
+void iree_thread_request_affinity(iree_thread_t* thread,
+                                  iree_thread_affinity_t affinity) {
+  if (!affinity.specified) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // See:
+  // https://gist.github.com/Coneko/4234842
+  // https://fergofrog.com/code/cbowser/xnu/osfmk/mach/thread_policy.h.html
+  // http://www.hybridkernel.com/2015/01/18/binding_threads_to_cores_osx.html
+  thread_affinity_policy_data_t policy_data = {affinity.id};
+  thread_policy_set(thread->mach_port, THREAD_AFFINITY_POLICY,
+                    (thread_policy_t)(&policy_data),
+                    THREAD_AFFINITY_POLICY_COUNT);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_thread_resume(iree_thread_t* thread) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // NOTE: we don't track the suspend/resume depth here because we don't
+  // expose suspend as an operation (yet). If we did we'd want to make sure we
+  // always balance suspend/resume or else we'll mess with any
+  // debuggers/profilers that may be suspending threads for their own uses.
+  // The CAS 1->0 guarantees thread_resume is issued at most once.
+  int32_t expected = 1;
+  if (iree_atomic_compare_exchange_strong_int32(
+          &thread->is_suspended, &expected, 0, iree_memory_order_seq_cst,
+          iree_memory_order_seq_cst)) {
+    thread_resume(thread->mach_port);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+#endif // IREE_PLATFORM_APPLE
diff --git a/runtime/src/iree/base/internal/threading_impl.h b/runtime/src/iree/base/internal/threading_impl.h
new file mode 100644
index 0000000..0fdbbd6
--- /dev/null
+++ b/runtime/src/iree/base/internal/threading_impl.h
@@ -0,0 +1,77 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_THREADING_IMPL_H_
+#define IREE_BASE_INTERNAL_THREADING_IMPL_H_
+
+// Ensure that any posix header we include exposes GNU stuff. Ignored on
+// platforms where we either don't have the GNU stuff or don't have posix
+// headers at all.
+//
+// Note that this does not need to be the same for all compilation units, only
+// those we want to access the non-portable features in. It *must* be defined
+// prior to including any of the files, though, as otherwise header-guards will
+// cause the setting at the time of first inclusion to win.
+//
+// https://stackoverflow.com/a/5583764
+#define _GNU_SOURCE 1
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/threading.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// strncpy_s shall copy the first N characters of src to dst, where N is the
+// lesser of MaxCount and the length of src.
+//
+// We have this here patching over GNU being stubborn about supporting this.
+// If we start using it other places we can move it into a helper file.
+int iree_strncpy_s(char* dest, size_t destsz, const char* src, size_t count);
+
+// Callback used by the override list to apply a newly computed effective
+// priority class to |thread|.
+typedef void (*iree_thread_set_priority_fn_t)(
+    iree_thread_t* thread, iree_thread_priority_class_t priority_class);
+
+// Tracks the set of active priority-class overrides for a single thread.
+// Thread-safe: all mutable state is guarded by |mutex|.
+typedef struct iree_thread_override_list_t {
+  iree_thread_set_priority_fn_t set_priority_fn;  // applies class changes
+  iree_thread_priority_class_t base_priority_class;  // floor w/o overrides
+  iree_allocator_t allocator;  // used to allocate override entries
+  iree_slim_mutex_t mutex;     // guards |head| and priority updates
+  iree_thread_priority_class_t current_priority_class;  // last applied class
+  iree_thread_override_t* head;  // doubly-linked list of active overrides
+} iree_thread_override_list_t;
+
+// Initializes the override list for a thread with |base_priority_class|.
+// |set_priority_fn| will be used to update the thread priority when needed.
+void iree_thread_override_list_initialize(
+ iree_thread_set_priority_fn_t set_priority_fn,
+ iree_thread_priority_class_t base_priority_class,
+ iree_allocator_t allocator, iree_thread_override_list_t* out_list);
+
+// Deinitializes an override list; expects that all overrides have been removed.
+void iree_thread_override_list_deinitialize(iree_thread_override_list_t* list);
+
+// Adds a new override to the list and returns an allocated handle.
+iree_thread_override_t* iree_thread_override_list_add(
+ iree_thread_override_list_t* list, iree_thread_t* thread,
+ iree_thread_priority_class_t priority_class);
+
+// Removes an override from its parent list and deallocates it.
+void iree_thread_override_remove_self(iree_thread_override_t* override);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // IREE_BASE_INTERNAL_THREADING_IMPL_H_
diff --git a/runtime/src/iree/base/internal/threading_pthreads.c b/runtime/src/iree/base/internal/threading_pthreads.c
new file mode 100644
index 0000000..a197f50
--- /dev/null
+++ b/runtime/src/iree/base/internal/threading_pthreads.c
@@ -0,0 +1,356 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: must be first to ensure that we can define settings for all includes.
+#include "iree/base/internal/threading_impl.h"
+
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_EMSCRIPTEN) || \
+ defined(IREE_PLATFORM_LINUX)
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/call_once.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/threading.h"
+#include "iree/base/tracing.h"
+
+#if defined(IREE_PLATFORM_EMSCRIPTEN)
+#include <emscripten/threading.h>
+#endif // IREE_PLATFORM_EMSCRIPTEN
+
+// Older glibc doesn't have a gettid wrapper:
+// https://stackoverflow.com/a/63494768
+#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 30
+#define gettid() syscall(SYS_gettid)
+#endif
+
+// Pthreads thread state; shared between the creating thread and the new
+// thread via the atomic reference count (each party releases its own ref).
+struct iree_thread_t {
+  iree_atomic_ref_count_t ref_count;  // held by creator and by the thread
+  iree_allocator_t allocator;         // allocator that owns this struct
+
+  char name[16];     // truncated copy of params.name for debuggers
+  pthread_t handle;  // joinable pthread handle
+
+  iree_thread_entry_t entry;  // user entry; NULLed once consumed
+  void* entry_arg;
+
+  // Emulated create_suspended state: >0 while suspended; the thread parks on
+  // |suspend_barrier| until iree_thread_resume drops the count to 0.
+  iree_atomic_int32_t suspend_count;
+  iree_notification_t suspend_barrier;
+
+  // Thread-safe (has its own synchronization).
+  iree_thread_override_list_t qos_override_list;
+};
+
+static void iree_thread_set_priority_class(
+    iree_thread_t* thread, iree_thread_priority_class_t priority_class);
+
+// Notification predicate: true once the thread is no longer suspended.
+static bool iree_thread_resumed_predicate(void* arg) {
+  iree_thread_t* thread = (iree_thread_t*)arg;
+  return iree_atomic_load_int32(&thread->suspend_count,
+                                iree_memory_order_seq_cst) == 0;
+}
+
+#if defined(IREE_PLATFORM_EMSCRIPTEN)
+
+// Emscripten provides a dedicated API for naming its (web worker) threads.
+static int iree_thread_set_name(pthread_t handle, const char* name) {
+  emscripten_set_thread_name(handle, name);
+  return 0;
+}
+
+#else
+
+typedef int (*pthread_setname_np_fn_t)(pthread_t thread, const char* name);
+
+// pthread_setname_np is a non-portable extension that may be missing from
+// some libcs; resolve it dynamically once and cache the result.
+static pthread_setname_np_fn_t iree_pthread_setname_np_fn = NULL;
+static void iree_thread_try_query_setname_fn(void) {
+  iree_pthread_setname_np_fn =
+      (pthread_setname_np_fn_t)dlsym(RTLD_DEFAULT, "pthread_setname_np");
+}
+
+// Sets the debugger-visible name of |handle|; returns EINVAL when the libc
+// does not export pthread_setname_np.
+static int iree_thread_set_name(pthread_t handle, const char* name) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  static iree_once_flag fn_query_flag = IREE_ONCE_FLAG_INIT;
+  iree_call_once(&fn_query_flag, iree_thread_try_query_setname_fn);
+  int rc;
+  if (iree_pthread_setname_np_fn) {
+    rc = iree_pthread_setname_np_fn(handle, name);
+  } else {
+    rc = EINVAL;
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return rc;
+}
+
+#endif  // IREE_PLATFORM_EMSCRIPTEN
+
+// Thread entry trampoline running on the new thread: names the thread, parks
+// until resumed if created suspended, then tail-calls the user entry.
+static void* iree_thread_start_routine(void* param) {
+  // NOTE: we own a reference to the thread handle so that the creation
+  // thread can't delete this out from under us.
+  iree_thread_t* thread = (iree_thread_t*)param;
+
+  // Set the thread name used by debuggers and tracy (which must be called on
+  // the thread).
+  iree_thread_set_name(thread->handle, thread->name);
+  IREE_TRACE_SET_THREAD_NAME(thread->name);
+
+  // Wait until we resume if we were created suspended.
+  while (iree_atomic_load_int32(&thread->suspend_count,
+                                iree_memory_order_seq_cst) > 0) {
+    iree_notification_await(&thread->suspend_barrier,
+                            iree_thread_resumed_predicate, thread,
+                            iree_infinite_timeout());
+  }
+
+  // "Consume" the entry info so that we don't see it again (as we don't own
+  // its lifetime).
+  iree_thread_entry_t entry = thread->entry;
+  void* entry_arg = thread->entry_arg;
+  thread->entry = NULL;
+  thread->entry_arg = NULL;
+
+  // Release our ownership of the thread handle. If the creating thread doesn't
+  // want it this will free the memory and fully detach the thread.
+  iree_thread_release(thread);
+
+  // Call the user thread entry point function.
+  // Note that this can be a tail-call which saves a stack frame in all threads
+  // (which is really just to make call stacks in debuggers much cleaner).
+  return (void*)((uintptr_t)entry(entry_arg));
+}
+
+// Creates a pthread; create_suspended is emulated (pthreads has no suspended
+// creation API) by having the new thread park until iree_thread_resume.
+iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
+                                 iree_thread_create_params_t params,
+                                 iree_allocator_t allocator,
+                                 iree_thread_t** out_thread) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Allocate our thread struct; we'll use it to shuttle params into the thread
+  // (including the user-specified entry_arg).
+  iree_thread_t* thread = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(allocator, sizeof(*thread), (void**)&thread);
+  if (!iree_status_is_ok(status)) {
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+  iree_atomic_ref_count_init(&thread->ref_count);
+  thread->allocator = allocator;
+  thread->entry = entry;
+  thread->entry_arg = entry_arg;
+  // Truncating copy; thread->name is a fixed 16-byte buffer.
+  iree_strncpy_s(thread->name, IREE_ARRAYSIZE(thread->name), params.name.data,
+                 iree_min(params.name.size, IREE_ARRAYSIZE(thread->name) - 1));
+  thread->suspend_count = IREE_ATOMIC_VAR_INIT(params.create_suspended ? 1 : 0);
+  iree_notification_initialize(&thread->suspend_barrier);
+  iree_thread_override_list_initialize(iree_thread_set_priority_class,
+                                       params.priority_class, thread->allocator,
+                                       &thread->qos_override_list);
+
+  pthread_attr_t thread_attr;
+  pthread_attr_init(&thread_attr);
+  pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+  if (params.stack_size) {
+    pthread_attr_setstacksize(&thread_attr, params.stack_size);
+  }
+
+  // Retain the thread for the thread itself; this way if the caller immediately
+  // releases the iree_thread_t handle the thread won't explode.
+  iree_thread_retain(thread);
+  *out_thread = thread;
+
+  // Unfortunately we can't create the thread suspended (no API). This means
+  // that we are likely to incur some thrashing here as the thread gets spun up
+  // immediately. We emulate the create_suspended behavior by waiting in the
+  // thread until iree_thread_resume is called which at least gives us the same
+  // execution order guarantee across all platforms.
+  int rc;
+  {
+    IREE_TRACE_ZONE_BEGIN_NAMED(z1, "pthread_create");
+    rc = pthread_create(&thread->handle, &thread_attr,
+                        &iree_thread_start_routine, thread);
+    IREE_TRACE_ZONE_END(z1);
+  }
+  pthread_attr_destroy(&thread_attr);
+  if (rc != 0) {
+    // Creation failed: drop both references so the struct is freed.
+    iree_thread_release(thread);  // for self
+    iree_thread_release(thread);  // for caller
+    *out_thread = NULL;
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_INTERNAL,
+                            "thread creation failed with %d", rc);
+  }
+
+  // Apply initial priority/affinity after the handle is valid.
+  if (params.priority_class != IREE_THREAD_PRIORITY_CLASS_NORMAL) {
+    iree_thread_set_priority_class(thread, params.priority_class);
+  }
+  if (params.initial_affinity.specified) {
+    iree_thread_request_affinity(thread, params.initial_affinity);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+static void iree_thread_delete(iree_thread_t* thread) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_thread_resume(thread);
+ pthread_join(thread->handle, NULL);
+
+ iree_notification_deinitialize(&thread->suspend_barrier);
+ iree_thread_override_list_deinitialize(&thread->qos_override_list);
+ iree_allocator_free(thread->allocator, thread);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_thread_retain(iree_thread_t* thread) {
+ if (thread) {
+ iree_atomic_ref_count_inc(&thread->ref_count);
+ }
+}
+
+void iree_thread_release(iree_thread_t* thread) {
+ if (thread && iree_atomic_ref_count_dec(&thread->ref_count) == 1) {
+ iree_thread_delete(thread);
+ }
+}
+
+uintptr_t iree_thread_id(iree_thread_t* thread) {
+ return (uintptr_t)thread->handle;
+}
+
+// Maps an IREE iree_thread_priority_class_t value to a pthreads priority param.
+// The min/max ranges of the priority are implementation dependent so we need to
+// do this at runtime.
+static struct sched_param iree_thread_sched_param_for_priority_class(
+ int policy, iree_thread_priority_class_t priority_class) {
+ struct sched_param param;
+  memset(&param, 0, sizeof(param));
+ int min_priority = sched_get_priority_min(policy);
+ int max_priority = sched_get_priority_max(policy);
+ int normal_priority = (max_priority - min_priority) / 2 + min_priority;
+ switch (priority_class) {
+ case IREE_THREAD_PRIORITY_CLASS_LOWEST:
+ param.sched_priority = min_priority;
+ break;
+ case IREE_THREAD_PRIORITY_CLASS_LOW:
+ param.sched_priority =
+ (normal_priority - min_priority) / 2 + min_priority;
+ break;
+ case IREE_THREAD_PRIORITY_CLASS_NORMAL:
+ param.sched_priority = normal_priority;
+ break;
+ case IREE_THREAD_PRIORITY_CLASS_HIGH:
+ param.sched_priority =
+ (max_priority - normal_priority) / 2 + normal_priority;
+ break;
+ case IREE_THREAD_PRIORITY_CLASS_HIGHEST:
+ param.sched_priority = max_priority;
+ break;
+ }
+ return param;
+}
+
+// Sets the thread priority to the given |priority_class|, resetting any
+// previous value.
+//
+// NOTE: this probably doesn't work on Android, because Android.
+// They seem to use linux LWPs and setpriority/nice on the tid will actually
+// change the priority. It doesn't seem possible to elevate priority above
+// normal (without root), but it would at least be useful to be able to
+// indicate background threads.
+//
+// See:
+// https://stackoverflow.com/questions/17398075/change-native-thread-priority-on-android-in-c-c
+// https://android.googlesource.com/platform/frameworks/native/+/android-4.2.2_r1/include/utils/ThreadDefs.h
+//
+// TODO(benvanik): try this from filament:
+// https://github.com/google/filament/blob/56682794d398236c4caa5be40d80acdb73a13bc8/libs/utils/src/JobSystem.cpp
+static void iree_thread_set_priority_class(
+ iree_thread_t* thread, iree_thread_priority_class_t priority_class) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_EMSCRIPTEN)
+ // TODO(benvanik): Some sort of solution on Android, if possible (see above)
+ // TODO(benvanik): Some sort of solution on Emscripten, if possible
+#else
+ int policy = 0;
+ struct sched_param param;
+  pthread_getschedparam(thread->handle, &policy, &param);
+ param = iree_thread_sched_param_for_priority_class(policy, priority_class);
+  pthread_setschedparam(thread->handle, policy, &param);
+#endif  // IREE_PLATFORM_ANDROID || IREE_PLATFORM_EMSCRIPTEN
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+iree_thread_override_t* iree_thread_priority_class_override_begin(
+ iree_thread_t* thread, iree_thread_priority_class_t priority_class) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_thread_override_t* override = iree_thread_override_list_add(
+ &thread->qos_override_list, thread, priority_class);
+ IREE_TRACE_ZONE_END(z0);
+ return override;
+}
+
+void iree_thread_override_end(iree_thread_override_t* override) {
+ if (!override) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_thread_override_remove_self(override);
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_thread_request_affinity(iree_thread_t* thread,
+ iree_thread_affinity_t affinity) {
+ if (!affinity.specified) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ cpu_set_t cpu_set;
+ CPU_ZERO(&cpu_set);
+ CPU_SET(affinity.id, &cpu_set);
+ if (affinity.smt) {
+ CPU_SET(affinity.id + 1, &cpu_set);
+ }
+
+#if defined(IREE_PLATFORM_ANDROID)
+ // `pthread_gettid_np` is only available on API 21+ and it is needed to set
+ // affinity so skip it for older API versions.
+#if __ANDROID_API__ >= 21
+ // Android doesn't have pthread_setaffinity_np but that's usually just
+ // implemented as this sequence anyway:
+ pid_t tid = pthread_gettid_np(thread->handle);
+ sched_setaffinity(tid, sizeof(cpu_set), &cpu_set);
+#endif // __ANDROID_API__ >= 21
+#elif defined(IREE_PLATFORM_EMSCRIPTEN)
+ // TODO(benvanik): Some sort of solution on Emscripten, if possible
+#else
+ pthread_setaffinity_np(thread->handle, sizeof(cpu_set), &cpu_set);
+#endif // IREE_PLATFORM_*
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_thread_resume(iree_thread_t* thread) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ if (iree_atomic_exchange_int32(&thread->suspend_count, 0,
+ iree_memory_order_seq_cst) == 1) {
+ iree_notification_post(&thread->suspend_barrier, IREE_ALL_WAITERS);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+#endif // IREE_PLATFORM_*
diff --git a/runtime/src/iree/base/internal/threading_test.cc b/runtime/src/iree/base/internal/threading_test.cc
new file mode 100644
index 0000000..18f4f5e
--- /dev/null
+++ b/runtime/src/iree/base/internal/threading_test.cc
@@ -0,0 +1,223 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/threading.h"
+
+#include <chrono>
+#include <cstring>
+#include <thread>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/threading_impl.h" // to test the override list
+#include "iree/base/status_cc.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+using iree::Status;
+
+//==============================================================================
+// iree_thread_t
+//==============================================================================
+
+TEST(ThreadTest, Lifetime) {
+ // Default parameters:
+ iree_thread_create_params_t params;
+  memset(&params, 0, sizeof(params));
+
+ // Our thread: do a bit of math and notify the main test thread when done.
+ struct entry_data_t {
+ iree_atomic_int32_t value;
+ iree_notification_t barrier;
+ } entry_data;
+ iree_atomic_store_int32(&entry_data.value, 123, iree_memory_order_relaxed);
+ iree_notification_initialize(&entry_data.barrier);
+ iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int {
+ auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
+ iree_atomic_fetch_add_int32(&entry_data->value, 1,
+ iree_memory_order_acq_rel);
+ iree_notification_post(&entry_data->barrier, IREE_ALL_WAITERS);
+ return 0;
+ };
+
+ // Create the thread and immediately begin running it.
+ iree_thread_t* thread = nullptr;
+ IREE_ASSERT_OK(iree_thread_create(entry_fn, &entry_data, params,
+ iree_allocator_system(), &thread));
+ EXPECT_NE(0, iree_thread_id(thread));
+
+ // Wait for the thread to finish.
+ iree_notification_await(
+ &entry_data.barrier,
+ +[](void* entry_arg) -> bool {
+ auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
+ return iree_atomic_load_int32(&entry_data->value,
+ iree_memory_order_relaxed) == (123 + 1);
+ },
+ &entry_data, iree_infinite_timeout());
+
+ // By holding on to the thread object and releasing it here after the thread
+ // has finished, we ensure that destruction occurs on the main thread,
+ // avoiding data races reported by TSan.
+ iree_thread_release(thread);
+ iree_notification_deinitialize(&entry_data.barrier);
+}
+
+TEST(ThreadTest, CreateSuspended) {
+ iree_thread_create_params_t params;
+  memset(&params, 0, sizeof(params));
+ params.create_suspended = true;
+
+ struct entry_data_t {
+ iree_atomic_int32_t value;
+ iree_notification_t barrier;
+ } entry_data;
+ iree_atomic_store_int32(&entry_data.value, 123, iree_memory_order_relaxed);
+ iree_notification_initialize(&entry_data.barrier);
+ iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int {
+ auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
+ iree_atomic_fetch_add_int32(&entry_data->value, 1,
+ iree_memory_order_acq_rel);
+ iree_notification_post(&entry_data->barrier, IREE_ALL_WAITERS);
+ return 0;
+ };
+
+ iree_thread_t* thread = nullptr;
+ IREE_ASSERT_OK(iree_thread_create(entry_fn, &entry_data, params,
+ iree_allocator_system(), &thread));
+ EXPECT_NE(0, iree_thread_id(thread));
+
+ // NOTE: the thread will not be running and we should not expect a change in
+ // the value. I can't think of a good way to test this, though, so we'll just
+ // wait a moment here and assume that if the thread was able to run it would
+ // have during this wait.
+ ASSERT_EQ(123, iree_atomic_load_int32(&entry_data.value,
+ iree_memory_order_seq_cst));
+ std::this_thread::sleep_for(std::chrono::milliseconds(150));
+ ASSERT_EQ(123, iree_atomic_load_int32(&entry_data.value,
+ iree_memory_order_seq_cst));
+
+ // Resume the thread and wait for it to finish its work.
+ iree_thread_resume(thread);
+ iree_notification_await(
+ &entry_data.barrier,
+ +[](void* entry_arg) -> bool {
+ auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
+ return iree_atomic_load_int32(&entry_data->value,
+ iree_memory_order_relaxed) == (123 + 1);
+ },
+ &entry_data, iree_infinite_timeout());
+ iree_notification_deinitialize(&entry_data.barrier);
+ iree_thread_release(thread);
+}
+
+// NOTE: testing whether priority took effect is really hard given that on
+// certain platforms the priority may not be respected or may be clamped by
+// the system. This is here to test the mechanics of the priority override code
+// on our side and assumes that if we tell the OS something it respects it.
+TEST(ThreadTest, PriorityOverride) {
+ iree_thread_create_params_t params;
+  memset(&params, 0, sizeof(params));
+
+ struct entry_data_t {
+ iree_atomic_int32_t value;
+ iree_notification_t barrier;
+ } entry_data;
+ iree_atomic_store_int32(&entry_data.value, 0, iree_memory_order_relaxed);
+ iree_notification_initialize(&entry_data.barrier);
+ iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int {
+ auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
+ iree_atomic_fetch_add_int32(&entry_data->value, 1,
+ iree_memory_order_acq_rel);
+ iree_notification_post(&entry_data->barrier, IREE_ALL_WAITERS);
+ return 0;
+ };
+
+ iree_thread_t* thread = nullptr;
+ IREE_ASSERT_OK(iree_thread_create(entry_fn, &entry_data, params,
+ iree_allocator_system(), &thread));
+ EXPECT_NE(0, iree_thread_id(thread));
+
+ // Push a few overrides.
+ iree_thread_override_t* override0 = iree_thread_priority_class_override_begin(
+ thread, IREE_THREAD_PRIORITY_CLASS_HIGH);
+ EXPECT_NE(nullptr, override0);
+ iree_thread_override_t* override1 = iree_thread_priority_class_override_begin(
+ thread, IREE_THREAD_PRIORITY_CLASS_HIGHEST);
+ EXPECT_NE(nullptr, override1);
+ iree_thread_override_t* override2 = iree_thread_priority_class_override_begin(
+ thread, IREE_THREAD_PRIORITY_CLASS_LOWEST);
+ EXPECT_NE(nullptr, override2);
+
+ // Wait for the thread to finish.
+ iree_notification_await(
+ &entry_data.barrier,
+ +[](void* entry_arg) -> bool {
+ auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
+ return iree_atomic_load_int32(&entry_data->value,
+ iree_memory_order_relaxed) == 1;
+ },
+ &entry_data, iree_infinite_timeout());
+ iree_notification_deinitialize(&entry_data.barrier);
+
+ // Pop overrides (in opposite order intentionally).
+ iree_thread_override_end(override0);
+ iree_thread_override_end(override1);
+ iree_thread_override_end(override2);
+
+ iree_thread_release(thread);
+}
+
+//==============================================================================
+// iree_thread_override_list_t
+//==============================================================================
+// This is an implementation detail but useful to test on its own as it's shared
+// across several platform implementations.
+
+TEST(ThreadOverrideListTest, PriorityClass) {
+ static iree_thread_t* kThreadSentinel =
+ reinterpret_cast<iree_thread_t*>(0x123);
+ static iree_thread_priority_class_t current_priority_class =
+ IREE_THREAD_PRIORITY_CLASS_NORMAL;
+ iree_thread_override_list_t list;
+ iree_thread_override_list_initialize(
+ +[](iree_thread_t* thread, iree_thread_priority_class_t priority_class) {
+ EXPECT_EQ(kThreadSentinel, thread);
+ EXPECT_NE(current_priority_class, priority_class);
+ current_priority_class = priority_class;
+ },
+ current_priority_class, iree_allocator_system(), &list);
+
+ // (NORMAL) -> HIGH -> [ignored LOW] -> HIGHEST
+ ASSERT_EQ(IREE_THREAD_PRIORITY_CLASS_NORMAL, current_priority_class);
+ iree_thread_override_t* override0 = iree_thread_override_list_add(
+ &list, kThreadSentinel, IREE_THREAD_PRIORITY_CLASS_HIGH);
+ EXPECT_NE(nullptr, override0);
+ ASSERT_EQ(IREE_THREAD_PRIORITY_CLASS_HIGH, current_priority_class);
+ iree_thread_override_t* override1 = iree_thread_override_list_add(
+ &list, kThreadSentinel, IREE_THREAD_PRIORITY_CLASS_LOW);
+ EXPECT_NE(nullptr, override1);
+ ASSERT_EQ(IREE_THREAD_PRIORITY_CLASS_HIGH, current_priority_class);
+ iree_thread_override_t* override2 = iree_thread_override_list_add(
+ &list, kThreadSentinel, IREE_THREAD_PRIORITY_CLASS_HIGHEST);
+ EXPECT_NE(nullptr, override2);
+ ASSERT_EQ(IREE_THREAD_PRIORITY_CLASS_HIGHEST, current_priority_class);
+
+ // Out of order to ensure highest bit sticks:
+ ASSERT_EQ(IREE_THREAD_PRIORITY_CLASS_HIGHEST, current_priority_class);
+ iree_thread_override_remove_self(override1);
+ ASSERT_EQ(IREE_THREAD_PRIORITY_CLASS_HIGHEST, current_priority_class);
+ iree_thread_override_remove_self(override0);
+ ASSERT_EQ(IREE_THREAD_PRIORITY_CLASS_HIGHEST, current_priority_class);
+ iree_thread_override_remove_self(override2);
+ ASSERT_EQ(IREE_THREAD_PRIORITY_CLASS_NORMAL, current_priority_class);
+
+ iree_thread_override_list_deinitialize(&list);
+}
+
+} // namespace
diff --git a/runtime/src/iree/base/internal/threading_win32.c b/runtime/src/iree/base/internal/threading_win32.c
new file mode 100644
index 0000000..6e550e3
--- /dev/null
+++ b/runtime/src/iree/base/internal/threading_win32.c
@@ -0,0 +1,328 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// clang-format off: must be included before all other headers.
+#include "iree/base/internal/threading_impl.h"
+// clang-format on
+
+#if defined(IREE_PLATFORM_WINDOWS)
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/threading.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
+// Great documentation:
+// https://www.microsoftpressstore.com/articles/article.aspx?p=2233328
+
+struct iree_thread_t {
+ iree_atomic_ref_count_t ref_count;
+ iree_allocator_t allocator;
+
+ char name[16];
+ HANDLE handle;
+ DWORD id;
+
+ iree_thread_entry_t entry;
+ void* entry_arg;
+
+ iree_atomic_int32_t is_suspended;
+
+ // Thread-safe (has its own synchronization).
+ iree_thread_override_list_t qos_override_list;
+};
+
+static void iree_thread_set_priority_class(
+ iree_thread_t* thread, iree_thread_priority_class_t priority_class);
+
+// Sets the thread's name to the given NUL-terminated string.
+//
+// See:
+// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code
+static void iree_thread_set_name(HANDLE handle, const char* name) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Try first to use the modern SetThreadDescription API.
+ // This will work even if a debugger is not attached meaning that tools that
+ // don't use the debugger API can still query thread names. It's only
+ // available on Win10+.
+ typedef HRESULT(WINAPI * SetThreadDescriptionFn)(HANDLE hThread,
+ PCWSTR lpThreadDescription);
+ SetThreadDescriptionFn pSetThreadDescription =
+ (SetThreadDescriptionFn)GetProcAddress(GetModuleHandleW(L"Kernel32.dll"),
+ "SetThreadDescription");
+ if (pSetThreadDescription) {
+ wchar_t name_wide[16] = {0};
+ MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, name, -1, name_wide,
+ IREE_ARRAYSIZE(name_wide) - 1);
+ pSetThreadDescription(handle, name_wide);
+ IREE_TRACE_ZONE_END(z0);
+ return;
+ }
+
+ if (!IsDebuggerPresent()) {
+ // The name is only captured if a debugger is attached so we can avoid
+ // doing any of the work if none is present. This means that a debugger
+ // attached to the process after thread creation won't see thread names but
+ // that's a rare case anyway.
+ IREE_TRACE_ZONE_END(z0);
+ return;
+ }
+
+#pragma pack(push, 8)
+ struct THREADNAME_INFO {
+ DWORD dwType; // Must be 0x1000.
+ LPCSTR szName; // Pointer to name (in user addr space).
+ DWORD dwThreadID; // Thread ID (-1=caller thread).
+ DWORD dwFlags; // Reserved for future use, must be zero.
+ };
+#pragma pack(pop)
+
+#pragma warning(push)
+#pragma warning(disable : 6320 6322)
+ struct THREADNAME_INFO info;
+ info.dwType = 0x1000;
+ info.szName = name;
+ info.dwThreadID = GetThreadId(handle);
+ info.dwFlags = 0;
+ __try {
+ RaiseException(0x406D1388u, 0, sizeof(info) / sizeof(ULONG_PTR),
+ (ULONG_PTR*)(&info));
+ } __except (EXCEPTION_EXECUTE_HANDLER) {
+ }
+#pragma warning(pop)
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static DWORD WINAPI iree_thread_start_routine(LPVOID param) {
+ // NOTE: we own a reference to the thread handle so that the creation
+ // thread can't delete this out from under us.
+ iree_thread_t* thread = (iree_thread_t*)param;
+
+ // Set the thread name used by tracy (which must be called on the thread).
+ IREE_TRACE_SET_THREAD_NAME(thread->name);
+
+ // "Consume" the entry info so that we don't see it again (as we don't own
+ // its lifetime).
+ iree_thread_entry_t entry = thread->entry;
+ void* entry_arg = thread->entry_arg;
+ thread->entry = NULL;
+ thread->entry_arg = NULL;
+
+ // Release our ownership of the thread handle. If the creating thread doesn't
+ // want it this will free the memory and fully detach the thread.
+ iree_thread_release(thread);
+
+ // Call the user thread entry point function.
+ // Note that this can be a tail-call which saves a stack frame in all threads
+ // (which is really just to make call stacks in debuggers much cleaner).
+ return (DWORD)entry(entry_arg);
+}
+
+iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
+ iree_thread_create_params_t params,
+ iree_allocator_t allocator,
+ iree_thread_t** out_thread) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Allocate our thread struct; we'll use it to shuttle params into the thread
+ // (including the user-specified entry_arg).
+ iree_thread_t* thread = NULL;
+ iree_status_t status =
+ iree_allocator_malloc(allocator, sizeof(*thread), (void**)&thread);
+ if (!iree_status_is_ok(status)) {
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+ }
+ iree_atomic_ref_count_init(&thread->ref_count);
+ thread->allocator = allocator;
+ thread->entry = entry;
+ thread->entry_arg = entry_arg;
+ strncpy_s(thread->name, IREE_ARRAYSIZE(thread->name), params.name.data,
+ min(params.name.size, IREE_ARRAYSIZE(thread->name) - 1));
+ iree_atomic_store_int32(&thread->is_suspended,
+ params.create_suspended ? 1 : 0,
+ iree_memory_order_relaxed);
+ iree_thread_override_list_initialize(iree_thread_set_priority_class,
+ params.priority_class, thread->allocator,
+ &thread->qos_override_list);
+
+ // Retain the thread for the thread itself; this way if the caller immediately
+ // releases the iree_thread_t handle the thread won't explode.
+ iree_thread_retain(thread);
+ *out_thread = thread;
+
+ // Create the thread either suspended or running as the user requested.
+ {
+ IREE_TRACE_ZONE_BEGIN_NAMED(z1, "CreateThread");
+ thread->handle = CreateThread(
+ NULL, params.stack_size, iree_thread_start_routine, thread,
+ params.create_suspended ? CREATE_SUSPENDED : 0, &thread->id);
+ IREE_TRACE_ZONE_END(z1);
+ }
+ if (thread->handle == INVALID_HANDLE_VALUE) {
+ iree_thread_release(thread); // for self
+ iree_thread_release(thread); // for caller
+ *out_thread = NULL;
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(IREE_STATUS_INTERNAL,
+ "thread creation failed with %lu", GetLastError());
+ }
+
+ // Immediately set thread properties before resuming (so that we don't
+ // start on the wrong core/at the wrong priority).
+ if (!iree_string_view_is_empty(params.name)) {
+ iree_thread_set_name(thread->handle, thread->name);
+ }
+ if (params.priority_class != IREE_THREAD_PRIORITY_CLASS_NORMAL) {
+ iree_thread_set_priority_class(thread, params.priority_class);
+ }
+ if (params.initial_affinity.specified) {
+ iree_thread_request_affinity(thread, params.initial_affinity);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+static void iree_thread_delete(iree_thread_t* thread) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_thread_resume(thread);
+
+ if (thread->id != GetCurrentThreadId()) {
+ // Join with the thread. Since threads can delete themselves we must ensure
+ // they don't try to join with themselves and deadlock.
+ WaitForSingleObject(thread->handle, INFINITE);
+ }
+ CloseHandle(thread->handle);
+ iree_thread_override_list_deinitialize(&thread->qos_override_list);
+ iree_allocator_free(thread->allocator, thread);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_thread_retain(iree_thread_t* thread) {
+ if (thread) {
+ iree_atomic_ref_count_inc(&thread->ref_count);
+ }
+}
+
+void iree_thread_release(iree_thread_t* thread) {
+ if (thread && iree_atomic_ref_count_dec(&thread->ref_count) == 1) {
+ iree_thread_delete(thread);
+ }
+}
+
+uintptr_t iree_thread_id(iree_thread_t* thread) {
+ return (uintptr_t)thread->id;
+}
+
+// Sets the thread priority to the given |priority_class| immediately.
+static void iree_thread_set_priority_class(
+ iree_thread_t* thread, iree_thread_priority_class_t priority_class) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ DWORD priority = THREAD_PRIORITY_NORMAL;
+ switch (priority_class) {
+ case IREE_THREAD_PRIORITY_CLASS_LOWEST:
+ priority = THREAD_PRIORITY_LOWEST;
+ break;
+ case IREE_THREAD_PRIORITY_CLASS_LOW:
+ priority = THREAD_PRIORITY_BELOW_NORMAL;
+ break;
+ case IREE_THREAD_PRIORITY_CLASS_NORMAL:
+ priority = THREAD_PRIORITY_NORMAL;
+ break;
+ case IREE_THREAD_PRIORITY_CLASS_HIGH:
+ priority = THREAD_PRIORITY_ABOVE_NORMAL;
+ break;
+ case IREE_THREAD_PRIORITY_CLASS_HIGHEST:
+ priority = THREAD_PRIORITY_HIGHEST;
+ break;
+ }
+ SetThreadPriority(thread->handle, priority);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+iree_thread_override_t* iree_thread_priority_class_override_begin(
+ iree_thread_t* thread, iree_thread_priority_class_t priority_class) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_thread_override_t* override = iree_thread_override_list_add(
+ &thread->qos_override_list, thread, priority_class);
+ IREE_TRACE_ZONE_END(z0);
+ return override;
+}
+
+void iree_thread_override_end(iree_thread_override_t* override) {
+ if (!override) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_thread_override_remove_self(override);
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_thread_request_affinity(iree_thread_t* thread,
+ iree_thread_affinity_t affinity) {
+ if (!affinity.specified) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ char affinity_desc[32];
+ int affinity_desc_length = snprintf(
+ affinity_desc, IREE_ARRAYSIZE(affinity_desc), "group=%d, id=%d, smt=%d",
+ affinity.group, affinity.id, affinity.smt);
+ IREE_TRACE_ZONE_APPEND_TEXT_STRING_VIEW(z0, affinity_desc,
+ affinity_desc_length);
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+ GROUP_AFFINITY group_affinity;
+ memset(&group_affinity, 0, sizeof(group_affinity));
+ group_affinity.Group = affinity.group;
+ KAFFINITY affinity_mask = 1ull << affinity.id;
+ if (affinity.smt) {
+ affinity_mask |= 1ull << (affinity.id + 1);
+ }
+ group_affinity.Mask = affinity_mask;
+ SetThreadGroupAffinity(thread->handle, &group_affinity, NULL);
+
+ // TODO(benvanik): figure out of this is a bad thing; sometimes it can result
+ // in the scheduler alternating cores within the affinity mask; in theory it's
+ // just an SMT ID change and doesn't have any impact on caches but it'd be
+ // good to check.
+ PROCESSOR_NUMBER ideal_processor;
+ memset(&ideal_processor, 0, sizeof(ideal_processor));
+ ideal_processor.Group = affinity.group;
+ ideal_processor.Number = affinity.id;
+ SetThreadIdealProcessorEx(thread->handle, &ideal_processor, NULL);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_thread_resume(iree_thread_t* thread) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // NOTE: we don't track the suspend/resume depth here because we don't
+ // expose suspend as an operation (yet). If we did we'd want to make sure we
+ // always balance suspend/resume or else we'll mess with any
+ // debuggers/profilers that may be suspending threads for their own uses.
+ int32_t expected = 1;
+ if (iree_atomic_compare_exchange_strong_int32(
+ &thread->is_suspended, &expected, 0, iree_memory_order_seq_cst,
+ iree_memory_order_seq_cst)) {
+ ResumeThread(thread->handle);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+#endif // IREE_PLATFORM_WINDOWS
diff --git a/runtime/src/iree/base/internal/wait_handle.c b/runtime/src/iree/base/internal/wait_handle.c
new file mode 100644
index 0000000..b3e1ed3
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle.c
@@ -0,0 +1,102 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/wait_handle.h"
+
+#include <string.h>
+
+//===----------------------------------------------------------------------===//
+// iree_wait_handle_t
+//===----------------------------------------------------------------------===//
+
+void iree_wait_handle_wrap_primitive(
+ iree_wait_primitive_type_t primitive_type,
+ iree_wait_primitive_value_t primitive_value,
+ iree_wait_handle_t* out_handle) {
+ memset(out_handle, 0, sizeof(*out_handle));
+ out_handle->type = primitive_type;
+ out_handle->value = primitive_value;
+}
+
+void iree_wait_handle_deinitialize(iree_wait_handle_t* handle) {
+ memset(handle, 0, sizeof(*handle));
+}
+
+iree_status_t iree_wait_handle_ctl(iree_wait_source_t wait_source,
+ iree_wait_source_command_t command,
+ const void* params, void** inout_ptr) {
+ iree_wait_handle_t* wait_handle = iree_wait_handle_from_source(&wait_source);
+ switch (command) {
+ case IREE_WAIT_SOURCE_COMMAND_QUERY: {
+ iree_status_code_t* out_wait_status_code = (iree_status_code_t*)inout_ptr;
+ if (iree_wait_handle_is_immediate(*wait_handle)) {
+ // Immediately resolved.
+ *out_wait_status_code = IREE_STATUS_OK;
+ return iree_ok_status();
+ } else {
+ // Poll the handle: a deadline exceeded indicates unresolved.
+ iree_status_t status =
+ iree_wait_one(wait_handle, IREE_TIME_INFINITE_PAST);
+ if (iree_status_is_deadline_exceeded(status)) {
+ *out_wait_status_code = IREE_STATUS_DEFERRED;
+ return iree_status_ignore(status);
+ }
+ return status;
+ }
+ }
+ case IREE_WAIT_SOURCE_COMMAND_WAIT_ONE: {
+ // Wait for the handle.
+ return iree_wait_one(
+ wait_handle,
+ iree_timeout_as_deadline_ns(
+ ((const iree_wait_source_wait_params_t*)params)->timeout));
+ }
+ case IREE_WAIT_SOURCE_COMMAND_EXPORT: {
+ iree_wait_primitive_type_t target_type =
+ ((const iree_wait_source_export_params_t*)params)->target_type;
+ if (target_type != IREE_WAIT_PRIMITIVE_TYPE_ANY &&
+ target_type != wait_handle->type) {
+ return iree_make_status(
+ IREE_STATUS_UNAVAILABLE,
+ "requested wait primitive type %d is unavailable; have %d",
+ (int)target_type, (int)wait_handle->type);
+ }
+ iree_wait_primitive_t* out_wait_primitive =
+ (iree_wait_primitive_t*)inout_ptr;
+ out_wait_primitive->type = wait_handle->type;
+ out_wait_primitive->value = wait_handle->value;
+ return iree_ok_status();
+ }
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unimplemented wait_source command");
+ }
+}
+
+IREE_API_EXPORT iree_status_t iree_wait_source_import(
+ iree_wait_primitive_t wait_primitive, iree_wait_source_t* out_wait_source) {
+ if (iree_wait_primitive_is_immediate(wait_primitive)) {
+ *out_wait_source = iree_wait_source_immediate();
+ } else {
+ iree_wait_handle_t* wait_handle =
+ (iree_wait_handle_t*)out_wait_source->storage;
+ iree_wait_handle_wrap_primitive(wait_primitive.type, wait_primitive.value,
+ wait_handle);
+ out_wait_source->ctl = iree_wait_handle_ctl;
+ }
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_event_t
+//===----------------------------------------------------------------------===//
+
+iree_wait_source_t iree_event_await(iree_event_t* event) {
+ iree_wait_source_t wait_source;
+ memcpy(wait_source.storage, event, sizeof(*event));
+ wait_source.ctl = iree_wait_handle_ctl;
+ return wait_source;
+}
diff --git a/runtime/src/iree/base/internal/wait_handle.h b/runtime/src/iree/base/internal/wait_handle.h
new file mode 100644
index 0000000..b173e7c
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle.h
@@ -0,0 +1,243 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_WAIT_HANDLE_H_
+#define IREE_BASE_INTERNAL_WAIT_HANDLE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_wait_handle_t
+//===----------------------------------------------------------------------===//
+
+// Non-owning handle reference to a waitable object.
+// TODO(benvanik): packing to ensure we are getting the expected alignments.
+typedef struct iree_wait_handle_t {
+  // Scratch space reserved for iree_wait_set_t implementations; not meaningful
+  // outside of the owning set. All union members alias the same storage and
+  // are used for different purposes depending on the operation in flight.
+  union {
+    // Used by iree_wait_set_t storage to track the number of duplicate
+    // instances of a particular handle within the set to avoid needing to store
+    // them all separately. A dupe_count of 0 means there is one unique handle.
+    uint32_t dupe_count : 16;
+    // Used by iree_wait_any and iree_wait_set_erase to optimize the
+    // wait-wake-erase pattern by avoiding the need to scan the internal storage
+    // list to erase a handle.
+    uint32_t index : 16;
+    // (3 bytes total available)
+    uint8_t storage[3];
+  } set_internal;
+  // Inlined iree_wait_primitive_t to get better packing:
+  iree_wait_primitive_type_t type;  // uint8_t
+  iree_wait_primitive_value_t value;
+} iree_wait_handle_t;
+static_assert(sizeof(iree_wait_handle_t) <= sizeof(uint64_t) * 2,
+              "iree_wait_handle_t must fit in 16-bytes so it can be stored in "
+              "other data structures");
+
+// Returns a wait handle that is immediately resolved.
+// The handle is zero-initialized; iree_wait_handle_is_immediate treats it as
+// immediate (presumably IREE_WAIT_PRIMITIVE_TYPE_NONE is the zero value —
+// confirm against the primitive type enum).
+static inline iree_wait_handle_t iree_wait_handle_immediate(void) {
+  iree_wait_handle_t wait_handle;
+  memset(&wait_handle, 0, sizeof(wait_handle));
+  return wait_handle;
+}
+
+// Returns true if the wait |handle| is resolved immediately (empty).
+// Immediate handles have no underlying primitive and never require a wait.
+static inline bool iree_wait_handle_is_immediate(iree_wait_handle_t handle) {
+  return handle.type == IREE_WAIT_PRIMITIVE_TYPE_NONE;
+}
+
+// Initializes a wait handle with the given primitive type and value.
+// Wait handles do not retain the provided primitives and they must be kept
+// valid (allocated and open) for as long as any wait handle references them.
+void iree_wait_handle_wrap_primitive(
+ iree_wait_primitive_type_t primitive_type,
+ iree_wait_primitive_value_t primitive_value,
+ iree_wait_handle_t* out_handle);
+
+// Deinitializes a wait handle.
+// Note that wait handles do not retain the underlying wait primitive and
+// deinitializing a handle will not close the resource.
+void iree_wait_handle_deinitialize(iree_wait_handle_t* handle);
+
+// Closes a wait handle and resets |handle|.
+void iree_wait_handle_close(iree_wait_handle_t* handle);
+
+// iree_wait_source_t control function.
+iree_status_t iree_wait_handle_ctl(iree_wait_source_t wait_source,
+ iree_wait_source_command_t command,
+ const void* params, void** inout_ptr);
+
+// Returns a pointer to the wait handle stored inline in |wait_source| if it
+// is using iree_wait_handle_ctl and otherwise NULL.
+static inline iree_wait_handle_t* iree_wait_handle_from_source(
+    iree_wait_source_t* wait_source) {
+  if (wait_source->ctl != iree_wait_handle_ctl) return NULL;
+  return (iree_wait_handle_t*)wait_source->storage;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_wait_set_t
+//===----------------------------------------------------------------------===//
+
+// A platform-specific cache of wait handles that can be multi-waited.
+// By caching callers don't need to build the list each wait and implementations
+// can store acceleration information or kernel API data structures and either
+// optimize or make compliant sets such as by deduplicating or sorting by
+// primitive type to perform a multi-API multi-wait.
+//
+// Certain handle types may also gain benefits: when syncfile is used we can use
+// sync_merge to coalesce wait handles when performing a wait-all on multiple
+// handles.
+//
+// This cache shines when handles are persistent (such as sockets/eventfds/etc)
+// and the set will rarely be changing relative to how many times it will be
+// waited on. It's not as optimal in the cases of one-shot waits on small
+// numbers of handles but those are also the cases where the set overhead is
+// small (2 set insertions all touching hot cache lines is fine) and we gain
+// the benefits of a unified code path and nice error handling/validation.
+//
+// Thread-compatible; only one thread may be manipulating or waiting on a
+// particular set at any time.
+typedef struct iree_wait_set_t iree_wait_set_t;
+
+// Allocates a wait set with the maximum |capacity| of unique handles.
+iree_status_t iree_wait_set_allocate(iree_host_size_t capacity,
+ iree_allocator_t allocator,
+ iree_wait_set_t** out_set);
+
+// Frees a wait set. The wait set must not be being waited on.
+void iree_wait_set_free(iree_wait_set_t* set);
+
+// Returns true if there are no handles registered with the set.
+bool iree_wait_set_is_empty(const iree_wait_set_t* set);
+
+// Inserts a wait handle into the set.
+// If the handle is already in the set it will be reference counted such that a
+// matching number of iree_wait_set_erase calls are required.
+iree_status_t iree_wait_set_insert(iree_wait_set_t* set,
+ iree_wait_handle_t handle);
+
+// Erases a single instance of a wait handle from the set.
+// Decrements the reference count; if the same handle was inserted multiple
+// times then it may still remain in the set after the call returns.
+void iree_wait_set_erase(iree_wait_set_t* set, iree_wait_handle_t handle);
+
+// Clears all handles from the wait set.
+void iree_wait_set_clear(iree_wait_set_t* set);
+
+// TODO(benvanik): signal/interrupt API to make a wait set wake up.
+// Can be implemented with signals/QueueUserAPC/etc. The workaround is that the
+// caller will need to create their own events to add to the set where for
+// transient wakes we could avoid that extra overhead.
+
+// Blocks the caller until all of the passed wait handles are signaled or the
+// |deadline_ns| elapses.
+//
+// A deadline of IREE_DURATION_ZERO will act as a poll and not block the caller.
+// IREE_DURATION_INFINITE can be used to block until signaled.
+//
+// Returns success if all handles were signaled either prior to the call or
+// during the wait.
+//
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the deadline elapses without all
+// handles having been signaled. Note that zero or more handles may have
+// actually signaled even if the deadline is exceeded (such as if they signal
+// while the waiting thread is resuming from the failed wait).
+//
+// iree_wait_set_t is thread-compatible; only one thread may be manipulating or
+// waiting on a set at any time.
+iree_status_t iree_wait_all(iree_wait_set_t* set, iree_time_t deadline_ns);
+
+// Blocks the caller until at least one of the handles is signaled or the
+// |deadline_ns| elapses.
+//
+// A deadline of IREE_TIME_INFINITE_PAST will act as a poll and not block the
+// caller. IREE_TIME_INFINITE_FUTURE can be used to block until signaled.
+//
+// Returns success if all handles were signaled either prior to the call or
+// during the wait. A handle of one of the signaled handles will be returned in
+// the optional |out_wake_handle| argument; note however that one or more
+// handles may have signaled and which handle is returned is unspecified.
+// Callers are expected to use the handle to short-circuit scanning the handles
+// list but if a full scan is going to happen regardless it can be ignored.
+//
+// |out_wake_handle| contains an optimization for wait-wake-erase set
+// operations; it is cheap to pass the woken handle to iree_wait_set_erase if
+// there are no interleaving operations that change the set layout.
+//
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the deadline elapses without any
+// handle having been signaled.
+//
+// iree_wait_set_t is thread-compatible; only one thread may be manipulating or
+// waiting on a set at any time.
+iree_status_t iree_wait_any(iree_wait_set_t* set, iree_time_t deadline_ns,
+ iree_wait_handle_t* out_wake_handle);
+
+// Blocks the caller until the given wait handle is signaled or |deadline_ns|
+// elapses. This is functionally equivalent to iree_wait_any/iree_wait_all used
+// on a set with a single handle in it but depending on the implementation may
+// not require additional allocations/state tracking.
+//
+// A deadline of IREE_TIME_INFINITE_PAST will act as a poll and not block the
+// caller. IREE_TIME_INFINITE_FUTURE can be used to block until signaled.
+//
+// Returns success if the handle was signaled either prior to the call or
+// during the wait.
+//
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the deadline elapses without the
+// handle having been signaled.
+iree_status_t iree_wait_one(iree_wait_handle_t* handle,
+ iree_time_t deadline_ns);
+
+//===----------------------------------------------------------------------===//
+// iree_event_t
+//===----------------------------------------------------------------------===//
+
+// A manual reset event (aka binary semaphore).
+// https://docs.microsoft.com/en-us/windows/win32/sync/event-objects
+//
+// Events are much heavier than iree_notification_t but are waitable objects
+// that can be passed to iree_wait_all/iree_wait_any. Prefer iree_notification_t
+// when multiwaiting is not required.
+//
+// Which primitive is used will depend on the current platform.
+typedef iree_wait_handle_t iree_event_t;
+
+// Initializes an event in either the signaled or unsignaled state.
+// The event must be closed with iree_event_deinitialize.
+iree_status_t iree_event_initialize(bool initial_state,
+ iree_event_t* out_event);
+
+// Deinitializes an event.
+void iree_event_deinitialize(iree_event_t* event);
+
+// Sets the event object to the signaled state.
+// The event stays signaled until iree_event_reset is called. Multiple waiters
+// will be woken and attempted waits while the event is set will succeed
+// immediately.
+void iree_event_set(iree_event_t* event);
+
+// Resets the event object to the unsignaled state.
+// Resetting an event that is already reset has no effect.
+void iree_event_reset(iree_event_t* event);
+
+// Returns a wait_source reference to |event|.
+// The event must be kept live for as long as the reference is live.
+iree_wait_source_t iree_event_await(iree_event_t* event);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_INTERNAL_WAIT_HANDLE_H_
diff --git a/runtime/src/iree/base/internal/wait_handle_epoll.c b/runtime/src/iree/base/internal/wait_handle_epoll.c
new file mode 100644
index 0000000..0bd08d6
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_epoll.c
@@ -0,0 +1,66 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: must be first to ensure that we can define settings for all includes.
+#include "iree/base/internal/wait_handle_impl.h"
+
+#if IREE_WAIT_API == IREE_WAIT_API_EPOLL
+
+#include "iree/base/internal/wait_handle_posix.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_wait_set_t
+//===----------------------------------------------------------------------===//
+
+// TODO(benvanik): iree_wait_set_t using an epoll fd.
+// epoll lets us route the wait set operations right to kernel and not need our
+// own duplicate data structure. epoll is great, just not available on mac/ios
+// so we still need poll for that. linux/android/bsd all have epoll, though.
+struct iree_wait_set_t {
+  // NOTE: we could in theory use the epoll handle directly (iree_wait_set_t
+  // then is just a pointer). Then allocate/free just go straight to the system.
+  int reserved;
+};
+
+// NOTE: all functions below are unimplemented placeholders. Non-void stubs
+// return UNIMPLEMENTED instead of falling off the end of the function, which
+// is undefined behavior in C once a caller reads the missing return value.
+
+iree_status_t iree_wait_set_allocate(iree_host_size_t capacity,
+                                     iree_allocator_t allocator,
+                                     iree_wait_set_t** out_set) {
+  // TODO(benvanik): epoll_create()
+  (void)capacity;
+  (void)allocator;
+  *out_set = NULL;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "epoll wait sets not yet implemented");
+}
+
+void iree_wait_set_free(iree_wait_set_t* set) {
+  // TODO(benvanik): close()
+  (void)set;
+}
+
+iree_status_t iree_wait_set_insert(iree_wait_set_t* set,
+                                   iree_wait_handle_t handle) {
+  // TODO(benvanik): epoll_ctl(EPOLL_CTL_ADD)
+  (void)set;
+  (void)handle;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "epoll wait sets not yet implemented");
+}
+
+void iree_wait_set_erase(iree_wait_set_t* set, iree_wait_handle_t handle) {
+  // TODO(benvanik): epoll_ctl(EPOLL_CTL_DEL)
+  (void)set;
+  (void)handle;
+}
+
+void iree_wait_set_clear(iree_wait_set_t* set) {
+  // TODO(benvanik): close and reopen?
+  (void)set;
+}
+
+iree_status_t iree_wait_all(iree_wait_set_t* set, iree_time_t deadline_ns) {
+  // TODO(benvanik): epoll_wait
+  (void)set;
+  (void)deadline_ns;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "epoll wait sets not yet implemented");
+}
+
+iree_status_t iree_wait_any(iree_wait_set_t* set, iree_time_t deadline_ns,
+                            iree_wait_handle_t* out_wake_handle) {
+  // TODO(benvanik): epoll_wait
+  (void)set;
+  (void)deadline_ns;
+  (void)out_wake_handle;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "epoll wait sets not yet implemented");
+}
+
+iree_status_t iree_wait_one(iree_wait_handle_t* handle,
+                            iree_time_t deadline_ns) {
+  // TODO(benvanik): just use poll?
+  (void)handle;
+  (void)deadline_ns;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "epoll wait sets not yet implemented");
+}
+
+#endif // IREE_WAIT_API == IREE_WAIT_API_EPOLL
diff --git a/runtime/src/iree/base/internal/wait_handle_impl.h b/runtime/src/iree/base/internal/wait_handle_impl.h
new file mode 100644
index 0000000..b22ba78
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_impl.h
@@ -0,0 +1,86 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_WAIT_HANDLE_IMPL_H_
+#define IREE_BASE_INTERNAL_WAIT_HANDLE_IMPL_H_
+
+//===----------------------------------------------------------------------===//
+// Platform overrides
+//===----------------------------------------------------------------------===//
+// NOTE: this must come first prior to any local/system includes!
+
+// Ensure that any posix header we include exposes GNU stuff. Ignored on
+// platforms where we either don't have the GNU stuff or don't have posix
+// headers at all.
+//
+// Note that this does not need to be the same for all compilation units, only
+// those we want to access the non-portable features in. It *must* be defined
+// prior to including any of the files, though, as otherwise header-guards will
+// cause the setting at the time of first inclusion to win.
+//
+// https://stackoverflow.com/a/5583764
+#define _GNU_SOURCE 1
+
+//===----------------------------------------------------------------------===//
+// Active wait API implementation selection (wait_handle_*.c)
+//===----------------------------------------------------------------------===//
+
+#include "iree/base/config.h"
+#include "iree/base/target_platform.h"
+
+// NOTE: order matters; priorities are (kqueue|epoll) > ppoll > poll.
+// When overridden with NULL (no platform primitives) or on Win32 we always use
+// those implementations (today).
+#define IREE_WAIT_API_NULL 0
+#define IREE_WAIT_API_INPROC 1
+#define IREE_WAIT_API_WIN32 2
+#define IREE_WAIT_API_POLL 3
+#define IREE_WAIT_API_PPOLL 4
+#define IREE_WAIT_API_EPOLL 5
+#define IREE_WAIT_API_KQUEUE 6
+
+// We allow overriding the wait API via command line flags. If unspecified we
+// try to guess based on the target platform.
+#if !defined(IREE_WAIT_API)
+
+// NOTE: we could be tighter here, but we today only have win32 or not-win32.
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+#define IREE_WAIT_API IREE_WAIT_API_NULL
+#elif defined(IREE_PLATFORM_GENERIC) || defined(IREE_PLATFORM_EMSCRIPTEN)
+#define IREE_WAIT_API IREE_WAIT_API_INPROC
+#elif defined(IREE_PLATFORM_WINDOWS)
+#define IREE_WAIT_API IREE_WAIT_API_WIN32 // WFMO used in wait_handle_win32.c
+#else
+// TODO(benvanik): EPOLL on android/linux/bsd/etc.
+// TODO(benvanik): KQUEUE on mac/ios.
+// KQUEUE is not implemented yet. Use POLL for mac/ios
+// Android ppoll requires API version >= 21
+#if !defined(IREE_PLATFORM_APPLE) && \
+ (!defined(__ANDROID_API__) || __ANDROID_API__ >= 21)
+#define IREE_WAIT_API IREE_WAIT_API_PPOLL
+#else
+#define IREE_WAIT_API IREE_WAIT_API_POLL
+#endif // insanity
+#endif // IREE_SYNCHRONIZATION_DISABLE_UNSAFE / IREE_PLATFORM_WINDOWS
+
+#endif // !IREE_WAIT_API
+
+// Many implementations share the same posix-like nature (file descriptors/etc)
+// and can share most of their code.
+#if (IREE_WAIT_API == IREE_WAIT_API_POLL) || \
+ (IREE_WAIT_API == IREE_WAIT_API_PPOLL) || \
+ (IREE_WAIT_API == IREE_WAIT_API_EPOLL) || \
+ (IREE_WAIT_API == IREE_WAIT_API_KQUEUE)
+#define IREE_WAIT_API_POSIX_LIKE 1
+#endif // IREE_WAIT_API = posix-like
+
+//===----------------------------------------------------------------------===//
+// Wait handle included with options set
+//===----------------------------------------------------------------------===//
+
+#include "iree/base/internal/wait_handle.h"
+
+#endif // IREE_BASE_INTERNAL_WAIT_HANDLE_IMPL_H_
diff --git a/runtime/src/iree/base/internal/wait_handle_inproc.c b/runtime/src/iree/base/internal/wait_handle_inproc.c
new file mode 100644
index 0000000..eff64cb
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_inproc.c
@@ -0,0 +1,378 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// clang-format off: must be included before all other headers.
+#include "iree/base/internal/wait_handle_impl.h"
+// clang-format on
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/wait_handle.h"
+#include "iree/base/target_platform.h"
+
+// This implementation uses iree_notification_t - backed by a futex in most
+// cases - to simulate system wait handles. When using a single handle such as
+// an iree_event_t and waiting on it with iree_wait_one things behave just as
+// the base iree_notification_t: threads can block and wait for the event to
+// be signaled. Multi-wait, however, requires some trickery as we need to be
+// able to wake when one or more events are signaled and unfortunately there are
+// no multi-wait futex APIs. To get around this we have a shared notification
+// that is posted every time an event is signaled and multi-waits await that.
+// This can lead to spurious wakes when under heavy load as disparate events may
+// wake unrelated multi-waiters, however by design in IREE we tend to avoid that
+// and centralize waits via things like the task system poller such that this
+// isn't so bad. The cases that are likely to suffer are heavy multi-tenant
+// workloads in the same process but those should be using a real wait handle
+// implementation instead of this bare-metal friendly one anyway.
+#if IREE_WAIT_API == IREE_WAIT_API_INPROC
+
+//===----------------------------------------------------------------------===//
+// iree_wait_primitive_* raw calls
+//===----------------------------------------------------------------------===//
+
+// Heap-allocated state backing an in-process "futex" wait primitive.
+// |value| is nonzero while signaled (see iree_event_set/iree_event_reset);
+// |notification| wakes waiters blocked on this specific handle.
+typedef struct iree_futex_handle_t {
+  iree_atomic_int64_t value;
+  iree_notification_t notification;
+} iree_futex_handle_t;
+
+// Returns true when |lhs| and |rhs| reference the same underlying primitive
+// (same type and bitwise-identical value).
+static bool iree_wait_primitive_compare_identical(iree_wait_handle_t* lhs,
+                                                  iree_wait_handle_t* rhs) {
+  if (lhs->type != rhs->type) return false;
+  return memcmp(&lhs->value, &rhs->value, sizeof(lhs->value)) == 0;
+}
+
+// Closes |handle| and releases any resources it owns, then resets it.
+// For local futex handles this tears down the notification and frees the
+// futex storage allocated in iree_event_initialize; other types have nothing
+// to release here.
+void iree_wait_handle_close(iree_wait_handle_t* handle) {
+  switch (handle->type) {
+#if defined(IREE_HAVE_WAIT_TYPE_LOCAL_FUTEX)
+    case IREE_WAIT_PRIMITIVE_TYPE_LOCAL_FUTEX: {
+      iree_futex_handle_t* futex =
+          (iree_futex_handle_t*)handle->value.local_futex;
+      iree_notification_deinitialize(&futex->notification);
+      // Pairs with the iree_allocator_system() malloc in
+      // iree_event_initialize.
+      iree_allocator_free(iree_allocator_system(), futex);
+      break;
+    }
+#endif  // IREE_HAVE_WAIT_TYPE_LOCAL_FUTEX
+    default:
+      break;
+  }
+  // Reset the handle fields regardless of type.
+  iree_wait_handle_deinitialize(handle);
+}
+
+//===----------------------------------------------------------------------===//
+// Multi-wait emulation
+//===----------------------------------------------------------------------===//
+
+// Returns a notification that is shared with all waiters in the process.
+// Waiting on the notification will cause a wake whenever any event is set;
+// this is the source of the spurious multi-wait wakes described at the top of
+// the file.
+static iree_notification_t* iree_wait_multi_notification(void) {
+  // Statically initialized so no allocation or explicit teardown is needed.
+  static iree_notification_t shared_notification = IREE_NOTIFICATION_INIT;
+  return &shared_notification;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_wait_set_t
+//===----------------------------------------------------------------------===//
+
+struct iree_wait_set_t {
+  // Allocator used to create (and later free) the set storage.
+  iree_allocator_t allocator;
+
+  // Total capacity of handles in the set (including duplicates).
+  // This defines the capacity of handles to ensure that we don't get insanely
+  // hard to debug behavioral differences when some handles happen to be
+  // duplicates vs all being unique.
+  //
+  // If you added 1000 duplicate handles to the set you'd need a capacity
+  // of 1000 even though handle_count (excluding duplicates) would be 1.
+  iree_host_size_t capacity;
+
+  // Total number of handles in the set (including duplicates).
+  // We use this to ensure that we provide consistent capacity errors.
+  iree_host_size_t total_handle_count;
+
+  // Number of handles in the set (excluding duplicates), defining the valid
+  // size of the dense handles list.
+  iree_host_size_t handle_count;
+
+  // De-duped user-provided handles. iree_wait_handle_t::set_internal.dupe_count
+  // is used to indicate how many additional duplicates there are of a
+  // particular handle. For example, dupe_count=0 means that there are no
+  // duplicates.
+  // Flexible array member: storage is allocated inline with the struct by
+  // iree_wait_set_allocate.
+  iree_wait_handle_t handles[];
+};
+
+// Allocates a wait set with capacity for |capacity| total handles (including
+// duplicates). The handles[] storage is allocated inline with the struct.
+// Capacities >= UINT16_MAX are rejected so the 16-bit dupe_count/index fields
+// in iree_wait_handle_t::set_internal cannot overflow.
+iree_status_t iree_wait_set_allocate(iree_host_size_t capacity,
+                                     iree_allocator_t allocator,
+                                     iree_wait_set_t** out_set) {
+  // Be reasonable; 64K objects is too high.
+  // NOTE: PRIhsz (not %zu) for iree_host_size_t, consistent with the message
+  // in iree_wait_set_insert.
+  if (capacity >= UINT16_MAX) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "wait set capacity of %" PRIhsz
+                            " is unreasonably large",
+                            capacity);
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, (int64_t)capacity);
+  *out_set = NULL;
+
+  iree_wait_set_t* set = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      allocator, sizeof(*set) + capacity * sizeof(iree_wait_handle_t),
+      (void**)&set);
+  if (iree_status_is_ok(status)) {
+    set->allocator = allocator;
+    set->capacity = capacity;
+    iree_wait_set_clear(set);
+  }
+
+  // |set| is still NULL on allocation failure.
+  *out_set = set;
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees |set|; no-op when NULL. The set must not be waited on by any thread.
+void iree_wait_set_free(iree_wait_set_t* set) {
+  if (!set) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // Capture the allocator before the backing storage is released.
+  iree_allocator_t set_allocator = set->allocator;
+  iree_allocator_free(set_allocator, set);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns true if there are no handles registered with the set.
+// NOTE: the condition was inverted (!= 0), which returned true for non-empty
+// sets — the opposite of the contract documented in wait_handle.h.
+bool iree_wait_set_is_empty(const iree_wait_set_t* set) {
+  return set->handle_count == 0;
+}
+
+// Inserts |handle| into |set|, deduplicating against existing entries.
+// Only LOCAL_FUTEX handles are supported by this in-process implementation.
+// Fails with RESOURCE_EXHAUSTED once total_handle_count (including duplicates)
+// would exceed the allocated capacity.
+iree_status_t iree_wait_set_insert(iree_wait_set_t* set,
+                                   iree_wait_handle_t handle) {
+  if (set->total_handle_count + 1 > set->capacity) {
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "wait set capacity %" PRIhsz
+                            " reached; no more wait handles available",
+                            set->capacity);
+  } else if (handle.type != IREE_WAIT_PRIMITIVE_TYPE_LOCAL_FUTEX) {
+    return iree_make_status(
+        IREE_STATUS_UNIMPLEMENTED,
+        "unimplemented primitive type %d (expected LOCAL_FUTEX)",
+        (int)handle.type);
+  }
+
+  // First check to see if we already have the handle in the set; most native
+  // system APIs don't allow duplicates so we match that behavior here to be
+  // consistent. It also helps in cases where the same event is waited on
+  // multiple times (such as when joining on a semaphore) as they can be routed
+  // to the much more efficient iree_wait_one.
+  for (iree_host_size_t i = 0; i < set->handle_count; ++i) {
+    iree_wait_handle_t* existing_handle = &set->handles[i];
+    if (iree_wait_primitive_compare_identical(existing_handle, &handle)) {
+      // Handle already exists in the set; just increment the reference count.
+      ++existing_handle->set_internal.dupe_count;
+      ++set->total_handle_count;
+      return iree_ok_status();
+    }
+  }
+
+  // Not found above: append to the dense handle list.
+  ++set->total_handle_count;
+  iree_host_size_t index = set->handle_count++;
+  iree_wait_handle_t* stored_handle = &set->handles[index];
+  iree_wait_handle_wrap_primitive(handle.type, handle.value, stored_handle);
+  stored_handle->set_internal.dupe_count = 0;  // just us so far
+
+  return iree_ok_status();
+}
+
+// Erases one reference to |handle| from |set|.
+// NOTE(review): if |handle| was never inserted, the fallback scan leaves
+// |index| at its stale hint value and the code below touches an arbitrary
+// entry — callers are presumably required to only erase handles they
+// previously inserted; confirm against the callers.
+void iree_wait_set_erase(iree_wait_set_t* set, iree_wait_handle_t handle) {
+  // Find the user handle in the set. This either requires a linear scan to
+  // find the matching user handle or - if valid - we can use the native index
+  // set after an iree_wait_any wake to do a quick lookup.
+  iree_host_size_t index = handle.set_internal.index;
+  if (IREE_UNLIKELY(index >= set->handle_count) ||
+      IREE_UNLIKELY(!iree_wait_primitive_compare_identical(&set->handles[index],
+                                                           &handle))) {
+    // Fallback to a linear scan of (hopefully) a small list.
+    for (iree_host_size_t i = 0; i < set->handle_count; ++i) {
+      if (iree_wait_primitive_compare_identical(&set->handles[i], &handle)) {
+        index = i;
+        break;
+      }
+    }
+  }
+
+  // Decrement reference count (post-decrement: a pre-decrement count of 0
+  // means this was the last reference and the handle must be removed).
+  iree_wait_handle_t* existing_handle = &set->handles[index];
+  if (existing_handle->set_internal.dupe_count-- > 0) {
+    // Still one or more remaining in the set; leave it in the handle list.
+    --set->total_handle_count;
+    return;
+  }
+
+  // No more references remaining; remove from both handle lists.
+  // Since we make no guarantees about the order of the lists we can just swap
+  // with the last value.
+  int tail_index = (int)set->handle_count - 1;
+  if (tail_index > index) {
+    memcpy(&set->handles[index], &set->handles[tail_index],
+           sizeof(*set->handles));
+  }
+  --set->total_handle_count;
+  --set->handle_count;
+}
+
+// Removes all handles from |set| without releasing its storage.
+void iree_wait_set_clear(iree_wait_set_t* set) {
+  memset(set->handles, 0, sizeof(iree_wait_handle_t) * set->handle_count);
+  set->handle_count = 0;
+  set->total_handle_count = 0;
+}
+
+// Parameters threaded through iree_notification_await's condition callback.
+typedef struct {
+  iree_wait_set_t* set;
+  iree_wait_handle_t* wake_handle;  // if set then wait-any
+} iree_wait_set_check_params_t;
+
+// Condition callback: returns true when the wait is satisfied.
+// Wait-any mode (wake_handle != NULL) succeeds on the first signaled handle
+// and reports it through |wake_handle|; wait-all mode requires every handle
+// in the set to be signaled.
+static bool iree_wait_set_check(const iree_wait_set_check_params_t* params) {
+  iree_host_size_t ready_count = 0;
+  for (iree_host_size_t i = 0; i < params->set->handle_count; ++i) {
+    iree_wait_handle_t* wait_handle = &params->set->handles[i];
+    iree_futex_handle_t* futex =
+        (iree_futex_handle_t*)wait_handle->value.local_futex;
+    // Acquire pairs with the release stores in iree_event_set/iree_event_reset.
+    if (iree_atomic_load_int64(&futex->value, iree_memory_order_acquire) != 0) {
+      ++ready_count;
+      if (params->wake_handle) {
+        *params->wake_handle = *wait_handle;
+        return true;
+      }
+    }
+  }
+  return ready_count == params->set->handle_count;
+}
+
+// Shared implementation of iree_wait_all/iree_wait_any.
+// |out_wake_handle| selects the mode: NULL waits for all handles; non-NULL
+// waits for any one handle and receives the handle that satisfied the wait.
+static iree_status_t iree_wait_multi(iree_wait_set_t* set,
+                                     iree_time_t deadline_ns,
+                                     iree_wait_handle_t* out_wake_handle) {
+  if (set->handle_count == 0) return iree_ok_status();  // no-op
+  if (set->handle_count == 1) {
+    // It's much more efficient to use a wait-one as then we will only wake if
+    // the specific handle is signaled; otherwise we will use the multi-wait
+    // notification and potentially wake many times.
+    // NOTE(review): this fast path does not populate |out_wake_handle| (it
+    // stays zeroed by iree_wait_any); confirm callers tolerate an immediate
+    // wake handle here.
+    return iree_wait_one(&set->handles[0], deadline_ns);
+  }
+
+  iree_wait_set_check_params_t params = {
+      .set = set,
+      .wake_handle = out_wake_handle,
+  };
+  if (!iree_notification_await(iree_wait_multi_notification(),
+                               (iree_condition_fn_t)iree_wait_set_check,
+                               &params, iree_make_deadline(deadline_ns))) {
+    return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  }
+  return iree_ok_status();
+}
+
+// Blocks until every handle in |set| is signaled or |deadline_ns| elapses.
+iree_status_t iree_wait_all(iree_wait_set_t* set, iree_time_t deadline_ns) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // A NULL wake handle selects wait-all semantics in iree_wait_multi.
+  iree_status_t wait_status =
+      iree_wait_multi(set, deadline_ns, /*out_wake_handle=*/NULL);
+  IREE_TRACE_ZONE_END(z0);
+  return wait_status;
+}
+
+// Blocks until at least one handle in |set| is signaled or |deadline_ns|
+// elapses; the woken handle is returned in |out_wake_handle|.
+iree_status_t iree_wait_any(iree_wait_set_t* set, iree_time_t deadline_ns,
+                            iree_wait_handle_t* out_wake_handle) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // Zero the wake handle so failure paths leave it as an immediate handle.
+  memset(out_wake_handle, 0, sizeof(*out_wake_handle));
+  iree_status_t wait_status =
+      iree_wait_multi(set, deadline_ns, out_wake_handle);
+  IREE_TRACE_ZONE_END(z0);
+  return wait_status;
+}
+
+// Condition callback for iree_wait_one: true once the futex value is nonzero
+// (i.e. the backing event has been set).
+static bool iree_futex_handle_check(iree_futex_handle_t* futex) {
+  return iree_atomic_load_int64(&futex->value, iree_memory_order_acquire) != 0;
+}
+
+// Blocks until |handle| is signaled or |deadline_ns| elapses.
+// Immediate (NONE type) handles resolve without waiting; only LOCAL_FUTEX
+// handles are waitable in this in-process implementation.
+iree_status_t iree_wait_one(iree_wait_handle_t* handle,
+                            iree_time_t deadline_ns) {
+  if (handle->type == IREE_WAIT_PRIMITIVE_TYPE_NONE) {
+    return iree_ok_status();
+  }
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_t status = iree_ok_status();
+  if (handle->type == IREE_WAIT_PRIMITIVE_TYPE_LOCAL_FUTEX) {
+    iree_futex_handle_t* futex =
+        (iree_futex_handle_t*)handle->value.local_futex;
+    if (!iree_notification_await(&futex->notification,
+                                 (iree_condition_fn_t)iree_futex_handle_check,
+                                 futex, iree_make_deadline(deadline_ns))) {
+      status = iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+    }
+  } else {
+    // NOTE: assign instead of returning so the trace zone below is always
+    // ended; the original early return leaked the zone on this path.
+    status = iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "unhandled primitive type");
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_event_t
+//===----------------------------------------------------------------------===//
+
+iree_status_t iree_event_initialize(bool initial_state,
+ iree_event_t* out_event) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ memset(out_event, 0, sizeof(*out_event));
+
+ iree_futex_handle_t* futex = NULL;
+ iree_status_t status = iree_allocator_malloc(iree_allocator_system(),
+ sizeof(*futex), (void**)&futex);
+ if (iree_status_is_ok(status)) {
+ out_event->type = IREE_WAIT_PRIMITIVE_TYPE_LOCAL_FUTEX;
+ out_event->value.local_futex = (void*)futex;
+ iree_atomic_store_int64(&futex->value, initial_state ? 1 : 0,
+ iree_memory_order_release);
+ iree_notification_initialize(&futex->notification);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Deinitializes |event|, closing the underlying wait handle.
+void iree_event_deinitialize(iree_event_t* event) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // iree_wait_handle_close frees the futex allocated in iree_event_initialize.
+  iree_wait_handle_close(event);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Sets |event| to the signaled state, waking all waiters.
+// No-op when |event| is NULL, has no futex storage, or is already set.
+void iree_event_set(iree_event_t* event) {
+  if (!event) return;
+  iree_futex_handle_t* futex = (iree_futex_handle_t*)event->value.local_futex;
+  if (!futex) return;
+
+  // Try to transition from unset -> set.
+  // No-op if already set and otherwise we successfully signaled the event and
+  // need to notify all waiters.
+  if (iree_atomic_exchange_int64(&futex->value, 1, iree_memory_order_release) ==
+      0) {
+    // Notify those waiting on just this event.
+    iree_notification_post(&futex->notification, IREE_ALL_WAITERS);
+    // Notify any multi-waits that may have this event as part of their set.
+    iree_notification_post(iree_wait_multi_notification(), IREE_ALL_WAITERS);
+  }
+}
+
+// Resets |event| to the unsignaled state; a no-op if already reset.
+void iree_event_reset(iree_event_t* event) {
+  if (!event) return;
+  iree_futex_handle_t* futex = (iree_futex_handle_t*)event->value.local_futex;
+  if (futex) {
+    // Store-release pairs with the acquire loads in the wait checks.
+    iree_atomic_store_int64(&futex->value, 0, iree_memory_order_release);
+  }
+}
+
+#endif // IREE_WAIT_API == IREE_WAIT_API_INPROC
diff --git a/runtime/src/iree/base/internal/wait_handle_kqueue.c b/runtime/src/iree/base/internal/wait_handle_kqueue.c
new file mode 100644
index 0000000..826ce51
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_kqueue.c
@@ -0,0 +1,63 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: must be first to ensure that we can define settings for all includes.
+#include "iree/base/internal/wait_handle_impl.h"
+
+#if IREE_WAIT_API == IREE_WAIT_API_KQUEUE
+
+#include "iree/base/internal/wait_handle_posix.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_wait_set_t
+//===----------------------------------------------------------------------===//
+
+// TODO(benvanik): iree_wait_set_s using a kqueue.
+// Could just cast the kqueue() fd to iree_wait_set_s* to avoid allocs.
+// https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/kqueue.2.html
+struct iree_wait_set_t {
+  int reserved;  // placeholder until real kqueue state exists
+};
+
+// TODO(benvanik): kqueue support. Until implemented every status-returning
+// entry point reports UNIMPLEMENTED: the previous stubs had no return
+// statement at all, which is undefined behavior in C when callers read the
+// iree_status_t result.
+iree_status_t iree_wait_set_allocate(iree_host_size_t capacity,
+                                     iree_allocator_t allocator,
+                                     iree_wait_set_t** out_set) {
+  (void)capacity;
+  (void)allocator;
+  *out_set = NULL;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "kqueue wait sets not yet implemented");
+}
+
+void iree_wait_set_free(iree_wait_set_t* set) {
+  // TODO(benvanik): close()
+  (void)set;
+}
+
+iree_status_t iree_wait_set_insert(iree_wait_set_t* set,
+                                   iree_wait_handle_t handle) {
+  (void)set;
+  (void)handle;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "kqueue wait sets not yet implemented");
+}
+
+void iree_wait_set_erase(iree_wait_set_t* set, iree_wait_handle_t handle) {
+  // TODO(benvanik): kqueue support
+  (void)set;
+  (void)handle;
+}
+
+void iree_wait_set_clear(iree_wait_set_t* set) {
+  // TODO(benvanik): kqueue support
+  (void)set;
+}
+
+iree_status_t iree_wait_all(iree_wait_set_t* set, iree_time_t deadline_ns) {
+  (void)set;
+  (void)deadline_ns;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "kqueue wait sets not yet implemented");
+}
+
+iree_status_t iree_wait_any(iree_wait_set_t* set, iree_time_t deadline_ns,
+                            iree_wait_handle_t* out_wake_handle) {
+  (void)set;
+  (void)deadline_ns;
+  (void)out_wake_handle;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "kqueue wait sets not yet implemented");
+}
+
+iree_status_t iree_wait_one(iree_wait_handle_t* handle,
+                            iree_time_t deadline_ns) {
+  (void)handle;
+  (void)deadline_ns;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "kqueue wait sets not yet implemented");
+}
+
+#endif // IREE_WAIT_API == IREE_WAIT_API_KQUEUE
diff --git a/runtime/src/iree/base/internal/wait_handle_null.c b/runtime/src/iree/base/internal/wait_handle_null.c
new file mode 100644
index 0000000..0dd8614
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_null.c
@@ -0,0 +1,92 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// clang-format off: must be included before all other headers.
+#include "iree/base/internal/wait_handle_impl.h"
+// clang-format on
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/wait_handle.h"
+#include "iree/base/target_platform.h"
+
+#if IREE_WAIT_API == IREE_WAIT_API_NULL
+
+//===----------------------------------------------------------------------===//
+// iree_wait_primitive_* raw calls
+//===----------------------------------------------------------------------===//
+
+// No OS resources exist in the null backend: close only resets the handle.
+void iree_wait_handle_close(iree_wait_handle_t* handle) {
+  iree_wait_handle_deinitialize(handle);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_wait_set_t
+//===----------------------------------------------------------------------===//
+
+// Placeholder type; no set is ever successfully allocated in this backend.
+struct iree_wait_set_t {
+  int reserved;
+};
+
+// Always fails with UNAVAILABLE; *out_set is cleared so callers never see an
+// uninitialized pointer.
+iree_status_t iree_wait_set_allocate(iree_host_size_t capacity,
+                                     iree_allocator_t allocator,
+                                     iree_wait_set_t** out_set) {
+  *out_set = NULL;
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "wait primitives not available on this platform");
+}
+
+void iree_wait_set_free(iree_wait_set_t* set) {}
+
+iree_status_t iree_wait_set_insert(iree_wait_set_t* set,
+                                   iree_wait_handle_t handle) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "wait primitives not available on this platform");
+}
+
+void iree_wait_set_erase(iree_wait_set_t* set, iree_wait_handle_t handle) {}
+
+void iree_wait_set_clear(iree_wait_set_t* set) {}
+
+// NOTE(review): the wait entry points use DEADLINE_EXCEEDED (a
+// non-exceptional code) rather than UNAVAILABLE — presumably so polling
+// callers degrade gracefully; confirm before changing.
+iree_status_t iree_wait_all(iree_wait_set_t* set, iree_time_t deadline_ns) {
+  return iree_make_status(IREE_STATUS_DEADLINE_EXCEEDED,
+                          "wait primitives not available on this platform");
+}
+
+iree_status_t iree_wait_any(iree_wait_set_t* set, iree_time_t deadline_ns,
+                            iree_wait_handle_t* out_wake_handle) {
+  return iree_make_status(IREE_STATUS_DEADLINE_EXCEEDED,
+                          "wait primitives not available on this platform");
+}
+
+iree_status_t iree_wait_one(iree_wait_handle_t* handle,
+                            iree_time_t deadline_ns) {
+  return iree_make_status(IREE_STATUS_DEADLINE_EXCEEDED,
+                          "wait primitives not available on this platform");
+}
+
+//===----------------------------------------------------------------------===//
+// iree_event_t
+//===----------------------------------------------------------------------===//
+
+// Events are unsupported: the output is zeroed and UNAVAILABLE is returned.
+iree_status_t iree_event_initialize(bool initial_state,
+                                    iree_event_t* out_event) {
+  memset(out_event, 0, sizeof(*out_event));
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "events not available on this platform");
+}
+
+void iree_event_deinitialize(iree_event_t* event) {}
+
+void iree_event_set(iree_event_t* event) {}
+
+void iree_event_reset(iree_event_t* event) {}
diff --git a/runtime/src/iree/base/internal/wait_handle_poll.c b/runtime/src/iree/base/internal/wait_handle_poll.c
new file mode 100644
index 0000000..5eba4e7
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_poll.c
@@ -0,0 +1,406 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: must be first to ensure that we can define settings for all includes.
+#include "iree/base/internal/wait_handle_impl.h"
+
+#if IREE_WAIT_API == IREE_WAIT_API_POLL || IREE_WAIT_API == IREE_WAIT_API_PPOLL
+
+#include <errno.h>
+#include <poll.h>
+#include <time.h>
+
+#include "iree/base/internal/wait_handle_posix.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// Platform utilities
+//===----------------------------------------------------------------------===//
+
+// ppoll is preferred as it has a much better timing mechanism; poll can have a
+// large slop on the deadline as not only is it at ms timeout granularity but
+// in general tends to round more.
+//
+// poll/ppoll may spuriously wake with an EINTR. We don't do anything with that
+// opportunity (no fancy signal stuff), but we do need to retry the poll and
+// ensure that we do so with an updated timeout based on the deadline.
+//
+// Documentation: https://linux.die.net/man/2/poll
+
+#if IREE_WAIT_API == IREE_WAIT_API_POLL
+// poll()-based wait over |fds|. Retries on EINTR, recomputing the relative
+// millisecond timeout from |deadline_ns| on each attempt so spurious wakes do
+// not extend the overall deadline. Returns DEADLINE_EXCEEDED when the timeout
+// lapses with no fds signaled.
+static iree_status_t iree_syscall_poll(struct pollfd* fds, nfds_t nfds,
+                                       iree_time_t deadline_ns,
+                                       int* out_signaled_count) {
+  *out_signaled_count = 0;
+  int rv = -1;
+  do {
+    // Recomputed inside the loop: an EINTR retry must only wait for the
+    // remaining time.
+    uint32_t timeout_ms = iree_absolute_deadline_to_timeout_ms(deadline_ns);
+    rv = poll(fds, nfds, (int)timeout_ms);
+  } while (rv < 0 && errno == EINTR);
+  if (rv > 0) {
+    // One or more events set.
+    *out_signaled_count = rv;
+    return iree_ok_status();
+  } else if (IREE_UNLIKELY(rv < 0)) {
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "poll failure %d", errno);
+  }
+  // rv == 0
+  // Timeout; no events set.
+  return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+}
+#elif IREE_WAIT_API == IREE_WAIT_API_PPOLL
+// ppoll()-based wait over |fds| with nanosecond timeout resolution. Retries
+// on EINTR, rebuilding the timespec each attempt so spurious wakes do not
+// extend the overall deadline. Returns DEADLINE_EXCEEDED on timeout.
+static iree_status_t iree_syscall_poll(struct pollfd* fds, nfds_t nfds,
+                                       iree_time_t deadline_ns,
+                                       int* out_signaled_count) {
+  *out_signaled_count = 0;
+  int rv = -1;
+  do {
+    // Convert the deadline into a tmo_p struct for ppoll that controls whether
+    // the call is blocking or non-blocking. Note that we must do this every
+    // iteration of the loop as a previous ppoll may have taken some of the
+    // time.
+    //
+    // See the ppoll docs for more information as to what the expected value is:
+    // http://man7.org/linux/man-pages/man2/poll.2.html
+    struct timespec timeout_ts;
+    struct timespec* tmo_p = &timeout_ts;
+    if (deadline_ns == IREE_TIME_INFINITE_PAST) {
+      // Block never.
+      memset(&timeout_ts, 0, sizeof(timeout_ts));
+    } else if (deadline_ns == IREE_TIME_INFINITE_FUTURE) {
+      // Block forever (NULL timeout to ppoll).
+      tmo_p = NULL;
+    } else {
+      // Wait only for as much time as we have before the deadline is exceeded.
+      iree_duration_t timeout_ns = deadline_ns - iree_time_now();
+      if (timeout_ns < 0) {
+        // We've reached the deadline; we'll still perform the poll though as
+        // the caller is likely expecting that behavior (intentional context
+        // switch/thread yield/etc).
+        memset(&timeout_ts, 0, sizeof(timeout_ts));
+      } else {
+        timeout_ts.tv_sec = (time_t)(timeout_ns / 1000000000ull);
+        timeout_ts.tv_nsec = (long)(timeout_ns % 1000000000ull);
+      }
+    }
+    rv = ppoll(fds, nfds, tmo_p, NULL);
+  } while (rv < 0 && errno == EINTR);
+  if (rv > 0) {
+    // One or more events set.
+    *out_signaled_count = rv;
+    return iree_ok_status();
+  } else if (rv < 0) {
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "ppoll failure %d", errno);
+  }
+  // rv == 0
+  // Timeout; no events set.
+  return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+}
+#else
+#error "unsupported IREE_WAIT_API value"
+#endif // IREE_WAIT_API
+
+//===----------------------------------------------------------------------===//
+// iree_wait_set_t
+//===----------------------------------------------------------------------===//
+
+struct iree_wait_set_t {
+  iree_allocator_t allocator;  // used to free the set in iree_wait_set_free
+
+  // Total capacity of each handle list.
+  iree_host_size_t handle_capacity;
+
+  // Total number of valid user_handles/poll_fds.
+  iree_host_size_t handle_count;
+
+  // User-provided handles.
+  // We only really need to track these so that we can preserve the handle
+  // types; we could either just do that (a few bytes) or keep them here as-is
+  // where they are a bit easier to debug.
+  // NOTE: points into the same single allocation as the set itself (see
+  // iree_wait_set_allocate); not separately freed.
+  iree_wait_handle_t* user_handles;
+
+  // Native list of fds+req we can pass to poll/ppoll/etc and that will receive
+  // the output information like which events were triggered during the wait.
+  //
+  // pollfd::events is specified when the fds are added to the set and then each
+  // wait pollfd::revents is modified during the poll syscall.
+  // NOTE: also carved from the set's single allocation.
+  struct pollfd* poll_fds;
+};
+
+// Allocates a wait set with fixed |capacity| as one contiguous block:
+//   [iree_wait_set_t][user_handles x capacity][poll_fds x capacity]
+// so a single iree_allocator_free releases everything.
+iree_status_t iree_wait_set_allocate(iree_host_size_t capacity,
+                                     iree_allocator_t allocator,
+                                     iree_wait_set_t** out_set) {
+  IREE_ASSERT_ARGUMENT(out_set);
+
+  // Be reasonable; 64K objects is too high (even if poll supports it, which is
+  // hard to tell if it does).
+  if (capacity >= UINT16_MAX) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "wait set capacity of %zu is unreasonably large",
+                            capacity);
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_host_size_t user_handle_list_size =
+      capacity * iree_sizeof_struct(iree_wait_handle_t);
+  iree_host_size_t poll_fd_list_size = capacity * sizeof(struct pollfd);
+  iree_host_size_t total_size = iree_sizeof_struct(iree_wait_set_t) +
+                                user_handle_list_size + poll_fd_list_size;
+
+  iree_wait_set_t* set = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, total_size, (void**)&set));
+  set->allocator = allocator;
+  set->handle_capacity = capacity;
+  iree_wait_set_clear(set);
+
+  // Point the sub-lists into the trailing storage of the single allocation.
+  set->user_handles =
+      (iree_wait_handle_t*)((uint8_t*)set +
+                            iree_sizeof_struct(iree_wait_set_t));
+  set->poll_fds =
+      (struct pollfd*)((uint8_t*)set->user_handles + user_handle_list_size);
+
+  *out_set = set;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Releases the wait set (handles, poll fds, and the set itself share one
+// allocation). Safe to call with NULL.
+void iree_wait_set_free(iree_wait_set_t* set) {
+  if (set == NULL) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_allocator_t allocator = set->allocator;
+  iree_allocator_free(allocator, set);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns true if the wait set contains no handles.
+bool iree_wait_set_is_empty(const iree_wait_set_t* set) {
+  // FIX: previously returned handle_count != 0, which is the inverse of
+  // "is empty".
+  return set->handle_count == 0;
+}
+
+// Adds |handle| to the set, registering its read fd for POLLIN|POLLPRI.
+// Duplicates are not coalesced: each insert consumes a slot. Fails with
+// RESOURCE_EXHAUSTED once the set's fixed capacity is reached.
+iree_status_t iree_wait_set_insert(iree_wait_set_t* set,
+                                   iree_wait_handle_t handle) {
+  if (set->handle_count + 1 > set->handle_capacity) {
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "wait set capacity reached");
+  }
+
+  iree_host_size_t index = set->handle_count++;
+
+  // Keep the user-provided handle so its type/value survive for erase/wake
+  // lookups.
+  iree_wait_handle_t* user_handle = &set->user_handles[index];
+  iree_wait_handle_wrap_primitive(handle.type, handle.value, user_handle);
+
+  // NOTE: poll will ignore any negative fds.
+  struct pollfd* poll_fd = &set->poll_fds[index];
+  poll_fd->fd = iree_wait_primitive_get_read_fd(&handle);
+  poll_fd->events = POLLIN | POLLPRI;  // implicit POLLERR | POLLHUP | POLLNVAL
+  poll_fd->revents = 0;
+
+  return iree_ok_status();
+}
+
+// Removes one occurrence of |handle| using swap-with-last (order within the
+// set is not preserved). The wake hint in handle.set_internal.index is used
+// when valid to avoid a linear scan.
+void iree_wait_set_erase(iree_wait_set_t* set, iree_wait_handle_t handle) {
+  // Find the user handle in the set. This either requires a linear scan to
+  // find the matching user handle or - if valid - we can use the native index
+  // set after an iree_wait_any wake to do a quick lookup.
+  iree_host_size_t index = handle.set_internal.index;
+  if (IREE_UNLIKELY(index >= set->handle_count) ||
+      IREE_UNLIKELY(!iree_wait_primitive_compare_identical(
+          &set->user_handles[index], &handle))) {
+    // Fallback to a linear scan of (hopefully) a small list.
+    // NOTE(review): if |handle| is not in the set |index| keeps its stale
+    // hint value and handle_count is still decremented below — presumably
+    // callers only erase handles they previously inserted; confirm.
+    for (iree_host_size_t i = 0; i < set->handle_count; ++i) {
+      if (iree_wait_primitive_compare_identical(&set->user_handles[i],
+                                                &handle)) {
+        index = i;
+        break;
+      }
+    }
+  }
+
+  // Remove from both handle lists.
+  // Since we make no guarantees about the order of the lists we can just swap
+  // with the last value.
+  // NOTE(review): mixed int/iree_host_size_t comparison; relies on
+  // handle_count > 0 (erase on an empty set would underflow) — confirm.
+  int tail_index = (int)set->handle_count - 1;
+  if (tail_index > index) {
+    memcpy(&set->poll_fds[index], &set->poll_fds[tail_index],
+           sizeof(*set->poll_fds));
+    memcpy(&set->user_handles[index], &set->user_handles[tail_index],
+           sizeof(*set->user_handles));
+  }
+  --set->handle_count;
+}
+
+void iree_wait_set_clear(iree_wait_set_t* set) { set->handle_count = 0; }
+
+// Translates a pollfd revents bitfield into a status: POLLERR/POLLHUP/POLLNVAL
+// become failures, otherwise |out_signaled| reports whether POLLIN fired.
+// |out_signaled| is left untouched on the failure paths.
+static iree_status_t iree_wait_set_resolve_poll_events(short revents,
+                                                       bool* out_signaled) {
+  if (revents & POLLERR) {
+    return iree_make_status(IREE_STATUS_INTERNAL, "POLLERR on fd");
+  }
+  if (revents & POLLHUP) {
+    return iree_make_status(IREE_STATUS_CANCELLED, "POLLHUP on fd");
+  }
+  if (revents & POLLNVAL) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "POLLNVAL on fd");
+  }
+  *out_signaled = (revents & POLLIN) != 0;
+  return iree_ok_status();
+}
+
+// Blocks until every handle in |set| has been signaled or |deadline_ns| is
+// reached. Signaled fds are destructively neutered (negated) between polls
+// and restored before returning.
+iree_status_t iree_wait_all(iree_wait_set_t* set, iree_time_t deadline_ns) {
+  // Make the syscall only when we have at least one valid fd.
+  // Don't use this as a sleep.
+  if (set->handle_count <= 0) {
+    return iree_ok_status();
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // TODO(benvanik): see if we can use tracy's mutex tracking to make waits
+  // nicer (at least showing signal->wait relations).
+
+  // Certain poll implementations have a nasty behavior where they allow
+  // negative fds to ignore entries... except for at [0]. To avoid any
+  // additional tracking here we manage a local pollfd list that we keep offset
+  // to the first non-negative fd.
+  //
+  // Gotcha is buried in here (and various spooky bug reports on the web):
+  // https://manpages.debian.org/buster/manpages-dev/poll.2.en.html
+  //   This provides an easy way of ignoring a file descriptor for a single
+  //   poll() call: simply negate the fd field. Note, however, that this
+  //   technique can't be used to ignore file descriptor 0.
+  //
+  // Thanks guys 🙄
+  struct pollfd* poll_fd_base = set->poll_fds;
+  nfds_t poll_fd_count = set->handle_count;
+
+  // Wait-all requires that we repeatedly poll until all handles have been
+  // signaled. To reduce overhead (and not miss events) we mark any handle we
+  // have successfully polled as invalid (fd<0) so that the kernel ignores it.
+  // Only when all handles are invalid does it mean that we've actually waited
+  // for all of them.
+  iree_status_t status = iree_ok_status();
+  int unsignaled_count = (int)poll_fd_count;
+  do {
+    // Eat any negative handles at the start to avoid the mentioned fd[0] bug.
+    while (poll_fd_base[0].fd < 0) {
+      ++poll_fd_base;
+      --poll_fd_count;
+    }
+
+    int signaled_count = 0;
+    status = iree_syscall_poll(poll_fd_base, poll_fd_count, deadline_ns,
+                               &signaled_count);
+    if (!iree_status_is_ok(status)) {
+      // Failed during the poll itself. Ensure that we fall-through and refresh
+      // the poll_fds handle list.
+      break;
+    }
+    unsignaled_count -= signaled_count;
+
+    // Neuter any that have successfully resolved.
+    for (nfds_t i = 0; i < poll_fd_count; ++i) {
+      if (poll_fd_base[i].fd < 0) continue;
+      bool signaled = false;
+      status =
+          iree_wait_set_resolve_poll_events(poll_fd_base[i].revents, &signaled);
+      if (!iree_status_is_ok(status)) break;
+      if (signaled) {
+        // Negate fd so that we ignore it in the next poll.
+        poll_fd_base[i].fd = -poll_fd_base[i].fd;
+      }
+    }
+    // FIX: a resolve failure above only exited the inner loop; previously the
+    // outer do-while kept polling with the error status set. Exit to the
+    // refresh step instead so the error propagates to the caller.
+    if (!iree_status_is_ok(status)) break;
+  } while (unsignaled_count > 0);
+
+  // Since we destroyed the list of handles during the operation we need to
+  // refresh them with their fds so that the next wait can happen.
+  // FIX: only un-negate the neutered (negative) entries — on an early error
+  // exit some fds are still positive and unconditionally negating them would
+  // corrupt the set for subsequent waits.
+  for (nfds_t i = 0; i < set->handle_count; ++i) {
+    if (set->poll_fds[i].fd < 0) set->poll_fds[i].fd = -set->poll_fds[i].fd;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Blocks until at least one handle in |set| is signaled or |deadline_ns| is
+// reached. On success |out_wake_handle| receives one signaled handle (zeroed
+// when the set is empty or no POLLIN-signaled handle is found) with its
+// set_internal.index populated as an erase fast-path hint.
+iree_status_t iree_wait_any(iree_wait_set_t* set, iree_time_t deadline_ns,
+                            iree_wait_handle_t* out_wake_handle) {
+  // Make the syscall only when we have at least one valid fd.
+  // Don't use this as a sleep.
+  if (set->handle_count <= 0) {
+    memset(out_wake_handle, 0, sizeof(*out_wake_handle));
+    return iree_ok_status();
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // TODO(benvanik): see if we can use tracy's mutex tracking to make waits
+  // nicer (at least showing signal->wait relations).
+
+  // Wait-any lets us just poll all the handles we have without needing to worry
+  // about whether all of them were signaled.
+  int signaled_count = 0;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_syscall_poll(set->poll_fds, set->handle_count, deadline_ns,
+                            &signaled_count));
+
+  // Find at least one signaled handle.
+  memset(out_wake_handle, 0, sizeof(*out_wake_handle));
+  if (signaled_count > 0) {
+    for (iree_host_size_t i = 0; i < set->handle_count; ++i) {
+      bool signaled = false;
+      // Propagates POLLERR/POLLHUP/POLLNVAL on any entry as a failure.
+      IREE_RETURN_AND_END_ZONE_IF_ERROR(
+          z0, iree_wait_set_resolve_poll_events(set->poll_fds[i].revents,
+                                                &signaled));
+      if (signaled) {
+        memcpy(out_wake_handle, &set->user_handles[i],
+               sizeof(*out_wake_handle));
+        out_wake_handle->set_internal.index = i;
+        break;
+      }
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Blocks until |handle| is signaled or |deadline_ns| is reached; returns
+// DEADLINE_EXCEEDED when the timeout lapses first.
+iree_status_t iree_wait_one(iree_wait_handle_t* handle,
+                            iree_time_t deadline_ns) {
+  struct pollfd poll_fds;
+  poll_fds.fd = iree_wait_primitive_get_read_fd(handle);
+  // FIX: was `return false;` — a bool escaping through an iree_status_t
+  // return. Returning OK preserves the prior observable behavior
+  // (false == 0 == OK status encoding) with the correct type: a handle with
+  // no pollable fd is treated as a successful no-op wait.
+  if (poll_fds.fd == -1) return iree_ok_status();
+  poll_fds.events = POLLIN;
+  poll_fds.revents = 0;
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // TODO(benvanik): see if we can use tracy's mutex tracking to make waits
+  // nicer (at least showing signal->wait relations).
+
+  // Just check for our single handle/event.
+  // The benefit of this is that we didn't need to heap alloc the pollfds and
+  // the cache should all stay hot. Reusing the same iree_syscall_poll as the
+  // multi-wait variants ensures consistent handling (and the same syscall
+  // showing in strace/tracy/etc).
+  int signaled_count = 0;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_syscall_poll(&poll_fds, 1, deadline_ns, &signaled_count));
+
+  IREE_TRACE_ZONE_END(z0);
+  return signaled_count ? iree_ok_status()
+                        : iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+}
+
+#endif // IREE_WAIT_API == IREE_WAIT_API_POLL ||
+ // IREE_WAIT_API == IREE_WAIT_API_PPOLL
diff --git a/runtime/src/iree/base/internal/wait_handle_posix.c b/runtime/src/iree/base/internal/wait_handle_posix.c
new file mode 100644
index 0000000..fcec4b8
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_posix.c
@@ -0,0 +1,288 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/wait_handle_posix.h"
+
+#include "iree/base/tracing.h"
+
+#if defined(IREE_WAIT_API_POSIX_LIKE)
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+#include <sys/eventfd.h>
+#endif // IREE_HAVE_WAIT_TYPE_EVENTFD
+#if defined(IREE_HAVE_WAIT_TYPE_SYNC_FILE)
+#include <android/sync.h>
+#endif // IREE_HAVE_WAIT_TYPE_SYNC_FILE
+
+//===----------------------------------------------------------------------===//
+// iree_wait_primitive_* raw calls
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+// Creates an eventfd-backed handle. |initial_state|=true starts the counter
+// at 1 so the first read/poll observes it signaled. The fd is always created
+// with CLOEXEC|NONBLOCK.
+static iree_status_t iree_wait_primitive_create_eventfd(
+    bool initial_state, iree_wait_handle_t* out_handle) {
+  memset(out_handle, 0, sizeof(*out_handle));
+  out_handle->type = IREE_WAIT_PRIMITIVE_TYPE_EVENT_FD;
+
+  // https://man7.org/linux/man-pages/man2/eventfd.2.html
+  out_handle->value.event.fd =
+      eventfd(initial_state ? 1 : 0, EFD_CLOEXEC | EFD_NONBLOCK);
+  if (IREE_UNLIKELY(out_handle->value.event.fd == -1)) {
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "failed to create eventfd (%d)", errno);
+  }
+
+  return iree_ok_status();
+}
+#endif // IREE_HAVE_WAIT_TYPE_EVENTFD
+
+#if defined(IREE_HAVE_WAIT_TYPE_PIPE)
+// Creates a non-blocking pipe-pair handle. |initial_state|=true performs one
+// write so the read end is immediately signaled. On any failure after the
+// pipe is created both fds are closed before returning.
+static iree_status_t iree_wait_primitive_create_pipe(
+    bool initial_state, iree_wait_handle_t* out_handle) {
+  memset(out_handle, 0, sizeof(*out_handle));
+  out_handle->type = IREE_WAIT_PRIMITIVE_TYPE_PIPE;
+
+  // Create read (fds[0]) and write (fds[1]) handles.
+  // https://man7.org/linux/man-pages/man2/pipe.2.html
+  if (IREE_UNLIKELY(pipe(out_handle->value.pipe.fds) < 0)) {
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "failed to create pipe (%d)", errno);
+  }
+
+  // Set both fds to non-blocking.
+  // NOTE: we could use pipe2 when available on linux to avoid the need for the
+  // fcntl, but BSD/darwin/etc don't have it so we'd still need a fallback. This
+  // is effectively the same as passing O_NONBLOCK to pipe2.
+  for (int i = 0; i < 2; ++i) {
+    if (IREE_UNLIKELY(
+            fcntl(out_handle->value.pipe.fds[i], F_SETFL, O_NONBLOCK) < 0)) {
+      // Capture errno into the status before close() can clobber it.
+      iree_status_t status = iree_make_status(
+          iree_status_code_from_errno(errno),
+          "failed to set pipe fd %d to non-blocking (%d)", i, errno);
+      // FIX: previously the pipe fds leaked on this path; close both.
+      iree_wait_handle_close(out_handle);
+      return status;
+    }
+  }
+
+  // Initially triggered means we just write once to the pipe.
+  // This write must not fail as if the caller requested the state they would
+  // likely deadlock if the first read would block.
+  if (initial_state) {
+    iree_status_t status = iree_wait_primitive_write(out_handle);
+    if (!iree_status_is_ok(status)) {
+      iree_wait_handle_close(out_handle);
+      return status;
+    }
+  }
+
+  return iree_ok_status();
+}
+#endif // IREE_HAVE_WAIT_TYPE_PIPE
+
+// Creates the preferred native wait primitive for the platform: eventfd when
+// compiled in, otherwise a pipe pair, otherwise UNIMPLEMENTED.
+iree_status_t iree_wait_primitive_create_native(
+    bool initial_state, iree_wait_handle_t* out_handle) {
+  memset(out_handle, 0, sizeof(*out_handle));
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+  // Always prefer eventfd when present; they rock.
+  return iree_wait_primitive_create_eventfd(initial_state, out_handle);
+#elif defined(IREE_HAVE_WAIT_TYPE_PIPE)
+  // Pipes are fine but much heavier than eventfds.
+  return iree_wait_primitive_create_pipe(initial_state, out_handle);
+#else
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "no native wait handle type supported");
+#endif  // IREE_HAVE_WAIT_TYPE_*
+}
+
+// Best-effort close of |fd| with EINTR retry; the result is deliberately
+// discarded (see note below).
+static void iree_wait_handle_close_fd(int fd) {
+  int rv;
+  IREE_SYSCALL(rv, close(fd));
+  // NOTE: we could fail to close if the handle is invalid/already closed/etc.
+  // As Windows has undefined behavior when handles are closed while there are
+  // active waits we don't use fd closes as load-bearing operations and it's
+  // fine to ignore the error.
+}
+
+// Closes any OS fds owned by |handle| (both ends for pipes) and resets the
+// handle to its zero state. Types without a compiled-in case (and NONE) are
+// reset without closing anything.
+void iree_wait_handle_close(iree_wait_handle_t* handle) {
+  switch (handle->type) {
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+    case IREE_WAIT_PRIMITIVE_TYPE_EVENT_FD: {
+      iree_wait_handle_close_fd(handle->value.event.fd);
+      break;
+    }
+#endif  // IREE_HAVE_WAIT_TYPE_EVENTFD
+#if defined(IREE_HAVE_WAIT_TYPE_SYNC_FILE)
+    case IREE_WAIT_PRIMITIVE_TYPE_SYNC_FILE:
+      iree_wait_handle_close_fd(handle->value.sync_file.fd);
+      break;
+#endif  // IREE_HAVE_WAIT_TYPE_SYNC_FILE
+#if defined(IREE_HAVE_WAIT_TYPE_PIPE)
+    case IREE_WAIT_PRIMITIVE_TYPE_PIPE: {
+      iree_wait_handle_close_fd(handle->value.pipe.read_fd);
+      iree_wait_handle_close_fd(handle->value.pipe.write_fd);
+      break;
+    }
+#endif  // IREE_HAVE_WAIT_TYPE_PIPE
+    default:
+      break;
+  }
+  iree_wait_handle_deinitialize(handle);
+}
+
+// Returns true when |lhs| and |rhs| are bitwise-identical (same type and same
+// primitive value). Distinct handles cloned from one primitive may still
+// compare unequal.
+bool iree_wait_primitive_compare_identical(const iree_wait_handle_t* lhs,
+                                           const iree_wait_handle_t* rhs) {
+  if (lhs->type != rhs->type) return false;
+  return memcmp(&lhs->value, &rhs->value, sizeof(lhs->value)) == 0;
+}
+
+// Returns the fd that can be polled/read for |handle|, or -1 when the type
+// has no fd (or its support was compiled out).
+int iree_wait_primitive_get_read_fd(const iree_wait_handle_t* handle) {
+  switch (handle->type) {
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+    case IREE_WAIT_PRIMITIVE_TYPE_EVENT_FD:
+      return handle->value.event.fd;
+#endif  // IREE_HAVE_WAIT_TYPE_EVENTFD
+#if defined(IREE_HAVE_WAIT_TYPE_SYNC_FILE)
+    case IREE_WAIT_PRIMITIVE_TYPE_SYNC_FILE:
+      return handle->value.sync_file.fd;
+#endif  // IREE_HAVE_WAIT_TYPE_SYNC_FILE
+#if defined(IREE_HAVE_WAIT_TYPE_PIPE)
+    case IREE_WAIT_PRIMITIVE_TYPE_PIPE:
+      return handle->value.pipe.read_fd;
+#endif  // IREE_HAVE_WAIT_TYPE_PIPE
+    default:
+      return -1;
+  }
+}
+
+// Consumes one value from |handle| (eventfd counter read / one pipe byte).
+// Only polling (deadline_ns == IREE_TIME_INFINITE_PAST) is supported today;
+// DEADLINE_EXCEEDED means nothing was pending (non-exceptional).
+iree_status_t iree_wait_primitive_read(iree_wait_handle_t* handle,
+                                       iree_time_t deadline_ns) {
+  // Until we need it this does not support anything but polling.
+  // If we want to support auto reset events we'd want to implement blocking.
+  if (deadline_ns != IREE_TIME_INFINITE_PAST) {
+    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                            "reads are just polls today");
+  }
+
+  int rv = -1;
+  switch (handle->type) {
+    case IREE_WAIT_PRIMITIVE_TYPE_NONE:
+      return iree_ok_status();  // no-op
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+    case IREE_WAIT_PRIMITIVE_TYPE_EVENT_FD: {
+      eventfd_t val = 0;
+      IREE_SYSCALL(rv, eventfd_read(handle->value.event.fd, &val));
+      break;
+    }
+#endif  // IREE_HAVE_WAIT_TYPE_EVENTFD
+#if defined(IREE_HAVE_WAIT_TYPE_SYNC_FILE)
+    case IREE_WAIT_PRIMITIVE_TYPE_SYNC_FILE:
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "sync files not yet implemented");
+#endif  // IREE_HAVE_WAIT_TYPE_SYNC_FILE
+#if defined(IREE_HAVE_WAIT_TYPE_PIPE)
+    case IREE_WAIT_PRIMITIVE_TYPE_PIPE: {
+      char buf;
+      IREE_SYSCALL(rv, read(handle->value.pipe.read_fd, &buf, 1));
+      break;
+    }
+#endif  // IREE_HAVE_WAIT_TYPE_PIPE
+    default:
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "unhandled wait type %d", (int)handle->type);
+  }
+  if (rv >= 0) {
+    // Read completed successfully.
+    return iree_ok_status();
+  } else if (errno == EWOULDBLOCK) {
+    // Would have blocked meaning that there's no data waiting.
+    // NOTE: we purposefully avoid a full status result here as this is a
+    // non-exceptional result.
+    return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  } else {
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "fd read failure %d", errno);
+  }
+}
+
+// Signals |handle| by writing one value: an eventfd increment of 1 or a
+// single byte into the pipe. Retries on EINTR via IREE_SYSCALL.
+iree_status_t iree_wait_primitive_write(iree_wait_handle_t* handle) {
+  int rv = -1;
+  switch (handle->type) {
+    case IREE_WAIT_PRIMITIVE_TYPE_NONE:
+      return iree_ok_status();  // no-op
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+    case IREE_WAIT_PRIMITIVE_TYPE_EVENT_FD: {
+      IREE_SYSCALL(rv, eventfd_write(handle->value.event.fd, 1ull));
+      break;
+    }
+#endif  // IREE_HAVE_WAIT_TYPE_EVENTFD
+#if defined(IREE_HAVE_WAIT_TYPE_SYNC_FILE)
+    case IREE_WAIT_PRIMITIVE_TYPE_SYNC_FILE:
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "sync files not yet implemented");
+#endif  // IREE_HAVE_WAIT_TYPE_SYNC_FILE
+#if defined(IREE_HAVE_WAIT_TYPE_PIPE)
+    case IREE_WAIT_PRIMITIVE_TYPE_PIPE: {
+      char buf = '\n';
+      IREE_SYSCALL(rv, write(handle->value.pipe.write_fd, &buf, 1));
+      break;
+    }
+#endif  // IREE_HAVE_WAIT_TYPE_PIPE
+    default:
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "unhandled wait type");
+  }
+  if (rv >= 0) {
+    // Write completed successfully.
+    return iree_ok_status();
+  } else {
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "fd write failure %d", errno);
+  }
+}
+
+// Drains the primitive by polling reads until one would block; never blocks
+// the caller. Depending on how the fd was configured a single read may reset
+// it entirely (default eventfd) or multiple reads may be needed (semaphore
+// mode).
+iree_status_t iree_wait_primitive_clear(iree_wait_handle_t* handle) {
+  // No-op for null handles.
+  if (handle->type == IREE_WAIT_PRIMITIVE_TYPE_NONE) return iree_ok_status();
+
+  for (;;) {
+    iree_status_t status =
+        iree_wait_primitive_read(handle, IREE_TIME_INFINITE_PAST);
+    if (iree_status_is_ok(status)) continue;  // drained one value; keep going
+    if (iree_status_is_deadline_exceeded(status)) {
+      // Read would block: the fd is fully cleared.
+      return iree_ok_status();
+    }
+    return status;  // real failure
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// iree_event_t
+//===----------------------------------------------------------------------===//
+
+// Events map 1:1 onto a native wait primitive (eventfd/pipe).
+iree_status_t iree_event_initialize(bool initial_state,
+                                    iree_event_t* out_event) {
+  return iree_wait_primitive_create_native(initial_state, out_event);
+}
+
+// Closes the underlying primitive's fds and resets the event.
+void iree_event_deinitialize(iree_event_t* event) {
+  iree_wait_handle_close(event);
+}
+
+// Set/reset delegate to primitive write/clear; failures are intentionally
+// ignored (best-effort signaling).
+void iree_event_set(iree_event_t* event) {
+  IREE_IGNORE_ERROR(iree_wait_primitive_write(event));
+}
+
+void iree_event_reset(iree_event_t* event) {
+  IREE_IGNORE_ERROR(iree_wait_primitive_clear(event));
+}
+
+#endif // IREE_WAIT_API_POSIX_LIKE
diff --git a/runtime/src/iree/base/internal/wait_handle_posix.h b/runtime/src/iree/base/internal/wait_handle_posix.h
new file mode 100644
index 0000000..bf77093
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_posix.h
@@ -0,0 +1,77 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: must be first to ensure that we can define settings for all includes.
+#include "iree/base/internal/wait_handle_impl.h"
+
+#ifndef IREE_BASE_INTERNAL_WAIT_HANDLE_POSIX_H_
+#define IREE_BASE_INTERNAL_WAIT_HANDLE_POSIX_H_
+
+#if defined(IREE_WAIT_API_POSIX_LIKE)
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Perform a syscall with a retry on EINTR (spurious wake/signal/etc).
+// NOTE: evaluates |expr| at least once and requires <errno.h> in the caller's
+// translation unit.
+//
+// Usage:
+//  int rv;
+//  IREE_SYSCALL(rv, fcntl(...));
+//  if (rv < 0) { /* failure */ }
+#define IREE_SYSCALL(result_value, expr) \
+  do {                                   \
+    result_value = expr;                 \
+  } while (result_value < 0 && errno == EINTR);
+
+// NOTE: these are intended for low-level signaling and may expose various
+// platform quirks to the caller. Always prefer using a higher level type such
+// as iree_event_t when possible.
+
+// Creates a wait primitive of the type native to the current platform.
+// May fail if resources are exhausted or wait handles are not supported.
+// The handle must be closed with iree_wait_handle_close to release its
+// resources.
+iree_status_t iree_wait_primitive_create_native(bool initial_state,
+ iree_wait_handle_t* out_handle);
+
+// Closes an existing handle from iree_wait_primitive_create_native or
+// iree_wait_primitive_clone. Must not be called while there are any waiters on
+// the handle.
+void iree_wait_handle_close(iree_wait_handle_t* handle);
+
+// Returns true if the two handles are identical in representation.
+// Note that two unique handles may point to the same underlying primitive
+// object (such as when they have been cloned).
+bool iree_wait_primitive_compare_identical(const iree_wait_handle_t* lhs,
+ const iree_wait_handle_t* rhs);
+
+// Returns an fd that can be used to read/wait on the handle.
+// Returns -1 if the handle is invalid.
+int iree_wait_primitive_get_read_fd(const iree_wait_handle_t* handle);
+
+// Reads a nonce from the given handle and blocks the caller if none are
+// available. IREE_TIME_INFINITE_PAST can be used to poll (the call will never
+// block) and IREE_TIME_INFINITE_FUTURE can be used to block until the primitive
+// is written.
+iree_status_t iree_wait_primitive_read(iree_wait_handle_t* handle,
+ iree_time_t deadline_ns);
+
+// Writes a nonce to the given handle causing it to signal any waiters.
+// The exact value written is platform/primitive specific.
+iree_status_t iree_wait_primitive_write(iree_wait_handle_t* handle);
+
+// Clears the wait primitive by repeatedly reading values until no more remain.
+// Never blocks the caller.
+iree_status_t iree_wait_primitive_clear(iree_wait_handle_t* handle);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_WAIT_API_POSIX_LIKE
+
+#endif // IREE_BASE_INTERNAL_WAIT_HANDLE_POSIX_H_
diff --git a/runtime/src/iree/base/internal/wait_handle_test.cc b/runtime/src/iree/base/internal/wait_handle_test.cc
new file mode 100644
index 0000000..c022aee
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_test.cc
@@ -0,0 +1,857 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/wait_handle.h"
+
+#if !defined(IREE_WAIT_HANDLE_DISABLED)
+
+#include <atomic>
+#include <chrono>
+#include <cstddef>
+#include <cstring>
+#include <thread>
+
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace {
+
// Timeout budgets shared by the tests below (nanoseconds, iree_duration_t).
// The short timeout keeps expected-timeout cases fast while staying long
// enough to reduce flakes; the long timeout bounds "wait forever" success
// paths so a regression stalls a build bot for at most a minute.
constexpr iree_duration_t kShortTimeoutNS = 1000000ull;    // 1ms
constexpr iree_duration_t kLongTimeoutNS = 60000000000ull;  // 1min
+
+//===----------------------------------------------------------------------===//
+// IREE_WAIT_PRIMITIVE_TYPE_EVENT_FD
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+
+// TODO(benvanik): tests wrapping external eventfds.
+
+#endif // IREE_HAVE_WAIT_TYPE_EVENTFD
+
+//===----------------------------------------------------------------------===//
+// IREE_WAIT_PRIMITIVE_TYPE_SYNC_FILE
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_HAVE_WAIT_TYPE_SYNC_FILE)
+
+// TODO(benvanik): tests wrapping external sync files.
+
+#endif // IREE_HAVE_WAIT_TYPE_SYNC_FILE
+
+//===----------------------------------------------------------------------===//
+// IREE_WAIT_PRIMITIVE_TYPE_PIPE
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_HAVE_WAIT_TYPE_PIPE)
+
+// TODO(benvanik): tests wrapping external pipes.
+
+#endif // IREE_HAVE_WAIT_TYPE_PIPE
+
+//===----------------------------------------------------------------------===//
+// IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_HAVE_WAIT_TYPE_WIN32_HANDLE)
+
+// TODO(benvanik): tests wrapping external win32 handles.
+
+#endif // IREE_HAVE_WAIT_TYPE_WIN32_HANDLE
+
+//===----------------------------------------------------------------------===//
+// iree_event_t
+//===----------------------------------------------------------------------===//
+// NOTE: this is testing the user-visible behavior of iree_event_t and the use
+// of functions like iree_wait_one is not exhaustive as that is tested
+// elsewhere.
+
+// Tests that we don't leak.
+TEST(Event, Lifetime) {
+ iree_event_t event;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &event));
+ iree_event_deinitialize(&event);
+}
+
+TEST(Event, WaitOneInitialFalse) {
+ iree_event_t event;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+ iree_event_deinitialize(&event);
+}
+
+TEST(Event, WaitOneInitialTrue) {
+ iree_event_t event;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &event));
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+ iree_event_deinitialize(&event);
+}
+
+// Tests an event that was wrapped from an immediate primitive.
+// These are used to neuter events in lists/sets and should be no-ops.
+TEST(Event, ImmediateEvent) {
+ iree_event_t event;
+ iree_wait_handle_wrap_primitive(IREE_WAIT_PRIMITIVE_TYPE_NONE, {0}, &event);
+ iree_event_set(&event);
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+ iree_event_reset(&event);
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+}
+
+TEST(Event, SetWait) {
+ iree_event_t event;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+
+ // Initially unset.
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+
+ // Set and wait.
+ iree_event_set(&event);
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+
+ // Set should be sticky until reset manually.
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+
+ // Resetting should unsignal the event.
+ iree_event_reset(&event);
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+
+ iree_event_deinitialize(&event);
+}
+
+// Tests that we can use set/reset and that certain behavior (such as sets
+// without intervening resets) is allowed. Note that this does not wait and is
+// just testing the client behavior; it's possible to implement these such that
+// a set while another set is pending fails and we want to verify that here.
+TEST(Event, SetReset) {
+ iree_event_t event;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+
+ iree_event_set(&event);
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+ iree_event_set(&event);
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+
+ iree_event_reset(&event);
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+ iree_event_reset(&event);
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+
+ iree_event_set(&event);
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+ iree_event_set(&event);
+ IREE_EXPECT_OK(iree_wait_one(&event, IREE_TIME_INFINITE_PAST));
+
+ iree_event_deinitialize(&event);
+}
+
+TEST(Event, BlockingBehavior) {
+ iree_event_t main_to_thread;
+ IREE_ASSERT_OK(
+ iree_event_initialize(/*initial_state=*/false, &main_to_thread));
+ iree_event_t thread_to_main;
+ IREE_ASSERT_OK(
+ iree_event_initialize(/*initial_state=*/false, &thread_to_main));
+
+ // Spinup a thread to signal the event.
+ // Note that it waits on the main_to_thread event until we get further along.
+ std::atomic<bool> did_run_thread{false};
+ std::thread thread([&]() {
+ // Wait for main thread to signal (below).
+ IREE_ASSERT_OK(iree_wait_one(&main_to_thread, IREE_TIME_INFINITE_FUTURE));
+
+ // Set something so we know this ran at all.
+ did_run_thread.store(true);
+
+ // Notify the caller thread.
+ iree_event_set(&thread_to_main);
+ });
+
+ // The thread may take some time to spin up; it must wait for us to allow it
+ // to run its body though so we should be fine here.
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ ASSERT_FALSE(did_run_thread.load());
+
+ // Allow the thread to continue and wait for it to exit.
+ iree_event_set(&main_to_thread);
+ IREE_ASSERT_OK(iree_wait_one(&thread_to_main, IREE_TIME_INFINITE_FUTURE));
+ ASSERT_TRUE(did_run_thread.load());
+
+ thread.join();
+ iree_event_deinitialize(&main_to_thread);
+ iree_event_deinitialize(&thread_to_main);
+}
+
+// Tests using an iree_event_t as a wait source for waiting.
+TEST(Event, WaitSourceBlocking) {
+ iree_event_t event;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+ iree_wait_source_t wait_source = iree_event_await(&event);
+
+ // Initially unset.
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_source_wait_one(wait_source, iree_immediate_timeout()));
+
+ // Set and wait.
+ iree_event_set(&event);
+ IREE_EXPECT_OK(
+ iree_wait_source_wait_one(wait_source, iree_immediate_timeout()));
+
+ // Set should be sticky until reset manually.
+ IREE_EXPECT_OK(
+ iree_wait_source_wait_one(wait_source, iree_immediate_timeout()));
+
+ // Resetting should unsignal the event.
+ iree_event_reset(&event);
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_source_wait_one(wait_source, iree_immediate_timeout()));
+
+ iree_event_deinitialize(&event);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_wait_set_t
+//===----------------------------------------------------------------------===//
+
+// Tests basic usage of the wait set API without waiting.
+TEST(WaitSet, Lifetime) {
+ iree_event_t event;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, event));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, event));
+ iree_wait_set_erase(wait_set, event);
+ iree_wait_set_clear(wait_set);
+ iree_wait_set_free(wait_set);
+
+ iree_event_deinitialize(&event);
+}
+
+TEST(WaitSet, UnreasonableCapacity) {
+ iree_wait_set_t* wait_set = NULL;
+ iree_status_t status = iree_wait_set_allocate(
+ 1 * 1024 * 1024, iree_allocator_system(), &wait_set);
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_INVALID_ARGUMENT, status);
+ iree_status_free(status);
+}
+
+// Tests that inserting the same handles multiple times is tracked correctly.
+TEST(WaitSet, Deduplication) {
+ iree_event_t ev_unset, ev_dupe;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_dupe));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ // We want to test for duplication on ev_dupe here so ensure it's added.
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_dupe));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_dupe));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_dupe));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset));
+
+ // Wait should succeed immediately because ev_dupe is set (and our wake handle
+ // should be ev_dupe).
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0,
+ memcmp(&ev_dupe.value, &wake_handle.value, sizeof(ev_dupe.value)));
+
+ // Erase the events one at a time and ensure we still get the expected number
+ // of waits on ev_dupe.
+ iree_wait_set_erase(wait_set, wake_handle);
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0,
+ memcmp(&ev_dupe.value, &wake_handle.value, sizeof(ev_dupe.value)));
+ iree_wait_set_erase(wait_set, wake_handle);
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0,
+ memcmp(&ev_dupe.value, &wake_handle.value, sizeof(ev_dupe.value)));
+ iree_wait_set_erase(wait_set, wake_handle);
+
+ // Now there should just be ev_unset present in the set and a poll will fail.
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset);
+ iree_event_deinitialize(&ev_dupe);
+}
+
+// Tests that clear handles things right in the face of dupes.
+TEST(WaitSet, Clear) {
+ iree_event_t ev_unset, ev_dupe;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_dupe));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ // We want to test for duplication o n ev_dupe here.
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_dupe));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_dupe));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_dupe));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset));
+
+ // Wait should succeed immediately because ev_dupe is set (and our wake handle
+ // should be ev_dupe).
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0,
+ memcmp(&ev_dupe.value, &wake_handle.value, sizeof(ev_dupe.value)));
+
+ // Erase all events from the set.
+ iree_wait_set_clear(wait_set);
+
+ // No more events remaining; should pass immediately.
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset);
+ iree_event_deinitialize(&ev_dupe);
+}
+
+// Tests iree_wait_all when polling (deadline_ns = IREE_TIME_INFINITE_PAST).
+TEST(WaitSet, WaitAllPolling) {
+ iree_event_t ev_unset_0, ev_unset_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_1));
+ iree_event_t ev_set_0, ev_set_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_1));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ // Polls when empty should never block.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_all(wait_set, IREE_TIME_INFINITE_PAST));
+
+ // Polls with only unset handles should never block.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_all(wait_set, IREE_TIME_INFINITE_PAST));
+
+ // Polls with only set handles should return immediately.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+ IREE_ASSERT_OK(iree_wait_all(wait_set, IREE_TIME_INFINITE_PAST));
+
+ // Polls with mixed set/unset should never succeed.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_all(wait_set, IREE_TIME_INFINITE_PAST));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset_0);
+ iree_event_deinitialize(&ev_unset_1);
+ iree_event_deinitialize(&ev_set_0);
+ iree_event_deinitialize(&ev_set_1);
+}
+
+// Tests iree_wait_all with timeouts (deadline_ns = non-zero).
+TEST(WaitSet, WaitAllTimeout) {
+ iree_event_t ev_unset_0, ev_unset_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_1));
+ iree_event_t ev_set_0, ev_set_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_1));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ // Timeouts when empty should never block.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_all(wait_set, iree_time_now() + kShortTimeoutNS));
+
+ // Timeouts with only unset handles should block (and then expire).
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+ constexpr iree_duration_t kShortTimeoutNS = 1000000ull;
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_all(wait_set, iree_time_now() + kShortTimeoutNS));
+
+ // Timeouts with only set handles should return immediately.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+ IREE_ASSERT_OK(iree_wait_all(wait_set, iree_time_now() + kShortTimeoutNS));
+
+ // Timeouts with mixed set/unset should never succeed.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_all(wait_set, iree_time_now() + kShortTimeoutNS));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset_0);
+ iree_event_deinitialize(&ev_unset_1);
+ iree_event_deinitialize(&ev_set_0);
+ iree_event_deinitialize(&ev_set_1);
+}
+
+// Tests iree_wait_all when blocking (deadline_ns = IREE_TIME_INFINITE_FUTURE).
+TEST(WaitSet, WaitAllBlocking) {
+ iree_event_t thread_to_main;
+ IREE_ASSERT_OK(
+ iree_event_initialize(/*initial_state=*/false, &thread_to_main));
+ iree_event_t ev_set_0, ev_set_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_1));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ // Throw in some other set handles so that we are multi-waiting for just the
+ // thread_to_main event to be set.
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+
+ // Wait forever (no timeout).
+ // We approximate that by forking off a thread to signal our local event. We
+ // can assume that a moderate wait is enough to verify the forever behavior as
+ // otherwise we are probably just messing up the math and will timeout.
+ std::thread thread([&]() {
+ // Notify the caller thread after sleeping (to ensure it's not polling).
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ iree_event_set(&thread_to_main);
+ });
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, thread_to_main));
+ IREE_ASSERT_OK(iree_wait_all(wait_set, IREE_TIME_INFINITE_FUTURE));
+
+ thread.join();
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&thread_to_main);
+ iree_event_deinitialize(&ev_set_0);
+ iree_event_deinitialize(&ev_set_1);
+}
+
+// Tests iree_wait_all when one or more handles are duplicated.
+TEST(WaitSet, WaitAllDuplicates) {
+ iree_event_t ev_set;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+
+ // Wait should succeed immediately because ev_set is set.
+ IREE_ASSERT_OK(iree_wait_all(wait_set, IREE_TIME_INFINITE_PAST));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_set);
+}
+
+// Tests iree_wait_any; note that this is only focused on testing the wait.
+TEST(WaitSet, WaitAny) {
+ iree_event_t ev_unset, ev_set;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+
+ // Wait should succeed immediately because ev_set is set (and our wake handle
+ // should be ev_set).
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0, memcmp(&ev_set.value, &wake_handle.value, sizeof(ev_set.value)));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset);
+ iree_event_deinitialize(&ev_set);
+}
+
+// Tests iree_wait_any when polling (deadline_ns = IREE_TIME_INFINITE_PAST).
+TEST(WaitSet, WaitAnyPolling) {
+ iree_event_t ev_unset_0, ev_unset_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_1));
+ iree_event_t ev_set_0, ev_set_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_1));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ iree_wait_handle_t empty_handle;
+ memset(&empty_handle, 0, sizeof(empty_handle));
+
+ // Polls when empty should never block and return an empty wake handle.
+ // This is so that if the caller touches the wake_handle they at least have
+ // initialized memory.
+ iree_wait_set_clear(wait_set);
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0, memcmp(&empty_handle, &wake_handle, sizeof(empty_handle)));
+
+ // Polls with only unset handles should never block.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0, memcmp(&empty_handle, &wake_handle, sizeof(empty_handle)));
+
+ // Polls with only set handles should return immediately.
+ // Note that which handle is returned is not specified.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_TRUE(
+ 0 ==
+ memcmp(&ev_set_0.value, &wake_handle.value, sizeof(ev_set_0.value)) ||
+ 0 == memcmp(&ev_set_1.value, &wake_handle.value, sizeof(ev_set_1.value)));
+
+ // Polls with mixed set/unset should return immediately.
+ // Note that which handle is returned is not specified but we know it should
+ // at least be one of the signaled ones.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_TRUE(
+ 0 ==
+ memcmp(&ev_set_0.value, &wake_handle.value, sizeof(ev_set_0.value)) ||
+ 0 == memcmp(&ev_set_1.value, &wake_handle.value, sizeof(ev_set_1.value)));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset_0);
+ iree_event_deinitialize(&ev_unset_1);
+ iree_event_deinitialize(&ev_set_0);
+ iree_event_deinitialize(&ev_set_1);
+}
+
+// Tests iree_wait_any with timeouts (deadline_ns = non-zero).
+TEST(WaitSet, WaitAnyTimeout) {
+ iree_event_t ev_unset_0, ev_unset_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_1));
+ iree_event_t ev_set_0, ev_set_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set_1));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ iree_wait_handle_t empty_handle;
+ memset(&empty_handle, 0, sizeof(empty_handle));
+
+ // Timeouts when empty should never block.
+ iree_wait_set_clear(wait_set);
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, iree_time_now() + kShortTimeoutNS, &wake_handle));
+ EXPECT_EQ(0, memcmp(&empty_handle, &wake_handle, sizeof(empty_handle)));
+
+ // Timeouts with only unset handles should block (and then expire).
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+ constexpr iree_duration_t kShortTimeoutNS = 1000000ull;
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_any(wait_set, iree_time_now() + kShortTimeoutNS, &wake_handle));
+ EXPECT_EQ(0, memcmp(&empty_handle, &wake_handle, sizeof(empty_handle)));
+
+ // Timeouts with only set handles should return immediately and have one of
+ // the set handles as the wake handle.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, iree_time_now() + kShortTimeoutNS, &wake_handle));
+ EXPECT_TRUE(
+ 0 ==
+ memcmp(&ev_set_0.value, &wake_handle.value, sizeof(ev_set_0.value)) ||
+ 0 == memcmp(&ev_set_1.value, &wake_handle.value, sizeof(ev_set_1.value)));
+
+ // Timeouts with mixed set/unset should return immediately and have one of the
+ // set handles as the wake handle.
+ iree_wait_set_clear(wait_set);
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set_1));
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, iree_time_now() + kShortTimeoutNS, &wake_handle));
+ EXPECT_TRUE(
+ 0 ==
+ memcmp(&ev_set_0.value, &wake_handle.value, sizeof(ev_set_0.value)) ||
+ 0 == memcmp(&ev_set_1.value, &wake_handle.value, sizeof(ev_set_1.value)));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset_0);
+ iree_event_deinitialize(&ev_unset_1);
+ iree_event_deinitialize(&ev_set_0);
+ iree_event_deinitialize(&ev_set_1);
+}
+
+// Tests iree_wait_any when blocking (deadline_ns = IREE_TIME_INFINITE_FUTURE).
+TEST(WaitSet, WaitAnyBlocking) {
+ iree_event_t thread_to_main;
+ IREE_ASSERT_OK(
+ iree_event_initialize(/*initial_state=*/false, &thread_to_main));
+ iree_event_t ev_unset_0, ev_unset_1;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_1));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ // Throw in some unset handles so that we are multi-waiting for just the
+ // thread_to_main event to be set.
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+
+ // Wait forever (no timeout).
+ // We approximate that by forking off a thread to signal our local event. We
+ // can assume that a moderate wait is enough to verify the forever behavior as
+ // otherwise we are probably just messing up the math and will timeout.
+ std::thread thread([&]() {
+ // Notify the caller thread after sleeping (to ensure it's not polling).
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ iree_event_set(&thread_to_main);
+ });
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, thread_to_main));
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_FUTURE, &wake_handle));
+ EXPECT_EQ(0, memcmp(&thread_to_main.value, &wake_handle.value,
+ sizeof(thread_to_main.value)));
+
+ thread.join();
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&thread_to_main);
+ iree_event_deinitialize(&ev_unset_0);
+ iree_event_deinitialize(&ev_unset_1);
+}
+
+// Tests that an iree_wait_any followed by an iree_wait_set_erase properly
+// chooses the right handle to erase.
+TEST(WaitSet, WaitAnyErase) {
+ iree_event_t ev_unset_0, ev_unset_1;
+ iree_event_t ev_set;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_0));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset_1));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_0));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset_1));
+
+ // Wait should succeed immediately because ev_set is set (and our wake handle
+ // should be ev_set).
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0, memcmp(&ev_set.value, &wake_handle.value, sizeof(ev_set.value)));
+
+ // Erase the woken handle.
+ // NOTE: to get the behavior we want to test we must pass wake_handle here and
+ // not the ev_set value.
+ iree_wait_set_erase(wait_set, wake_handle);
+
+ // Try to wait again; this time we should timeout because only ev_unset_*
+ // remains in the set.
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset_0);
+ iree_event_deinitialize(&ev_unset_1);
+ iree_event_deinitialize(&ev_set);
+}
+
+// Tests that an iree_wait_any followed by an iree_wait_set_erase properly
+// chooses the right handle to erase (the tail one).
+TEST(WaitSet, WaitAnyEraseTail) {
+ iree_event_t ev_unset, ev_set;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+
+ // Wait should succeed immediately because ev_set is set (and our wake handle
+ // should be ev_set).
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0, memcmp(&ev_set.value, &wake_handle.value, sizeof(ev_set.value)));
+
+ // Erase the woken handle.
+ // NOTE: to get the behavior we want to test we must pass wake_handle here and
+ // not the ev_set value.
+ iree_wait_set_erase(wait_set, wake_handle);
+
+ // Try to wait again; this time we should timeout because only ev_unset
+ // remains in the set.
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset);
+ iree_event_deinitialize(&ev_set);
+}
+
+// Tests that an iree_wait_any followed by an iree_wait_set_erase without using
+// the wake_handle still erases the correct handle.
+TEST(WaitSet, WaitAnyEraseSplit) {
+ iree_event_t ev_unset, ev_set;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set));
+ iree_wait_set_t* wait_set = NULL;
+ IREE_ASSERT_OK(
+ iree_wait_set_allocate(128, iree_allocator_system(), &wait_set));
+
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_unset));
+ IREE_ASSERT_OK(iree_wait_set_insert(wait_set, ev_set));
+
+ // Wait should succeed immediately because ev_set is set (and our wake handle
+ // should be ev_set).
+ iree_wait_handle_t wake_handle;
+ IREE_ASSERT_OK(
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+ EXPECT_EQ(0, memcmp(&ev_set.value, &wake_handle.value, sizeof(ev_set.value)));
+
+ // Erase the woken handle *WITHOUT* using the wake_handle.
+ iree_wait_set_erase(wait_set, ev_set);
+
+ // Try to wait again; this time we should timeout because only ev_unset
+ // remains in the set.
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_any(wait_set, IREE_TIME_INFINITE_PAST, &wake_handle));
+
+ iree_wait_set_free(wait_set);
+ iree_event_deinitialize(&ev_unset);
+ iree_event_deinitialize(&ev_set);
+}
+
+// Tests iree_wait_one when polling (deadline_ns = IREE_TIME_INFINITE_PAST).
+TEST(WaitSet, WaitOnePolling) {
+ iree_event_t ev_unset, ev_set;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set));
+
+ // Polling (don't block even if unset).
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_one(&ev_unset, IREE_TIME_INFINITE_PAST));
+ IREE_ASSERT_OK(iree_wait_one(&ev_set, IREE_TIME_INFINITE_PAST));
+
+ iree_event_deinitialize(&ev_unset);
+ iree_event_deinitialize(&ev_set);
+}
+
+// Tests iree_wait_one with timeouts (deadline_ns = non-zero).
+TEST(WaitSet, WaitOneTimeout) {
+ iree_event_t ev_unset, ev_set;
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &ev_unset));
+ IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &ev_set));
+
+ // Force a timeout by waiting on an event that'll never get set.
+ IREE_EXPECT_STATUS_IS(
+ IREE_STATUS_DEADLINE_EXCEEDED,
+ iree_wait_one(&ev_unset, iree_time_now() + kShortTimeoutNS));
+
+ // Ensure we return immediately when waiting on a set value (and not wait
+ // 100 years because we messed up our math).
+ IREE_ASSERT_OK(iree_wait_one(&ev_set, iree_time_now() + kLongTimeoutNS));
+
+ iree_event_deinitialize(&ev_unset);
+ iree_event_deinitialize(&ev_set);
+}
+
+// Tests iree_wait_one when blocking (deadline_ns = IREE_TIME_INFINITE_FUTURE).
+TEST(WaitSet, WaitOneBlocking) {
+ iree_event_t thread_to_main;
+ IREE_ASSERT_OK(
+ iree_event_initialize(/*initial_state=*/false, &thread_to_main));
+
+ // Wait forever (no timeout).
+ // We approximate that by forking off a thread to signal our local event. We
+ // can assume that a moderate wait is enough to verify the forever behavior as
+ // otherwise we are probably just messing up the math and will timeout.
+ std::thread thread([&]() {
+ // Notify the caller thread after sleeping (to ensure it's not polling).
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ iree_event_set(&thread_to_main);
+ });
+ IREE_ASSERT_OK(iree_wait_one(&thread_to_main, IREE_TIME_INFINITE_FUTURE));
+
+ thread.join();
+ iree_event_deinitialize(&thread_to_main);
+}
+
+} // namespace
+} // namespace iree
+
+#endif // !IREE_WAIT_HANDLE_DISABLED
diff --git a/runtime/src/iree/base/internal/wait_handle_win32.c b/runtime/src/iree/base/internal/wait_handle_win32.c
new file mode 100644
index 0000000..b583a36
--- /dev/null
+++ b/runtime/src/iree/base/internal/wait_handle_win32.c
@@ -0,0 +1,468 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// clang-format off: must be included before all other headers.
+#include "iree/base/internal/wait_handle_impl.h"
+// clang-format on
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/wait_handle.h"
+#include "iree/base/target_platform.h"
+
+#if IREE_WAIT_API == IREE_WAIT_API_WIN32
+
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// Platform utilities
+//===----------------------------------------------------------------------===//
+
+static_assert(
+ sizeof(iree_wait_primitive_value_t) == sizeof(HANDLE),
+ "win32 HANDLE type must match uintptr size in wait primitive struct");
+
+//===----------------------------------------------------------------------===//
+// iree_wait_primitive_* raw calls
+//===----------------------------------------------------------------------===//
+
+// Clones a wait handle such that both |source_handle| and the new
+// |out_target_handle| reference the same wait primitive. The handle must
+// be closed with iree_wait_handle_close as if it had been created.
+static iree_status_t iree_wait_primitive_clone(
+    iree_wait_handle_t* source_handle, iree_wait_handle_t* out_target_handle) {
+  if (source_handle->type != IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "source wait handle must be a win32 HANDLE");
+  }
+
+  iree_wait_primitive_value_t value;
+  memset(&value, 0, sizeof(value));
+  HANDLE process = GetCurrentProcess();
+  if (!DuplicateHandle(process, (HANDLE)source_handle->value.win32.handle,
+                       process, (LPHANDLE)&value.win32.handle, 0, FALSE,
+                       DUPLICATE_SAME_ACCESS)) {
+    return iree_make_status(
+        iree_status_code_from_win32_error(GetLastError()),
+        "unable to duplicate HANDLE; possibly out of process handles");
+  }
+  iree_wait_handle_wrap_primitive(IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE, value,
+                                  out_target_handle);
+  return iree_ok_status();
+}
+
+// Closes an existing handle that was either created manually or via
+// iree_wait_primitive_clone; a zero handle value is a no-op. Must not be
+// called while there are any waiters on the handle.
+void iree_wait_handle_close(iree_wait_handle_t* handle) {
+  if (IREE_LIKELY(handle->value.win32.handle != 0)) {
+    CloseHandle((HANDLE)handle->value.win32.handle);
+  }
+  iree_wait_handle_deinitialize(handle);
+}
+
+// Returns true if the two handles reference the same underlying kernel object.
+static bool iree_wait_primitive_compare(const iree_wait_handle_t* lhs,
+                                        const iree_wait_handle_t* rhs) {
+  if (lhs->type != rhs->type) return false;
+  bool handles_match =
+      memcmp(&lhs->value, &rhs->value, sizeof(lhs->value)) == 0;
+  switch (lhs->type) {
+    case IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE:
+      // Note that multiple HANDLEs may point at the same underlying object
+      // (such as if they have been cloned).
+      return handles_match ||
+                     CompareObjectHandles((HANDLE)lhs->value.win32.handle,
+                                          (HANDLE)rhs->value.win32.handle)
+                 ? true
+                 : false;
+    default:
+      return handles_match;
+  }
+}
+
+// Returns true if the two handles are bitwise-identical in representation.
+// Note that two unique handles may point to the same underlying primitive
+// object (such as when they have been cloned); if testing for duplicate
+// primitives prefer iree_wait_primitive_compare.
+static bool iree_wait_primitive_compare_identical(
+    const iree_wait_handle_t* lhs, const iree_wait_handle_t* rhs) {
+  return lhs->type == rhs->type &&
+         memcmp(&lhs->value, &rhs->value, sizeof(lhs->value)) == 0;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_wait_set_t
+//===----------------------------------------------------------------------===//
+
+struct iree_wait_set_t {
+  iree_allocator_t allocator;
+
+  // Total capacity of handles in the set (including duplicates).
+  // This defines the capacity of user_handles and native_handles and to ensure
+  // that we don't get insanely hard to debug behavioral differences when some
+  // handles happen to be duplicates we track the total count against this total
+  // capacity including duplicates.
+  //
+  // If you added 1000 duplicate handles to the set you'd need a handle_capacity
+  // of 1000 even though handle_count (excluding duplicates) would be 1.
+  iree_host_size_t handle_capacity;
+
+  // Total number of handles in the set (including duplicates).
+  // We use this to ensure that we provide consistent capacity errors.
+  iree_host_size_t total_handle_count;
+
+  // Number of handles in the set (excluding duplicates), defining the valid
+  // size of both user_handles and native_handles.
+  iree_host_size_t handle_count;
+
+  // De-duped user-provided handles. iree_wait_handle_t::set_internal.dupe_count
+  // is used to indicate how many additional duplicates there are of a
+  // particular handle. For example, dupe_count=0 means that there are no
+  // duplicates.
+  iree_wait_handle_t* user_handles;
+
+  // Native list of win32 HANDLE we will pass directly to WFMO.
+  // This list may be smaller than the total_handle_count if handles have been
+  // deduplicated.
+  HANDLE* native_handles;
+};
+// Allocates a wait set and both of its handle lists as a single allocation.
+iree_status_t iree_wait_set_allocate(iree_host_size_t capacity,
+                                     iree_allocator_t allocator,
+                                     iree_wait_set_t** out_set) {
+  IREE_ASSERT_ARGUMENT(out_set);
+
+  // Be reasonable; 64 MAXIMUM_WAIT_OBJECTS is low, but 64K objects is too high.
+  if (capacity >= UINT16_MAX) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "wait set capacity of %zu is unreasonably large",
+                            capacity);
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_host_size_t user_handle_list_size =
+      capacity * sizeof(iree_wait_handle_t);
+  iree_host_size_t native_handle_list_size = capacity * sizeof(HANDLE);
+  iree_host_size_t total_size = iree_sizeof_struct(iree_wait_set_t) +
+                                user_handle_list_size + native_handle_list_size;
+
+  iree_wait_set_t* set = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, total_size, (void**)&set));
+  set->allocator = allocator;
+  set->handle_capacity = capacity;
+  iree_wait_set_clear(set);
+
+  set->user_handles =
+      (iree_wait_handle_t*)((uint8_t*)set +
+                            iree_sizeof_struct(iree_wait_set_t));
+  set->native_handles =
+      (HANDLE*)((uint8_t*)set->user_handles + user_handle_list_size);
+
+  *out_set = set;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+// Frees a wait set allocated by iree_wait_set_allocate; NULL is a no-op.
+void iree_wait_set_free(iree_wait_set_t* set) {
+  if (!set) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_allocator_free(set->allocator, set);
+  IREE_TRACE_ZONE_END(z0);
+}
+// Returns true if the wait set contains no handles.
+// NOTE: the original returned handle_count != 0, i.e. the inverse of what the
+// name promises; corrected to == 0.
+bool iree_wait_set_is_empty(const iree_wait_set_t* set) {
+  return set->handle_count == 0;
+}
+// Adds a handle to the set, deduplicating against existing entries for WFMO.
+iree_status_t iree_wait_set_insert(iree_wait_set_t* set,
+                                   iree_wait_handle_t handle) {
+  if (set->total_handle_count + 1 > set->handle_capacity) {
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "wait set capacity %" PRIhsz
+                            " reached; no more wait handles available",
+                            set->handle_capacity);
+  }
+
+  // First check to see if we already have the handle in the set; since APIs
+  // like WFMO don't allow duplicate handles in their arguments this is our
+  // workaround (with the benefit of also reducing the native handle count).
+  for (iree_host_size_t i = 0; i < set->handle_count; ++i) {
+    iree_wait_handle_t* existing_handle = &set->user_handles[i];
+    if (iree_wait_primitive_compare_identical(existing_handle, &handle)) {
+      // Handle already exists in the set; just increment the reference count.
+      ++existing_handle->set_internal.dupe_count;
+      ++set->total_handle_count;
+      return iree_ok_status();
+    }
+  }
+
+  HANDLE native_handle = NULL;
+  if (IREE_LIKELY(handle.type == IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE)) {
+    // Our normal handle type; pass-through below.
+    native_handle = (HANDLE)handle.value.win32.handle;
+  } else {
+    return iree_make_status(
+        IREE_STATUS_UNIMPLEMENTED,
+        "unimplemented primitive type %d (expected PERMANENT/WIN32_HANDLE)",
+        (int)handle.type);
+  }
+
+  // There's a max of 64 waitable handles. If we want to support more than that
+  // we can spawn threads to wait on 64 objects and then wait on all those
+  // threads. For example:
+  //   iree_wait_multi(...180 handles...):
+  //     -> spawn th0 and wait on handles 0-63 (64 handles)
+  //     -> spawn th1 and wait on handles 64-127 (64 handles)
+  //     wait on [th0, th1, handles 128-179] (threads + 52 remaining handles)
+  //
+  // At the point you're multiwaiting on that many things, though, it indicates
+  // that there may be higher level coalescing that can be done by the
+  // application itself (by, say, multiplexing sockets onto a single fd instead
+  // of trying to wait on every unique socket handle via this API).
+  if (native_handle &&
+      IREE_UNLIKELY(set->handle_count + 1 > MAXIMUM_WAIT_OBJECTS)) {
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "max wait objects exceeded; only up to %d native "
+                            "wait handles are supported in WFMO",
+                            (int)MAXIMUM_WAIT_OBJECTS);
+  }
+
+  ++set->total_handle_count;
+  iree_host_size_t index = set->handle_count++;
+  iree_wait_handle_t* user_handle = &set->user_handles[index];
+  iree_wait_handle_wrap_primitive(handle.type, handle.value, user_handle);
+  user_handle->set_internal.dupe_count = 0;  // just us so far
+  set->native_handles[index] = native_handle;
+
+  return iree_ok_status();
+}
+// Removes a single reference to |handle| from the set (swap-remove on last).
+void iree_wait_set_erase(iree_wait_set_t* set, iree_wait_handle_t handle) {
+  // Find the user handle in the set. This either requires a linear scan to
+  // find the matching user handle or - if valid - we can use the native index
+  // set after an iree_wait_any wake to do a quick lookup.
+  iree_host_size_t index = handle.set_internal.index;
+  if (IREE_UNLIKELY(index >= set->handle_count) ||
+      IREE_UNLIKELY(!iree_wait_primitive_compare_identical(
+          &set->user_handles[index], &handle))) {
+    // Fallback to a linear scan of (hopefully) a small list.
+    for (iree_host_size_t i = 0; i < set->handle_count; ++i) {
+      if (iree_wait_primitive_compare_identical(&set->user_handles[i],
+                                                &handle)) {
+        index = i;
+        break;
+      }
+    }
+  }
+
+  // Decrement reference count.
+  iree_wait_handle_t* existing_handle = &set->user_handles[index];
+  if (existing_handle->set_internal.dupe_count-- > 0) {
+    // Still one or more remaining in the set; leave it in the handle list.
+    --set->total_handle_count;
+    return;
+  }
+
+  // No more references remaining; remove from both handle lists.
+  // Since we make no guarantees about the order of the lists we can just swap
+  // with the last value.
+  int tail_index = (int)set->handle_count - 1;
+  if (tail_index > index) {
+    memcpy(&set->native_handles[index], &set->native_handles[tail_index],
+           sizeof(*set->native_handles));
+    memcpy(&set->user_handles[index], &set->user_handles[tail_index],
+           sizeof(*set->user_handles));
+  }
+  --set->total_handle_count;
+  --set->handle_count;
+}
+// Resets the set to contain no handles without releasing its storage.
+void iree_wait_set_clear(iree_wait_set_t* set) {
+  set->total_handle_count = 0;
+  set->handle_count = 0;
+}
+// Waits on all (require_all=true) or any handle in the set using WFMO.
+static iree_status_t iree_wait_multi(iree_wait_set_t* set, bool require_all,
+                                     iree_time_t deadline_ns,
+                                     iree_wait_handle_t* out_wake_handle) {
+  // TODO(benvanik): see if we can use tracy's mutex tracking to make waits
+  // nicer (at least showing signal->wait relations).
+
+  // Early-exit when there's nothing to wait on.
+  if (set->handle_count == 0) {
+    if (out_wake_handle) memset(out_wake_handle, 0, sizeof(*out_wake_handle));
+    return iree_ok_status();
+  }
+
+  // Remap absolute timeout to relative timeout, handling special values as
+  // needed.
+  DWORD timeout_ms = iree_absolute_deadline_to_timeout_ms(deadline_ns);
+
+  // Perform the wait; this is allowed to yield the calling thread even if the
+  // timeout_ms is 0 to indicate a poll.
+  DWORD result =
+      WaitForMultipleObjectsEx(set->handle_count, set->native_handles,
+                               /*bWaitAll=*/(require_all ? TRUE : FALSE),
+                               timeout_ms, /*bAlertable=*/FALSE);
+
+  if (result == WAIT_TIMEOUT) {
+    // Timeout elapsed while waiting; note that the timeout may have been 0 to
+    // force a poll and be an expected result. We avoid a full status object
+    // here as we don't want to track all that in non-exceptional cases.
+    return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  } else if (result >= WAIT_OBJECT_0 &&
+             result < WAIT_OBJECT_0 + set->handle_count) {
+    // One (or more) handles were signaled successfully.
+    if (out_wake_handle) {
+      DWORD wake_index = result - WAIT_OBJECT_0;
+      iree_wait_primitive_value_t wake_value;
+      memset(&wake_value, 0, sizeof(wake_value));
+      wake_value.win32.handle = (uintptr_t)set->native_handles[wake_index];
+      iree_wait_handle_wrap_primitive(IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE,
+                                      wake_value, out_wake_handle);
+
+      // Optimization for wait-wake-erase; this lets us avoid scanning the
+      // native handle list (the kernel already did that for us!).
+      out_wake_handle->set_internal.index = wake_index;
+    }
+    return iree_ok_status();
+  } else if (result >= WAIT_ABANDONED_0 &&
+             result < WAIT_ABANDONED_0 + set->handle_count) {
+    // One (or more) mutex handles were abandoned during the wait.
+    // This happens when a thread holding the mutex dies without releasing it.
+    // This is less common in-process and more for the cross-process situations
+    // where we have duped/opened a remote handle and the remote process dies.
+    // That's a pretty rare situation but not quite unheard of in sandboxing
+    // impls where death is a feature.
+    //
+    // NOTE: we shouldn't get abandoned handles in regular cases - both because
+    // we don't really use mutex handles (though users may provide them) and
+    // that mutex abandonment is exceptional. If you see this you are probably
+    // going to want to look for thread exit messages or zombie processes.
+    DWORD wake_index = result - WAIT_ABANDONED_0;
+    return iree_make_status(
+        IREE_STATUS_DATA_LOSS,
+        "mutex native handle %lu abanonded; shared state is "
+        "(likely) inconsistent",
+        wake_index);
+  } else if (result == WAIT_FAILED) {
+    return iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+                            "WFMO failed");
+  } else {
+    return iree_make_status(IREE_STATUS_INTERNAL,
+                            "WFMO internal error (unimplemented APC?)");
+  }
+}
+// Blocks until all handles in the set are signaled or |deadline_ns| elapses.
+iree_status_t iree_wait_all(iree_wait_set_t* set, iree_time_t deadline_ns) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status =
+      iree_wait_multi(set, /*require_all=*/true, deadline_ns, NULL);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+// Blocks until any handle in the set is signaled or |deadline_ns| elapses.
+iree_status_t iree_wait_any(iree_wait_set_t* set, iree_time_t deadline_ns,
+                            iree_wait_handle_t* out_wake_handle) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status =
+      iree_wait_multi(set, /*require_all=*/false, deadline_ns, out_wake_handle);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+// Waits on a single handle via WaitForSingleObjectEx (no set required).
+iree_status_t iree_wait_one(iree_wait_handle_t* handle,
+                            iree_time_t deadline_ns) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Remap absolute timeout to relative timeout, handling special values as
+  // needed.
+  DWORD timeout_ms = iree_absolute_deadline_to_timeout_ms(deadline_ns);
+
+  // Perform the wait; this is allowed to yield the calling thread even if the
+  // timeout_ms is 0 to indicate a poll.
+  DWORD result =
+      WaitForSingleObjectEx((HANDLE)handle->value.win32.handle, timeout_ms,
+                            /*bAlertable=*/FALSE);
+
+  iree_status_t status;
+  if (result == WAIT_TIMEOUT) {
+    // Timeout elapsed while waiting; note that the timeout may have been 0 to
+    // force a poll and be an expected result. We avoid a full status object
+    // here as we don't want to track all that in non-exceptional cases.
+    status = iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  } else if (result == WAIT_OBJECT_0) {
+    // Handle was signaled successfully.
+    status = iree_ok_status();
+  } else if (result == WAIT_ABANDONED_0) {
+    // The mutex handle was abandoned during the wait.
+    // This happens when a thread holding the mutex dies without releasing it.
+    // This is less common in-process and more for the cross-process situations
+    // where we have duped/opened a remote handle and the remote process dies.
+    // That's a pretty rare situation but not quite unheard of in sandboxing
+    // impls where death is a feature.
+    //
+    // NOTE: we shouldn't get abandoned handles in regular cases - both because
+    // we don't really use mutex handles (though users may provide them) and
+    // that mutex abandonment is exceptional. If you see this you are probably
+    // going to want to look for thread exit messages or zombie processes.
+    status = iree_make_status(IREE_STATUS_DATA_LOSS,
+                              "mutex native handle abanonded; shared state is "
+                              "(likely) inconsistent");
+  } else if (result == WAIT_FAILED) {
+    status = iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+                              "WFSO failed");
+  } else {
+    status = iree_make_status(IREE_STATUS_INTERNAL,
+                              "WFSO internal error (unimplemented APC?)");
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_event_t
+//===----------------------------------------------------------------------===//
+// Creates a Win32 manual-reset event with the given initial signaled state.
+iree_status_t iree_event_initialize(bool initial_state,
+                                    iree_event_t* out_event) {
+  memset(out_event, 0, sizeof(*out_event));
+  iree_wait_primitive_value_t value;
+  memset(&value, 0, sizeof(value));
+  value.win32.handle =
+      (uintptr_t)CreateEvent(NULL, TRUE, initial_state ? TRUE : FALSE, NULL);
+  if (!value.win32.handle) {
+    return iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+                            "unable to create event");
+  }
+  iree_wait_handle_wrap_primitive(IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE, value,
+                                  out_event);
+  return iree_ok_status();
+}
+// Releases the event's underlying HANDLE.
+void iree_event_deinitialize(iree_event_t* event) {
+  iree_wait_handle_close(event);
+}
+// Signals the event; waiters are released until the event is reset.
+void iree_event_set(iree_event_t* event) {
+  HANDLE handle = (HANDLE)event->value.win32.handle;
+  if (handle) SetEvent(handle);
+}
+// Clears the event's signaled state.
+void iree_event_reset(iree_event_t* event) {
+  HANDLE handle = (HANDLE)event->value.win32.handle;
+  if (handle) ResetEvent(handle);
+}
+
+#endif // IREE_WAIT_API == IREE_WAIT_API_WIN32
diff --git a/runtime/src/iree/base/logging.cc b/runtime/src/iree/base/logging.cc
new file mode 100644
index 0000000..b27342f
--- /dev/null
+++ b/runtime/src/iree/base/logging.cc
@@ -0,0 +1,189 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/logging.h"
+
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+#ifdef __ANDROID__
+#include <android/log.h>
+#endif
+
+#include "iree/base/internal/flags.h"
+#include "iree/base/tracing.h"
+
+IREE_FLAG(int32_t, iree_minloglevel, 0,
+ "Minimum logging level. 0 = INFO and above.");
+IREE_FLAG(int32_t, iree_v, 0,
+ "Verbosity level maximum. 1 = IREE_VLOG(0-1), 2 = IREE_VLOG(0-2).");
+
+namespace iree {
+namespace internal {
+
+namespace {
+
+// Parses a log level integer from an environment variable string value.
+// Returns true if the value was present and parsed successfully.
+bool LogLevelStrToInt(const char* iree_env_var_val, int64_t* out_level) {
+  *out_level = 0;
+  if (iree_env_var_val == nullptr) {
+    return false;
+  }
+
+  std::string min_log_level(iree_env_var_val);
+  std::istringstream ss(min_log_level);
+  int64_t level;
+  if (!(ss >> level)) {
+    // Invalid log level setting; leave the level at the default (0).
+    return false;
+  }
+
+  *out_level = level;
+  return true;
+}
+// Returns $IREE_MIN_LOG_LEVEL when set, else the --iree_minloglevel flag.
+int64_t MinLogLevelFromEnv() {
+  const char* iree_env_var_val = getenv("IREE_MIN_LOG_LEVEL");
+  int64_t level = 0;
+  if (LogLevelStrToInt(iree_env_var_val, &level)) {
+    return level;
+  }
+  return FLAG_iree_minloglevel;
+}
+// Returns $IREE_MIN_VLOG_LEVEL when set, else the --iree_v flag.
+int64_t MinVLogLevelFromEnv() {
+  const char* iree_env_var_val = getenv("IREE_MIN_VLOG_LEVEL");
+  int64_t level = 0;
+  if (LogLevelStrToInt(iree_env_var_val, &level)) {
+    return level;
+  }
+  return FLAG_iree_v;
+}
+
+} // namespace
+// Captures the source location and severity; message text is streamed later.
+LogMessage::LogMessage(const char* file_name, int line, int severity)
+    : file_name_(file_name), line_(line), severity_(severity) {}
+// Emits the accumulated message on destruction if it meets the min level.
+LogMessage::~LogMessage() {
+  // Read the min log level once during the first call to logging.
+  static int64_t min_log_level = MinLogLevelFromEnv();
+  if (IREE_LIKELY(severity_ >= min_log_level)) {
+    EmitLogMessage();
+  }
+}
+// Returns the cached verbosity threshold used by IREE_VLOG_IS_ON.
+int64_t LogMessage::MinVLogLevel() {
+  static int64_t min_vlog_level = MinVLogLevelFromEnv();
+  return min_vlog_level;
+}
+// Writes the message to stderr (and Android logcat/tracing when enabled).
+void LogMessage::EmitLogMessage() {
+  // TODO(scotttodd): Include current system time
+  fprintf(stderr, "%c %s:%d] %s\n", "IWEF"[severity_], file_name_, line_,
+          str().c_str());
+
+#if defined(__ANDROID__)
+  // Define equivalent android log levels to map to IREE.
+  constexpr int kStatusToAndroidLevel[4] = {
+      4,  // Android info
+      5,  // Android warning
+      6,  // Android error
+      6   // Android fatal (doesn't exist, so reusing error)
+  };
+
+  // Map the IREE severity onto the Android log priority.
+  int android_severity = kStatusToAndroidLevel[severity_];
+  {
+    // NOTE: this truncates. That's fine for now and stderr is still usable.
+    char str_buffer[512];
+    snprintf(str_buffer, sizeof(str_buffer), "%s:%d] %s\n", file_name_, line_,
+             str().c_str());
+    __android_log_write(android_severity, "native", str_buffer);
+  }
+#endif  // defined(__ANDROID__)
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_LOG_MESSAGES
+  constexpr int kLevelColors[4] = {
+      IREE_TRACING_MESSAGE_LEVEL_INFO,     // INFO
+      IREE_TRACING_MESSAGE_LEVEL_WARNING,  // WARNING
+      IREE_TRACING_MESSAGE_LEVEL_ERROR,    // ERROR
+      IREE_TRACING_MESSAGE_LEVEL_ERROR,    // FATAL
+  };
+  {
+    // NOTE: this truncates. That's fine for now and stderr is still usable.
+    char str_buffer[512];
+    int str_length = snprintf(str_buffer, sizeof(str_buffer), "%s:%d] %s\n",
+                              file_name_, line_, str().c_str());
+    IREE_TRACE_MESSAGE_DYNAMIC_COLORED(kLevelColors[severity_], str_buffer,
+                                       str_length);
+  }
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_LOG_MESSAGES
+}
+// Streams like LogMessage but terminates the process on destruction.
+LogMessageFatal::LogMessageFatal(const char* file, int line)
+    : LogMessage(file, line, FATAL) {}
+// Emits the pending message and then aborts; never returns.
+LogMessageFatal::~LogMessageFatal() {
+  EmitLogMessage();
+
+  // abort() ensures we don't return (as promised via ATTRIBUTE_NORETURN).
+  abort();
+}
+// Prints printable char values as 'c' and all others as their numeric value.
+template <>
+void MakeCheckOpValueString(std::ostream* os, const char& v) {
+  if (v >= 32 && v <= 126) {
+    (*os) << "'" << v << "'";
+  } else {
+    (*os) << "char value " << static_cast<int16_t>(v);
+  }
+}
+// As above, for explicitly signed 8-bit values.
+template <>
+void MakeCheckOpValueString(std::ostream* os, const int8_t& v) {
+  if (v >= 32 && v <= 126) {
+    (*os) << "'" << v << "'";
+  } else {
+    (*os) << "signed char value " << static_cast<int16_t>(v);
+  }
+}
+// As above, for unsigned 8-bit values.
+template <>
+void MakeCheckOpValueString(std::ostream* os, const uint8_t& v) {
+  if (v >= 32 && v <= 126) {
+    (*os) << "'" << v << "'";
+  } else {
+    (*os) << "unsigned char value " << static_cast<uint16_t>(v);
+  }
+}
+// nullptr_t has no operator<<; print a readable token instead.
+template <>
+void MakeCheckOpValueString(std::ostream* os, const std::nullptr_t& v) {
+  (*os) << "nullptr";
+}
+// Begins the `Check failed: <expr> (` message preamble.
+CheckOpMessageBuilder::CheckOpMessageBuilder(const char* exprtext)
+    : stream_(new std::ostringstream) {
+  *stream_ << "Check failed: " << exprtext << " (";
+}
+// Releases the scratch stream.
+CheckOpMessageBuilder::~CheckOpMessageBuilder() { delete stream_; }
+// Separates the two compared values with " vs. ".
+std::ostream* CheckOpMessageBuilder::ForVar2() {
+  *stream_ << " vs. ";
+  return stream_;
+}
+// Finalizes the message; the caller owns the returned string.
+std::string* CheckOpMessageBuilder::NewString() {
+  *stream_ << ")";
+  return new std::string(stream_->str());
+}
+
+} // namespace internal
+} // namespace iree
diff --git a/runtime/src/iree/base/logging.h b/runtime/src/iree/base/logging.h
new file mode 100644
index 0000000..3051dc3
--- /dev/null
+++ b/runtime/src/iree/base/logging.h
@@ -0,0 +1,374 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===----------------------------------------------------------------------===//
+// //
+// ( ( ( ( //
+// )\ ) )\ ) )\ ) ( ( * ) )\ ) //
+// (()/( ( (()/( (()/( ( )\ )\ ` ) /( ( (()/( //
+// /(_)) )\ /(_)) /(_)) )\ (((_) ((((_)( ( )(_)) )\ /(_)) //
+// (_))_ ((_) (_)) (_)) ((_) )\___ )\ _ )\ (_(_()) ((_) (_))_ //
+// | \ | __| | _ \ | _ \ | __| ((/ __| (_)_\(_) |_ _| | __| | \ //
+// | |) | | _| | _/ | / | _| | (__ / _ \ | | | _| | |) | //
+// |___/ |___| |_| |_|_\ |___| \___| /_/ \_\ |_| |___| |___/ //
+// //
+//===----------------------------------------------------------------------===//
+// TODO(#2843): replace this file with a C sink API. IREE itself should not
+// perform any logging by default and instead route all logging through a
+// pluggable interface (similar to how we have iree_allocator_t to plug in
+// allocators). This will allow applications to scope their logging (critical
+// in multi-tenant situations where logs need to route back to clients), bring
+// their own logging libraries, and support logging on platforms we otherwise
+// cannot. The code in this file is currently C++ only and not great.
+
+#ifndef IREE_BASE_LOGGING_H_
+#define IREE_BASE_LOGGING_H_
+
+// IREE_LOG(severity) << ...;
+// Logs a message at the given severity.
+// Severity:
+// INFO Logs information text.
+// WARNING Logs a warning.
+// ERROR Logs an error.
+// FATAL Logs an error and exit(1).
+//
+// IREE_DLOG(severity) << ...;
+// Behaves like `IREE_LOG` in debug mode (i.e. `#ifndef NDEBUG`).
+// Otherwise, it compiles away and does nothing.
+//
+// IREE_VLOG(level) << ...;
+// Logs a verbose message at the given verbosity level.
+//
+// IREE_DVLOG(level) << ...;
+// Behaves like `IREE_VLOG` in debug mode (i.e. `#ifndef NDEBUG`).
+// Otherwise, it compiles away and does nothing.
+//
+// IREE_CHECK(condition) << ...;
+// Runtime asserts that the given condition is true even in release builds.
+// It's recommended that IREE_DCHECK is used instead as too many CHECKs
+// can impact performance.
+//
+// IREE_CHECK_EQ|NE|LT|GT|LE|GE(val1, val2) << ...;
+// Runtime assert the specified operation with the given values.
+//
+// IREE_DCHECK(condition) << ...;
+// Runtime asserts that the given condition is true only in non-opt builds.
+//
+// IREE_DCHECK_EQ|NE|LT|GT|LE|GE(val1, val2) << ...;
+// Runtime assert the specified operation with the given values in non-opt
+// builds.
+//
+// IREE_QCHECK(condition) << ...;
+// IREE_QCHECK_EQ|NE|LT|GT|LE|GE(val1, val2) << ...;
+// These behave like `IREE_CHECK` but do not print a full stack trace.
+// They are useful when problems are definitely unrelated to program flow,
+// e.g. when validating user input.
+
+#include <cstddef>
+#include <cstdint>
+#include <ios>
+#include <limits>
+#include <sstream>
+#include <string>
+
+#include "iree/base/attributes.h"
+
+namespace iree {
+
+// ------------------------------------------------------------------------- //
+// | IREE_LOG | //
+// ------------------------------------------------------------------------- //
+
+// Severity levels for IREE_LOG().
+const int INFO = 0;
+const int WARNING = 1;
+const int ERROR = 2;
+const int FATAL = 3;
+
+namespace internal {
+// Accumulates a log message via ostringstream and emits it at destruction.
+class LogMessage : public std::basic_ostringstream<char> {
+ public:
+  LogMessage(const char* file_name, int line, int severity);
+  ~LogMessage();
+
+  const char* file_name() const { return file_name_; }
+  int line() const { return line_; }
+  int severity() const { return severity_; }
+
+  // Returns the minimum log level for IREE_VLOG statements.
+  // E.g., if MinVLogLevel() is 2, then IREE_VLOG(2) statements will produce
+  // output, but IREE_VLOG(3) will not. Defaults to 0.
+  static int64_t MinVLogLevel();
+
+ protected:
+  void EmitLogMessage();
+
+ private:
+  const char* file_name_;
+  int line_;
+  int severity_;
+};
+
+// LogMessageFatal emits the message and then aborts; it never returns.
+class LogMessageFatal : public LogMessage {
+ public:
+  LogMessageFatal(const char* file, int line) IREE_ATTRIBUTE_COLD;
+  IREE_ATTRIBUTE_NORETURN ~LogMessageFatal();
+};
+
+// NullStream implements operator<< but discards everything written to it.
+class NullStream {
+ public:
+  NullStream& stream() { return *this; }
+};
+template <typename T>
+inline NullStream& operator<<(NullStream& str, const T&) {
+  return str;
+}
+inline NullStream& operator<<(NullStream& str,
+                              std::ostream& (*)(std::ostream& os)) {
+  return str;
+}
+inline NullStream& operator<<(NullStream& str,
+                              std::ios_base& (*)(std::ios_base& os)) {
+  return str;
+}
+
+#define _IREE_LOG_INFO \
+ ::iree::internal::LogMessage(__FILE__, __LINE__, ::iree::INFO)
+#define _IREE_LOG_WARNING \
+ ::iree::internal::LogMessage(__FILE__, __LINE__, ::iree::WARNING)
+#define _IREE_LOG_ERROR \
+ ::iree::internal::LogMessage(__FILE__, __LINE__, ::iree::ERROR)
+#define _IREE_LOG_FATAL ::iree::internal::LogMessageFatal(__FILE__, __LINE__)
+
+#define IREE_LOG(severity) _IREE_LOG_##severity
+
+#ifndef NDEBUG
+#define IREE_DLOG IREE_LOG
+#else
+#define IREE_DLOG(severity) \
+ switch (0) \
+ default: \
+ ::iree::internal::NullStream().stream()
+#endif
+
+#define IREE_VLOG_IS_ON(lvl) \
+ ((lvl) <= ::iree::internal::LogMessage::MinVLogLevel())
+
+#define IREE_VLOG(lvl) \
+ if (IREE_UNLIKELY(IREE_VLOG_IS_ON(lvl))) \
+ ::iree::internal::LogMessage(__FILE__, __LINE__, ::iree::INFO)
+
+// `IREE_DVLOG` behaves like `IREE_VLOG` in debug mode (i.e. `#ifndef NDEBUG`).
+// Otherwise, it compiles away and does nothing.
+#ifndef NDEBUG
+#define IREE_DVLOG IREE_VLOG
+#else
+#define IREE_DVLOG(verbose_level) \
+ while (false && (verbose_level) > 0) ::iree::internal::NullStream().stream()
+#endif // !NDEBUG
+
+// ------------------------------------------------------------------------- //
+// | IREE_CHECK | //
+// ------------------------------------------------------------------------- //
+
+// IREE_CHECK dies with a fatal error if condition is not true. It is *not*
+// controlled by NDEBUG, so the check will be executed regardless of
+// compilation mode. Therefore, it is safe to do things like:
+// IREE_CHECK(fp->Write(x) == 4)
+#define IREE_CHECK(condition) \
+ if (IREE_UNLIKELY(!(condition))) \
+ IREE_LOG(FATAL) << "Check failed: " #condition " "
+
+// Overloaded for integral types so that static const integral members that
+// are declared in classes but never defined can still be passed (by value)
+// as arguments to the IREE_CHECK* macros. It's not encouraged though.
+template <typename T>
+inline const T& GetReferenceableValue(const T& t) {
+  return t;
+}
+inline char GetReferenceableValue(char t) { return t; }
+inline int8_t GetReferenceableValue(int8_t t) { return t; }
+inline uint8_t GetReferenceableValue(uint8_t t) { return t; }
+inline int16_t GetReferenceableValue(int16_t t) { return t; }
+inline uint16_t GetReferenceableValue(uint16_t t) { return t; }
+inline int32_t GetReferenceableValue(int32_t t) { return t; }
+inline uint32_t GetReferenceableValue(uint32_t t) { return t; }
+inline int64_t GetReferenceableValue(int64_t t) { return t; }
+inline uint64_t GetReferenceableValue(uint64_t t) { return t; }
+
+// This formats a value for a failing IREE_CHECK_XX statement. Ordinarily,
+// it uses the value's operator<<, with a few special cases below.
+template <typename T>
+inline void MakeCheckOpValueString(std::ostream* os, const T& v) {
+  (*os) << v;
+}
+
+// Overrides for char types provide readable values for unprintable
+// characters.
+template <>
+void MakeCheckOpValueString(std::ostream* os, const char& v);
+template <>
+void MakeCheckOpValueString(std::ostream* os, const int8_t& v);
+template <>
+void MakeCheckOpValueString(std::ostream* os, const uint8_t& v);
+// We need an explicit specialization for std::nullptr_t.
+template <>
+void MakeCheckOpValueString(std::ostream* os, const std::nullptr_t& v);
+
+// A container for a string pointer which evaluates to true iff the pointer
+// is non-NULL (i.e. a check has failed and a message was built).
+struct CheckOpString {
+  CheckOpString(std::string* str) : str_(str) {}  // NOLINT
+  // No destructor: if str_ is non-NULL, we're about to IREE_LOG(FATAL),
+  // so there's no point in cleaning up str_.
+  operator bool() const { return IREE_UNLIKELY(str_ != NULL); }
+  std::string* str_;
+};
+
+// Build the error message string. Specify no inlining for code size.
+template <typename T1, typename T2>
+std::string* MakeCheckOpString(const T1& v1, const T2& v2,
+ const char* exprtext) IREE_ATTRIBUTE_NOINLINE;
+
+// A helper class for formatting "expr (V1 vs. V2)" in a IREE_CHECK_XX
+// statement. See MakeCheckOpString for sample usage.
+class CheckOpMessageBuilder {
+ public:
+ // Inserts "exprtext" and " (" to the stream.
+ explicit CheckOpMessageBuilder(const char* exprtext);
+ // Deletes "stream_".
+ ~CheckOpMessageBuilder();
+ // For inserting the first variable.
+ std::ostream* ForVar1() { return stream_; }
+ // For inserting the second variable (adds an intermediate " vs. ").
+ std::ostream* ForVar2();
+ // Get the result (inserts the closing ")"). The caller takes ownership
+ // of the returned string; in the failure path it is never freed because
+ // the process is about to IREE_LOG(FATAL) (see CheckOpString).
+ std::string* NewString();
+
+ private:
+ std::ostringstream* stream_;
+};
+
+// Formats "expr (v1 vs. v2)" for a failed comparison check.
+template <typename T1, typename T2>
+std::string* MakeCheckOpString(const T1& v1, const T2& v2,
+ const char* exprtext) {
+ CheckOpMessageBuilder comb(exprtext);
+ MakeCheckOpValueString(comb.ForVar1(), v1);
+ MakeCheckOpValueString(comb.ForVar2(), v2);
+ return comb.NewString();
+}
+
+// Helper functions for IREE_CHECK_OP macro.
+// The (int, int) specialization works around the issue that the compiler
+// will not instantiate the template version of the function on values of
+// unnamed enum type - see comment below.
+// The (size_t, int) and (int, size_t) specialization are to handle unsigned
+// comparison errors while still being thorough with the comparison.
+//
+// NOTE(review): in the (size_t, int) overload, (size_t)((unsigned)v1)
+// truncates the already-size_t v1 to 32 bits on LP64 targets; upstream
+// glog/TensorFlow converts the range-checked int v2 instead — confirm
+// against upstream before relying on checks of huge size_t values.
+// Likewise the (int, size_t) overload range-checks v2 but not v1, so a
+// negative v1 converts to a huge size_t in the final comparison.
+#define _IREE_DEFINE_CHECK_OP_IMPL(name, op) \
+ template <typename T1, typename T2> \
+ inline std::string* name##Impl(const T1& v1, const T2& v2, \
+ const char* exprtext) { \
+ if (IREE_LIKELY(v1 op v2)) \
+ return NULL; \
+ else \
+ return ::iree::internal::MakeCheckOpString(v1, v2, exprtext); \
+ } \
+ inline std::string* name##Impl(int v1, int v2, const char* exprtext) { \
+ return name##Impl<int, int>(v1, v2, exprtext); \
+ } \
+ inline std::string* name##Impl(const size_t v1, const int v2, \
+ const char* exprtext) { \
+ if (IREE_UNLIKELY(v2 < 0)) { \
+ return ::iree::internal::MakeCheckOpString(v1, v2, exprtext); \
+ } \
+ const size_t uval = (size_t)((unsigned)v1); \
+ return name##Impl<size_t, size_t>(uval, v2, exprtext); \
+ } \
+ inline std::string* name##Impl(const int v1, const size_t v2, \
+ const char* exprtext) { \
+ if (IREE_UNLIKELY(v2 >= std::numeric_limits<int>::max())) { \
+ return ::iree::internal::MakeCheckOpString(v1, v2, exprtext); \
+ } \
+ const size_t uval = (size_t)((unsigned)v2); \
+ return name##Impl<size_t, size_t>(v1, uval, exprtext); \
+ }
+
+_IREE_DEFINE_CHECK_OP_IMPL(Check_EQ, ==)
+_IREE_DEFINE_CHECK_OP_IMPL(Check_NE, !=)
+_IREE_DEFINE_CHECK_OP_IMPL(Check_LE, <=)
+_IREE_DEFINE_CHECK_OP_IMPL(Check_LT, <)
+_IREE_DEFINE_CHECK_OP_IMPL(Check_GE, >=)
+_IREE_DEFINE_CHECK_OP_IMPL(Check_GT, >)
+#undef _IREE_DEFINE_CHECK_OP_IMPL
+
+// In optimized mode, use CheckOpString to hint to compiler that
+// the while condition is unlikely.
+// The while-statement binds _result in condition scope; the "loop body"
+// (the fatal log statement) runs at most once since the fatal logger is
+// expected not to return.
+#define IREE_CHECK_OP_LOG(name, op, val1, val2) \
+ while (::iree::internal::CheckOpString _result = \
+ ::iree::internal::name##Impl( \
+ ::iree::internal::GetReferenceableValue(val1), \
+ ::iree::internal::GetReferenceableValue(val2), \
+ #val1 " " #op " " #val2)) \
+ ::iree::internal::LogMessageFatal(__FILE__, __LINE__) << *(_result.str_)
+
+#define IREE_CHECK_OP(name, op, val1, val2) \
+ IREE_CHECK_OP_LOG(name, op, val1, val2)
+
+// IREE_CHECK_EQ/NE/...: fatal comparison checks. Each expands to a
+// Check_XX helper that returns NULL on success or a formatted message
+// string on failure.
+#define IREE_CHECK_EQ(val1, val2) IREE_CHECK_OP(Check_EQ, ==, val1, val2)
+#define IREE_CHECK_NE(val1, val2) IREE_CHECK_OP(Check_NE, !=, val1, val2)
+#define IREE_CHECK_LE(val1, val2) IREE_CHECK_OP(Check_LE, <=, val1, val2)
+#define IREE_CHECK_LT(val1, val2) IREE_CHECK_OP(Check_LT, <, val1, val2)
+#define IREE_CHECK_GE(val1, val2) IREE_CHECK_OP(Check_GE, >=, val1, val2)
+#define IREE_CHECK_GT(val1, val2) IREE_CHECK_OP(Check_GT, >, val1, val2)
+
+// Debug-only variants: active when NDEBUG is not defined; otherwise they
+// compile to no-ops while still parsing their operands (see below).
+#ifndef NDEBUG
+#define IREE_DCHECK(condition) IREE_CHECK(condition)
+#define IREE_DCHECK_EQ(val1, val2) IREE_CHECK_EQ(val1, val2)
+#define IREE_DCHECK_NE(val1, val2) IREE_CHECK_NE(val1, val2)
+#define IREE_DCHECK_LE(val1, val2) IREE_CHECK_LE(val1, val2)
+#define IREE_DCHECK_LT(val1, val2) IREE_CHECK_LT(val1, val2)
+#define IREE_DCHECK_GE(val1, val2) IREE_CHECK_GE(val1, val2)
+#define IREE_DCHECK_GT(val1, val2) IREE_CHECK_GT(val1, val2)
+
+#else
+
+// `false && (condition)` short-circuits so nothing executes at runtime,
+// but the operand is still type-checked by the compiler.
+#define IREE_DCHECK(condition) \
+ while (false && (condition)) IREE_LOG(FATAL)
+
+// NDEBUG is defined, so IREE_DCHECK_EQ(x, y) and so on do nothing.
+// However, we still want the compiler to parse x and y, because
+// we don't want to lose potentially useful errors and warnings.
+// _IREE_DCHECK_NOP is a helper, and should not be used outside of this file.
+#define _IREE_DCHECK_NOP(x, y) \
+ while (false && ((void)(x), (void)(y), 0)) IREE_LOG(FATAL)
+
+#define IREE_DCHECK_EQ(x, y) _IREE_DCHECK_NOP(x, y)
+#define IREE_DCHECK_NE(x, y) _IREE_DCHECK_NOP(x, y)
+#define IREE_DCHECK_LE(x, y) _IREE_DCHECK_NOP(x, y)
+#define IREE_DCHECK_LT(x, y) _IREE_DCHECK_NOP(x, y)
+#define IREE_DCHECK_GE(x, y) _IREE_DCHECK_NOP(x, y)
+#define IREE_DCHECK_GT(x, y) _IREE_DCHECK_NOP(x, y)
+
+#endif  // !NDEBUG
+
+// These are for when you don't want a IREE_CHECK failure to print a verbose
+// stack trace. The implementation of IREE_CHECK* in this file already doesn't.
+#define IREE_QCHECK(condition) IREE_CHECK(condition)
+#define IREE_QCHECK_EQ(x, y) IREE_CHECK_EQ(x, y)
+#define IREE_QCHECK_NE(x, y) IREE_CHECK_NE(x, y)
+#define IREE_QCHECK_LE(x, y) IREE_CHECK_LE(x, y)
+#define IREE_QCHECK_LT(x, y) IREE_CHECK_LT(x, y)
+#define IREE_QCHECK_GE(x, y) IREE_CHECK_GE(x, y)
+#define IREE_QCHECK_GT(x, y) IREE_CHECK_GT(x, y)
+
+} // namespace internal
+} // namespace iree
+
+#endif // IREE_BASE_LOGGING_H_
diff --git a/runtime/src/iree/base/loop.c b/runtime/src/iree/base/loop.c
new file mode 100644
index 0000000..00dd83d
--- /dev/null
+++ b/runtime/src/iree/base/loop.c
@@ -0,0 +1,203 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/loop.h"
+
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_loop_t
+//===----------------------------------------------------------------------===//
+
+// Issues IREE_LOOP_COMMAND_CALL on |loop| to schedule |callback| with
+// |user_data| at |priority|. Fails with INVALID_ARGUMENT when the loop has
+// no control function (e.g. iree_loop_null()).
+IREE_API_EXPORT iree_status_t iree_loop_call(iree_loop_t loop,
+ iree_loop_priority_t priority,
+ iree_loop_callback_fn_t callback,
+ void* user_data) {
+ if (IREE_UNLIKELY(!loop.ctl)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "null loop");
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Marshal parameters for the ioctl-style control function.
+ const iree_loop_call_params_t params = {
+ .callback =
+ {
+ .fn = callback,
+ .user_data = user_data,
+ },
+ .priority = priority,
+ };
+ // Mojibake fix: restore `&params` (was corrupted to a pilcrow sequence).
+ iree_status_t status =
+ loop.ctl(loop.self, IREE_LOOP_COMMAND_CALL, &params, NULL);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Issues IREE_LOOP_COMMAND_DISPATCH to run |workgroup_callback| across a
+// |workgroup_count_xyz| grid and then invoke |completion_callback|.
+// Fails with INVALID_ARGUMENT when the loop has no control function.
+IREE_API_EXPORT iree_status_t iree_loop_dispatch(
+ iree_loop_t loop, const uint32_t workgroup_count_xyz[3],
+ iree_loop_workgroup_fn_t workgroup_callback,
+ iree_loop_callback_fn_t completion_callback, void* user_data) {
+ if (IREE_UNLIKELY(!loop.ctl)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "null loop");
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+ // Record the grid dimensions on the trace zone for profiling.
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)workgroup_count_xyz[0]);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)workgroup_count_xyz[1]);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)workgroup_count_xyz[2]);
+
+ const iree_loop_dispatch_params_t params = {
+ .callback =
+ {
+ .fn = completion_callback,
+ .user_data = user_data,
+ },
+ .workgroup_fn = workgroup_callback,
+ .workgroup_count_xyz =
+ {
+ workgroup_count_xyz[0],
+ workgroup_count_xyz[1],
+ workgroup_count_xyz[2],
+ },
+ };
+ // Mojibake fix: restore `&params` (was corrupted to a pilcrow sequence).
+ iree_status_t status =
+ loop.ctl(loop.self, IREE_LOOP_COMMAND_DISPATCH, &params, NULL);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Issues IREE_LOOP_COMMAND_WAIT_UNTIL to sleep until |timeout| and then
+// invoke |callback|. Fails with INVALID_ARGUMENT on a null loop.
+IREE_API_EXPORT iree_status_t
+iree_loop_wait_until(iree_loop_t loop, iree_timeout_t timeout,
+ iree_loop_callback_fn_t callback, void* user_data) {
+ if (IREE_UNLIKELY(!loop.ctl)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "null loop");
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Capture time as an absolute value as we don't know when it's going to run.
+ iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+ const iree_loop_wait_until_params_t params = {
+ .callback =
+ {
+ .fn = callback,
+ .user_data = user_data,
+ },
+ .deadline_ns = deadline_ns,
+ };
+ // Mojibake fix: restore `&params` (was corrupted to a pilcrow sequence).
+ iree_status_t status =
+ loop.ctl(loop.self, IREE_LOOP_COMMAND_WAIT_UNTIL, &params, NULL);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Issues IREE_LOOP_COMMAND_WAIT_ONE to wait for |wait_source| (or |timeout|)
+// and then invoke |callback|. Fails with INVALID_ARGUMENT on a null loop.
+IREE_API_EXPORT iree_status_t iree_loop_wait_one(
+ iree_loop_t loop, iree_wait_source_t wait_source, iree_timeout_t timeout,
+ iree_loop_callback_fn_t callback, void* user_data) {
+ if (IREE_UNLIKELY(!loop.ctl)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "null loop");
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Capture time as an absolute value as we don't know when it's going to run.
+ iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+ const iree_loop_wait_one_params_t params = {
+ .callback =
+ {
+ .fn = callback,
+ .user_data = user_data,
+ },
+ .deadline_ns = deadline_ns,
+ .wait_source = wait_source,
+ };
+ // Mojibake fix: restore `&params` (was corrupted to a pilcrow sequence).
+ iree_status_t status =
+ loop.ctl(loop.self, IREE_LOOP_COMMAND_WAIT_ONE, &params, NULL);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Shared implementation of wait-any/wait-all; |command| selects which loop
+// command is issued. count==0 degenerates to an async call and count==1 to
+// the single-source fast path. No trace zone here: the public wrappers
+// (iree_loop_wait_any/iree_loop_wait_all) open their own.
+static iree_status_t iree_loop_wait_multi(
+ iree_loop_command_t command, iree_loop_t loop, iree_host_size_t count,
+ iree_wait_source_t* wait_sources, iree_timeout_t timeout,
+ iree_loop_callback_fn_t callback, void* user_data) {
+ if (count == 0) {
+ // No wait handles; issue the callback as if it had completed async.
+ return iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT, callback,
+ user_data);
+ } else if (count == 1) {
+ // One wait handle can go down the fast path.
+ return iree_loop_wait_one(loop, wait_sources[0], timeout, callback,
+ user_data);
+ }
+
+ // Capture time as an absolute value as we don't know when it's going to run.
+ iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+ const iree_loop_wait_multi_params_t params = {
+ .callback =
+ {
+ .fn = callback,
+ .user_data = user_data,
+ },
+ .deadline_ns = deadline_ns,
+ .count = count,
+ .wait_sources = wait_sources,
+ };
+ // Mojibake fix: restore `&params` (was corrupted to a pilcrow sequence).
+ return loop.ctl(loop.self, command, &params, NULL);
+}
+
+// Waits for any of |wait_sources| to resolve; degenerate counts (0/1) are
+// handled by the shared iree_loop_wait_multi helper.
+IREE_API_EXPORT iree_status_t iree_loop_wait_any(
+ iree_loop_t loop, iree_host_size_t count, iree_wait_source_t* wait_sources,
+ iree_timeout_t timeout, iree_loop_callback_fn_t callback, void* user_data) {
+ if (IREE_UNLIKELY(!loop.ctl)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "null loop");
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)count);
+ iree_status_t status =
+ iree_loop_wait_multi(IREE_LOOP_COMMAND_WAIT_ANY, loop, count,
+ wait_sources, timeout, callback, user_data);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Waits for all of |wait_sources| to resolve; degenerate counts (0/1) are
+// handled by the shared iree_loop_wait_multi helper.
+IREE_API_EXPORT iree_status_t iree_loop_wait_all(
+ iree_loop_t loop, iree_host_size_t count, iree_wait_source_t* wait_sources,
+ iree_timeout_t timeout, iree_loop_callback_fn_t callback, void* user_data) {
+ if (IREE_UNLIKELY(!loop.ctl)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "null loop");
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)count);
+ iree_status_t status =
+ iree_loop_wait_multi(IREE_LOOP_COMMAND_WAIT_ALL, loop, count,
+ wait_sources, timeout, callback, user_data);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Issues IREE_LOOP_COMMAND_DRAIN to block the caller until the loop is idle
+// or |timeout| elapses. Fails with INVALID_ARGUMENT on a null loop.
+IREE_API_EXPORT iree_status_t iree_loop_drain(iree_loop_t loop,
+ iree_timeout_t timeout) {
+ if (IREE_UNLIKELY(!loop.ctl)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "null loop");
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Capture time as an absolute value as we don't know when it's going to run.
+ iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+ const iree_loop_drain_params_t params = {
+ .deadline_ns = deadline_ns,
+ };
+ // Mojibake fix: restore `&params` (was corrupted to a pilcrow sequence).
+ iree_status_t status =
+ loop.ctl(loop.self, IREE_LOOP_COMMAND_DRAIN, &params, NULL);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/base/loop.h b/runtime/src/iree/base/loop.h
new file mode 100644
index 0000000..da2cbd2
--- /dev/null
+++ b/runtime/src/iree/base/loop.h
@@ -0,0 +1,337 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_LOOP_H_
+#define IREE_BASE_LOOP_H_
+
+#include <inttypes.h>
+
+#include "iree/base/allocator.h"
+#include "iree/base/attributes.h"
+#include "iree/base/status.h"
+#include "iree/base/time.h"
+#include "iree/base/wait_source.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_loop_t public API
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_loop_t iree_loop_t;
+typedef uint32_t iree_loop_command_t;
+
+// TODO(benvanik): define prioritization. This is useful for ensuring fast
+// coroutine switching by avoiding the current coroutine being set to the back
+// of the loop. It's easy to shoot yourself in the foot, though: cooperative
+// scheduling can be tricky.
+typedef enum iree_loop_priority_e {
+ IREE_LOOP_PRIORITY_DEFAULT = 0u,
+} iree_loop_priority_t;
+
+// Callback to execute user code used by the loop.
+// |user_data| contains the value provided to the callback when enqueuing the
+// operation and must remain live until the callback is made.
+//
+// If the callback is to be executed as normal |status| will be OK.
+// A non-fatal error case of IREE_STATUS_DEADLINE_EXCEEDED can occur if the
+// operation had a deadline specified and it elapsed prior to the condition
+// being met.
+//
+// |status| otherwise indicates that the operation failed (such as a failed wait
+// or a failed workgroup callback).
+//
+// Callbacks may reentrantly queue work on the |loop| _unless_ the passed
+// |status| is IREE_STATUS_ABORTED indicating that the loop is shutting down or
+// the operation is being aborted because of a prior failure.
+//
+// Any non-OK result will be routed to a loop-global error handler (depending on
+// implementation) or otherwise ignored; users must set their own exit bits.
+typedef iree_status_t(IREE_API_PTR* iree_loop_callback_fn_t)(
+ void* user_data, iree_loop_t loop, iree_status_t status);
+
+// Callback to execute a single workgroup in a grid dispatch.
+// Each call receives the XYZ location in the grid and may run concurrently with
+// any other workgroup call.
+//
+// Any non-OK result will be routed to the completion callback of the dispatch
+// operation but not otherwise trigger loop failure. Other workgroups may
+// continue to run up until the completion callback is issued.
+typedef iree_status_t(IREE_API_PTR* iree_loop_workgroup_fn_t)(
+ void* user_data, iree_loop_t loop, uint32_t workgroup_x,
+ uint32_t workgroup_y, uint32_t workgroup_z);
+
+// Function pointer for an iree_loop_t control function.
+// |command| provides the operation to perform. Commands may use |params| to
+// pass additional operation-specific parameters. |inout_ptr| usage is defined
+// by each operation.
+// Command values and their params structs are defined in the
+// "implementation details" section below (iree_loop_command_e).
+typedef iree_status_t(IREE_API_PTR* iree_loop_ctl_fn_t)(
+ void* self, iree_loop_command_t command, const void* params,
+ void** inout_ptr);
+
+// An event system for executing queued asynchronous work.
+// Implementations are allowed to execute operations in any order but generally
+// runs FIFO and will only ever execute one operation at a time. The thread used
+// for execution may change from operation to operation. Usage that has order
+// requirements is required to perform the ordering themselves.
+//
+// This is a form of cooperative scheduling and the loop _may_ not make forward
+// progress if a callback issues a blocking operation. All blocking operations
+// should either be done on user-controlled threads or via the loop primitives
+// such as iree_loop_wait_one. Callbacks may enqueue zero or more operations
+// with 2+ performing a conceptual fork. The iree_loop_dispatch operation allows
+// for a constrained style of concurrency matching a GPU grid dispatch and can
+// be used as a primitive to implement other kinds of parallel loops.
+//
+// User data passed to callbacks is unowned and must be kept live by the
+// requester. All callbacks are guaranteed to be issued even on failure and
+// allocations made when enqueuing operations are safe to free in the callbacks.
+//
+// The rough behavior of the loop matches that of the web event loop
+// dispatching events/promises/timeouts/etc. It's a stackless design where the
+// owner of the primary control loop is hidden from the users of the loop. This
+// allows implementations to integrate into existing scheduling mechanisms
+// (ALooper, libuv, io_uring, the browser main event loop, etc) in a generic
+// way. The design of the API here is meant to make it easy to put the
+// implementation in external code (python/javascript/rust/java/etc) as only a
+// single method with a fixed interface is used to cross the boundaries.
+//
+// Note that by default this implementation is only intended for host-level
+// synchronization and scheduling: fairly coarse events performed fairly
+// infrequently. Optimized multi-threaded workloads are intended to execute on
+// the iree/task/ system via command buffers.
+typedef struct iree_loop_t {
+ // Control function data.
+ void* self;
+ // ioctl-style control function servicing all loop-related commands.
+ // See iree_loop_command_t for more information.
+ iree_loop_ctl_fn_t ctl;
+} iree_loop_t;
+
+// A loop that can do no work. Attempts to enqueue work will fail.
+// NOTE(review): in C an empty parameter list declares an unprototyped
+// function; `iree_loop_null(void)` would be stricter — confirm against
+// project style.
+static inline iree_loop_t iree_loop_null() {
+ iree_loop_t loop = {NULL, NULL};
+ return loop;
+}
+
+// Executes |callback| from the loop at some point in the future.
+//
+// The callback is guaranteed to be issued but in an undefined order.
+// |user_data| is not retained and must be live until the callback is issued.
+IREE_API_EXPORT iree_status_t iree_loop_call(iree_loop_t loop,
+ iree_loop_priority_t priority,
+ iree_loop_callback_fn_t callback,
+ void* user_data);
+
+// Executes |workgroup_callback| from the loop at some point in the future
+// with grid dispatch of |workgroup_count_xyz| workgroups. Each
+// |workgroup_callback| will receive its XYZ location in the grid and
+// |completion_callback| will be issued upon completion (or failure).
+// The dispatched workgroups are not guaranteed to run concurrently and must
+// not perform blocking operations.
+//
+// The completion callback is guaranteed to be issued but in an undefined order.
+// The workgroup callback runs serially or concurrently from multiple threads.
+// |user_data| is not retained and must be live until the callback is issued.
+IREE_API_EXPORT iree_status_t iree_loop_dispatch(
+ iree_loop_t loop, const uint32_t workgroup_count_xyz[3],
+ iree_loop_workgroup_fn_t workgroup_callback,
+ iree_loop_callback_fn_t completion_callback, void* user_data);
+
+// Waits until |timeout| is reached and then issues |callback|.
+// There may be a significant latency between |timeout| and when the |callback|
+// is executed.
+//
+// The callback is guaranteed to be issued.
+// |user_data| is not retained and must be live until the callback is issued.
+IREE_API_EXPORT iree_status_t
+iree_loop_wait_until(iree_loop_t loop, iree_timeout_t timeout,
+ iree_loop_callback_fn_t callback, void* user_data);
+
+// Waits until the |wait_source| is satisfied or |timeout| is reached and then
+// issues |callback|.
+//
+// The callback is guaranteed to be issued.
+// |user_data| is not retained and must be live until the callback is issued.
+IREE_API_EXPORT iree_status_t iree_loop_wait_one(
+ iree_loop_t loop, iree_wait_source_t wait_source, iree_timeout_t timeout,
+ iree_loop_callback_fn_t callback, void* user_data);
+
+// Waits until one or more of the |wait_sources| is satisfied or |timeout| is
+// reached and then issues |callback|.
+//
+// The callback is guaranteed to be issued.
+// |wait_sources| and |user_data| are not retained and must be live until the
+// callback is issued.
+IREE_API_EXPORT iree_status_t iree_loop_wait_any(
+ iree_loop_t loop, iree_host_size_t count, iree_wait_source_t* wait_sources,
+ iree_timeout_t timeout, iree_loop_callback_fn_t callback, void* user_data);
+
+// Waits until all of the |wait_sources| are satisfied or |timeout| is reached
+// and then issues |callback|.
+//
+// The callback is guaranteed to be issued.
+// |wait_sources| and |user_data| are not retained and must be live until the
+// callback is issued.
+IREE_API_EXPORT iree_status_t iree_loop_wait_all(
+ iree_loop_t loop, iree_host_size_t count, iree_wait_source_t* wait_sources,
+ iree_timeout_t timeout, iree_loop_callback_fn_t callback, void* user_data);
+
+// Blocks the caller and waits until the loop is idle or |timeout| is reached.
+//
+// Not all implementations support this and may return
+// IREE_STATUS_DEADLINE_EXCEEDED immediately when work is still pending.
+IREE_API_EXPORT iree_status_t iree_loop_drain(iree_loop_t loop,
+ iree_timeout_t timeout);
+
+//===----------------------------------------------------------------------===//
+// iree_loop_t implementation details
+//===----------------------------------------------------------------------===//
+// These are exposed so that user applications can implement their own loops and
+// are otherwise private to the API.
+
+// Controls the behavior of an iree_loop_ctl_fn_t callback function.
+enum iree_loop_command_e {
+ // Issues the callback from the loop at some point in the future.
+ // The callback will always be called (including when aborted).
+ //
+ // iree_loop_ctl_fn_t:
+ // params: iree_loop_call_params_t
+ // inout_ptr: unused
+ IREE_LOOP_COMMAND_CALL = 0u,
+
+ // Issues a workgroup callback across a grid and then issues the callback.
+ // The completion callback will always be called (including when aborted).
+ //
+ // iree_loop_ctl_fn_t:
+ // params: iree_loop_dispatch_params_t
+ // inout_ptr: unused
+ IREE_LOOP_COMMAND_DISPATCH,
+
+ // TODO(benvanik): open/read/write/close/etc with iovecs.
+ // Our iree_byte_span_t matches with `struct iovec` and if we share that we
+ // can do scatter/gather I/O with io_uring.
+ // Want something with an fd, flags, count, and iree_byte_span_t's.
+
+ // TODO(benvanik): IREE_LOOP_COMMAND_WAIT_IDLE to get idle callbacks.
+
+ // Sleeps until the timeout is reached then issues the callback.
+ // The callback will always be called (including when aborted).
+ //
+ // iree_loop_ctl_fn_t:
+ // params: iree_loop_wait_until_params_t
+ // inout_ptr: unused
+ IREE_LOOP_COMMAND_WAIT_UNTIL,
+
+ // Waits until the wait source has resolved then issues the callback.
+ // The callback will always be called (including when aborted).
+ //
+ // iree_loop_ctl_fn_t:
+ // params: iree_loop_wait_one_params_t
+ // inout_ptr: unused
+ IREE_LOOP_COMMAND_WAIT_ONE,
+
+ // Waits until one or more wait sources have resolved then issues the
+ // callback. The callback will always be called (including when aborted).
+ //
+ // iree_loop_ctl_fn_t:
+ // params: iree_loop_wait_multi_params_t
+ // inout_ptr: unused
+ IREE_LOOP_COMMAND_WAIT_ANY,
+
+ // Waits until all of the wait sources have resolved then issues the
+ // callback. The callback will always be called (including when aborted).
+ //
+ // iree_loop_ctl_fn_t:
+ // params: iree_loop_wait_multi_params_t
+ // inout_ptr: unused
+ IREE_LOOP_COMMAND_WAIT_ALL,
+
+ // Waits until the loop has no more pending work.
+ // Resolves early with IREE_STATUS_DEADLINE_EXCEEDED if the timeout is reached
+ // before the loop is idle or if the platform does not support the operation.
+ //
+ // iree_loop_ctl_fn_t:
+ // params: iree_loop_drain_params_t
+ // inout_ptr: unused
+ IREE_LOOP_COMMAND_DRAIN,
+
+ // Highest valid command value; aliases IREE_LOOP_COMMAND_DRAIN.
+ IREE_LOOP_COMMAND_MAX = IREE_LOOP_COMMAND_DRAIN,
+};
+
+typedef struct iree_loop_callback_t {
+ // Callback function pointer.
+ iree_loop_callback_fn_t fn;
+ // User data passed to the callback function. Unowned.
+ void* user_data;
+} iree_loop_callback_t;
+
+// Parameters for IREE_LOOP_COMMAND_CALL.
+typedef struct iree_loop_call_params_t {
+ // Callback issued to perform the call.
+ iree_loop_callback_t callback;
+ // Controls the scheduling of the call.
+ iree_loop_priority_t priority;
+} iree_loop_call_params_t;
+
+// Parameters for IREE_LOOP_COMMAND_DISPATCH.
+typedef struct iree_loop_dispatch_params_t {
+ // Callback issued when the call completes (successfully or otherwise).
+ iree_loop_callback_t callback;
+ // Callback issued for each workgroup.
+ iree_loop_workgroup_fn_t workgroup_fn;
+ // 3D workgroup count.
+ uint32_t workgroup_count_xyz[3];
+} iree_loop_dispatch_params_t;
+
+// Parameters for IREE_LOOP_COMMAND_WAIT_UNTIL.
+typedef struct iree_loop_wait_until_params_t {
+ // Callback issued after the wait condition is satisfied.
+ iree_loop_callback_t callback;
+ // Maximum time to wait before failing the wait with
+ // IREE_STATUS_DEADLINE_EXCEEDED.
+ iree_time_t deadline_ns;
+} iree_loop_wait_until_params_t;
+
+// Parameters for IREE_LOOP_COMMAND_WAIT_ONE.
+typedef struct iree_loop_wait_one_params_t {
+ // Callback issued after the wait condition is satisfied.
+ iree_loop_callback_t callback;
+ // Maximum time to wait before failing the wait with
+ // IREE_STATUS_DEADLINE_EXCEEDED.
+ iree_time_t deadline_ns;
+ // Wait source to wait on.
+ iree_wait_source_t wait_source;
+} iree_loop_wait_one_params_t;
+
+// Parameters for IREE_LOOP_COMMAND_WAIT_ANY / IREE_LOOP_COMMAND_WAIT_ALL.
+typedef struct iree_loop_wait_multi_params_t {
+ // Callback issued after any/all wait conditions are satisfied.
+ iree_loop_callback_t callback;
+ // Maximum time to wait before failing the wait with
+ // IREE_STATUS_DEADLINE_EXCEEDED.
+ iree_time_t deadline_ns;
+ // Total number of wait sources.
+ iree_host_size_t count;
+ // List of wait sources to wait on.
+ // Ownership remains with the issuer and must remain live until the callback.
+ iree_wait_source_t* wait_sources;
+} iree_loop_wait_multi_params_t;
+
+// Parameters for IREE_LOOP_COMMAND_DRAIN.
+typedef struct iree_loop_drain_params_t {
+ // Time when the wait will abort.
+ iree_time_t deadline_ns;
+} iree_loop_drain_params_t;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_LOOP_H_
diff --git a/runtime/src/iree/base/loop_inline.c b/runtime/src/iree/base/loop_inline.c
new file mode 100644
index 0000000..3c19b9e
--- /dev/null
+++ b/runtime/src/iree/base/loop_inline.c
@@ -0,0 +1,514 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/loop_inline.h"
+
+#include "iree/base/assert.h"
+#include "iree/base/tracing.h"
+
+// Control function used for reentrant enqueuing from within callbacks; the
+// loop's |self| points at the active ringbuffer (on-stack or user storage).
+static iree_status_t iree_loop_inline_reentrant_ctl(void* self,
+ iree_loop_command_t command,
+ const void* params,
+ void** inout_ptr);
+
+// Routes |status| to the loop's sticky status slot (if still OK) and aborts
+// all pending operations in the ring.
+static void iree_loop_inline_emit_error(iree_loop_t loop, iree_status_t status);
+
+//===----------------------------------------------------------------------===//
+// Inline execution of operations
+//===----------------------------------------------------------------------===//
+
+// IREE_LOOP_COMMAND_CALL
+static void iree_loop_inline_run_call(iree_loop_t loop,
+                                      iree_loop_call_params_t params) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Issue the user callback with an OK status; ideally this compiles down to
+  // a tail call when tracing is disabled.
+  iree_status_t callback_status =
+      params.callback.fn(params.callback.user_data, loop, iree_ok_status());
+  if (!iree_status_is_ok(callback_status)) {
+    // Route the failure to the loop's sticky status and abort pending work.
+    iree_loop_inline_emit_error(loop, callback_status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// IREE_LOOP_COMMAND_DISPATCH
+static void iree_loop_inline_run_dispatch(iree_loop_t loop,
+                                          iree_loop_dispatch_params_t params) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Execute every workgroup in the 3D grid before issuing the completion
+  // callback. The first failing workgroup short-circuits the remainder and
+  // its status is handed to the completion handler exactly once.
+  const uint32_t count_x = params.workgroup_count_xyz[0];
+  const uint32_t count_y = params.workgroup_count_xyz[1];
+  const uint32_t count_z = params.workgroup_count_xyz[2];
+  iree_status_t workgroup_status = iree_ok_status();
+  for (uint32_t z = 0; z < count_z && iree_status_is_ok(workgroup_status);
+       ++z) {
+    for (uint32_t y = 0; y < count_y && iree_status_is_ok(workgroup_status);
+         ++y) {
+      for (uint32_t x = 0; x < count_x && iree_status_is_ok(workgroup_status);
+           ++x) {
+        workgroup_status =
+            params.workgroup_fn(params.callback.user_data, loop, x, y, z);
+      }
+    }
+  }
+
+  // Fire the completion callback with either success or the first error hit
+  // by a workgroup. Ideally a tail call (when not tracing).
+  iree_status_t completion_status =
+      params.callback.fn(params.callback.user_data, loop, workgroup_status);
+  if (!iree_status_is_ok(completion_status)) {
+    iree_loop_inline_emit_error(loop, completion_status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// IREE_LOOP_COMMAND_WAIT_UNTIL
+static void iree_loop_inline_run_wait_until(
+    iree_loop_t loop, iree_loop_wait_until_params_t params) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Sleep on the calling thread until the deadline; a false result indicates
+  // the sleep was interrupted before the deadline was reached.
+  iree_status_t wait_status =
+      iree_wait_until(params.deadline_ns)
+          ? iree_ok_status()
+          : iree_make_status(IREE_STATUS_ABORTED,
+                             "sleep was aborted by a signal/alert");
+
+  // Notify the user callback of the wait result.
+  iree_status_t callback_status =
+      params.callback.fn(params.callback.user_data, loop, wait_status);
+  if (!iree_status_is_ok(callback_status)) {
+    iree_loop_inline_emit_error(loop, callback_status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// IREE_LOOP_COMMAND_WAIT_ONE
+static void iree_loop_inline_run_wait_one(iree_loop_t loop,
+                                          iree_loop_wait_one_params_t params) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Wait on the single wait source directly; this is usually the most optimal
+  // path when available and for others may drop down to a system wait
+  // primitive.
+  iree_status_t wait_status = iree_wait_source_wait_one(
+      params.wait_source, iree_make_deadline(params.deadline_ns));
+
+  // Notify the user callback with the wait result, success or failure.
+  iree_status_t callback_status =
+      params.callback.fn(params.callback.user_data, loop, wait_status);
+  if (!iree_status_is_ok(callback_status)) {
+    iree_loop_inline_emit_error(loop, callback_status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// IREE_LOOP_COMMAND_WAIT_ANY
+static void iree_loop_inline_run_wait_any(
+    iree_loop_t loop, iree_loop_wait_multi_params_t params) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_timeout_t timeout = iree_make_deadline(params.deadline_ns);
+
+  // Do a scan down the wait sources to see if any are already set - if so we
+  // can bail early. Otherwise we need to wait on any one.
+  // iree_wait_any is a much more efficient (and fair) way but this keeps the
+  // code working on bare-metal.
+  // NOTE(review): params.count == 0 would index wait_sources[0] out of bounds
+  // below - confirm callers guarantee count >= 1.
+  iree_status_t wait_status = iree_status_from_code(IREE_STATUS_DEFERRED);
+  for (iree_host_size_t i = 0; i < params.count; ++i) {
+    iree_status_code_t wait_status_code = IREE_STATUS_OK;
+    iree_status_t query_status =
+        iree_wait_source_query(params.wait_sources[i], &wait_status_code);
+    if (iree_status_is_ok(query_status)) {
+      if (wait_status_code == IREE_STATUS_OK) {
+        // Signaled - the wait-any condition is already satisfied; bail early.
+        // FIX: must clear the DEFERRED sentinel here, otherwise we'd fall
+        // through below and block on wait_sources[0] even though another
+        // source already resolved.
+        wait_status = iree_ok_status();
+        break;
+      } else if (wait_status_code == IREE_STATUS_DEFERRED) {
+        // Not signaled yet - keep scanning.
+        continue;
+      } else {
+        // Wait failed - can bail early.
+        wait_status = iree_status_from_code(wait_status_code);
+        break;
+      }
+    } else {
+      // Failed to perform the query, which we treat the same as a wait error.
+      wait_status = query_status;
+      break;
+    }
+  }
+  if (iree_status_is_deferred(wait_status)) {
+    // No queries resolved/failed - commit any real wait.
+    // We choose the first one to be (somewhat) deterministic but really it
+    // should be randomized... or if the user cares they should use a real loop.
+    wait_status = iree_wait_source_wait_one(params.wait_sources[0], timeout);
+  }
+
+  // Callback after wait, whether it succeeded or failed.
+  iree_status_t status =
+      params.callback.fn(params.callback.user_data, loop, wait_status);
+  if (!iree_status_is_ok(status)) {
+    iree_loop_inline_emit_error(loop, status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// IREE_LOOP_COMMAND_WAIT_ALL
+static void iree_loop_inline_run_wait_all(
+    iree_loop_t loop, iree_loop_wait_multi_params_t params) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Wait on each source in turn until all are satisfied or one fails.
+  // iree_wait_all is a much more efficient way but this keeps the code working
+  // on bare-metal.
+  iree_timeout_t timeout = iree_make_deadline(params.deadline_ns);
+  iree_status_t wait_status = iree_ok_status();
+  for (iree_host_size_t i = 0;
+       i < params.count && iree_status_is_ok(wait_status); ++i) {
+    wait_status = iree_wait_source_wait_one(params.wait_sources[i], timeout);
+  }
+
+  // Notify the user callback with the aggregate wait result.
+  iree_status_t callback_status =
+      params.callback.fn(params.callback.user_data, loop, wait_status);
+  if (!iree_status_is_ok(callback_status)) {
+    iree_loop_inline_emit_error(loop, callback_status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_inline_ring_t
+//===----------------------------------------------------------------------===//
+
+// Total capacity of the ringbuffer in operations pending.
+// The usable capacity is always 1 less than this as we mask it off,
+// unfortunately wasting a slot but keeping this all stupid simple. If we wanted
+// to drop another ~32B of stack space we could make this do the right thing.
+#define IREE_LOOP_INLINE_RING_CAPACITY ((uint8_t)8)
+static_assert((IREE_LOOP_INLINE_RING_CAPACITY &
+ (IREE_LOOP_INLINE_RING_CAPACITY - 1)) == 0,
+ "ringbuffer capacity must be a power of two");
+
+// Bitmask used to perform a quick mod of the ringbuffer indices.
+// This must always be ANDed with the indices before use:
+// uint8_t physical_idx = logical_idx % IREE_LOOP_INLINE_RING_CAPACITY;
+// or this, way better (though the compiler can usually figure it out):
+// uint8_t physical_idx = logical_idx & IREE_LOOP_INLINE_RING_MASK;
+#define IREE_LOOP_INLINE_RING_MASK (IREE_LOOP_INLINE_RING_CAPACITY - 1)
+
+// An operation in the inline loop ringbuffer containing all the information
+// required to replay it at a future time. All pointers are unowned.
+typedef struct iree_loop_inline_op_t {
+ iree_loop_command_t command;
+ union {
+ // Aliases the leading callback field of each params struct so aborts can
+ // issue completion callbacks without switching on |command| (see
+ // iree_loop_inline_abort_all); relies on callback being the first member
+ // of every params struct.
+ iree_loop_callback_t callback;
+ union {
+ iree_loop_call_params_t call;
+ iree_loop_dispatch_params_t dispatch;
+ iree_loop_wait_until_params_t wait_until;
+ iree_loop_wait_one_params_t wait_one;
+ iree_loop_wait_multi_params_t wait_multi;
+ } params;
+ };
+} iree_loop_inline_op_t;
+
+// Returns the size of the parameters required by |command|, or 0 for
+// commands the inline loop does not buffer (rejected by enqueue).
+static inline uint8_t iree_loop_params_size(iree_loop_command_t command) {
+ // Keep this a tail call switch; compilers can work magic here.
+ switch (command) {
+ case IREE_LOOP_COMMAND_CALL:
+ return sizeof(iree_loop_call_params_t);
+ case IREE_LOOP_COMMAND_DISPATCH:
+ return sizeof(iree_loop_dispatch_params_t);
+ case IREE_LOOP_COMMAND_WAIT_UNTIL:
+ return sizeof(iree_loop_wait_until_params_t);
+ case IREE_LOOP_COMMAND_WAIT_ONE:
+ return sizeof(iree_loop_wait_one_params_t);
+ case IREE_LOOP_COMMAND_WAIT_ANY:
+ case IREE_LOOP_COMMAND_WAIT_ALL:
+ return sizeof(iree_loop_wait_multi_params_t);
+ default:
+ // Unknown/unsupported (DRAIN is handled directly by the ctl functions).
+ return 0;
+ }
+}
+
+// Fixed-size ringbuffer of commands enqueued reentrantly.
+// We ensure the size stays small so we don't blow the stack of tiny systems.
+// The inline loop is explicitly not designed for multi-program cooperative
+// scheduling and well-formed programs shouldn't hit the limit.
+//
+// NOTE: this structure must be in an initialized state if zeroed.
+typedef struct iree_loop_inline_ring_t {
+ // Slot storage; only slots between read_head and write_head are valid.
+ iree_loop_inline_op_t ops[IREE_LOOP_INLINE_RING_CAPACITY];
+ // Index of the next op to dequeue; always masked with the ring mask.
+ uint8_t read_head;
+ // Index of the next free slot; usable capacity is CAPACITY - 1.
+ uint8_t write_head;
+ // Sticky error slot owned by the caller; receives the first failure.
+ iree_status_t* status_ptr;
+} iree_loop_inline_ring_t;
+static_assert(
+ sizeof(iree_loop_inline_ring_t) <= IREE_LOOP_INLINE_STORAGE_SIZE,
+ "iree_loop_inline_ring_t needs to be tiny as it's allocated on the stack");
+
+// Returns a loop handle whose control function reentrantly enqueues onto the
+// currently-executing ringbuffer |ring|.
+static inline iree_loop_t iree_loop_inline_reentrant(
+    iree_loop_inline_ring_t* ring) {
+  iree_loop_t reentrant_loop = {
+      .self = ring,
+      .ctl = iree_loop_inline_reentrant_ctl,
+  };
+  return reentrant_loop;
+}
+
+// Initializes |out_ring| to an empty state targeting |status_ptr| for sticky
+// error reporting. The op slots are deliberately left uninitialized: they are
+// only read after being written between the read/write heads.
+static inline void iree_loop_inline_ring_initialize(
+    iree_status_t* status_ptr, iree_loop_inline_ring_t* out_ring) {
+  out_ring->status_ptr = status_ptr;
+  out_ring->write_head = 0;
+  out_ring->read_head = 0;
+}
+
+// Returns true if there are no pending operations (read caught up to write).
+static inline bool iree_loop_inline_ring_is_empty(
+    const iree_loop_inline_ring_t* ring) {
+  return ring->write_head == ring->read_head;
+}
+
+// Returns true if no free slots remain; one slot is always kept unused so
+// that the full and empty states stay distinguishable.
+static inline bool iree_loop_inline_ring_is_full(
+    const iree_loop_inline_ring_t* ring) {
+  uint8_t used_slots =
+      (ring->write_head - ring->read_head) & IREE_LOOP_INLINE_RING_MASK;
+  return used_slots == IREE_LOOP_INLINE_RING_MASK;
+}
+
+// Enqueues an operation into |ring|, capacity-permitting.
+// |params| is copied into the ringbuffer and need not remain live upon return.
+static iree_status_t iree_loop_inline_enqueue(iree_loop_inline_ring_t* ring,
+                                              iree_loop_command_t command,
+                                              const void* params) {
+  // The only thing we need to do here is memcpy the params into our ring.
+  // Since all the params differ in size we just effectively perform a lookup
+  // and do the copy.
+  uint8_t params_size = iree_loop_params_size(command);
+  // FIX: the branch-hint macro must wrap the entire condition; the previous
+  // `IREE_UNLIKELY(params_size) == 0` form compared the hint's boolean result
+  // and hinted the wrong (common) branch as unlikely.
+  if (IREE_UNLIKELY(params_size == 0)) {
+    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                            "unimplemented loop command");
+  }
+
+  // Ensure there's space for the new operation.
+  if (iree_loop_inline_ring_is_full(ring)) {
+    return iree_make_status(
+        IREE_STATUS_RESOURCE_EXHAUSTED,
+        "inline ringbuffer capacity exceeded; reduce the amount of concurrent "
+        "work or use a real loop implementation");
+  }
+
+  // Reserve a slot for the new operation.
+  uint8_t slot = ring->write_head;
+  ring->write_head = (ring->write_head + 1) & IREE_LOOP_INLINE_RING_MASK;
+
+  // Copy the operation in; the params are on the stack and won't be valid
+  // after the caller returns.
+  ring->ops[slot].command = command;
+  memcpy(&ring->ops[slot].params, params, params_size);
+  return iree_ok_status();
+}
+
+// Dequeues the next operation in |ring| and executes it.
+// The operation may reentrantly enqueue more operations.
+static void iree_loop_inline_dequeue_and_run_next(
+ iree_loop_inline_ring_t* ring) {
+ IREE_ASSERT(!iree_loop_inline_ring_is_empty(ring));
+
+ // Acquire the next operation.
+ // The read head advances before execution so that reentrant enqueues made
+ // by the callback see this slot as free again.
+ uint8_t slot = ring->read_head;
+ ring->read_head = (ring->read_head + 1) & IREE_LOOP_INLINE_RING_MASK;
+
+ // Copy out the parameters; the operation we execute may overwrite them by
+ // enqueuing more work.
+ iree_loop_inline_op_t op = ring->ops[slot];
+
+ // We pass the callbacks a loop that has the reentrancy bit set.
+ // This allows iree_loop_inline_ctl to determine whether it needs to alloc
+ // more stack space.
+ iree_loop_t loop = iree_loop_inline_reentrant(ring);
+
+ // Tail call into the execution routine so we can hopefully tail call all the
+ // way up the stack.
+ // Ideally these are all tail calls.
+ switch (op.command) {
+ case IREE_LOOP_COMMAND_CALL:
+ iree_loop_inline_run_call(loop, op.params.call);
+ break;
+ case IREE_LOOP_COMMAND_DISPATCH:
+ iree_loop_inline_run_dispatch(loop, op.params.dispatch);
+ break;
+ case IREE_LOOP_COMMAND_WAIT_UNTIL:
+ iree_loop_inline_run_wait_until(loop, op.params.wait_until);
+ break;
+ case IREE_LOOP_COMMAND_WAIT_ONE:
+ iree_loop_inline_run_wait_one(loop, op.params.wait_one);
+ break;
+ case IREE_LOOP_COMMAND_WAIT_ANY:
+ iree_loop_inline_run_wait_any(loop, op.params.wait_multi);
+ break;
+ case IREE_LOOP_COMMAND_WAIT_ALL:
+ iree_loop_inline_run_wait_all(loop, op.params.wait_multi);
+ break;
+ default:
+ // Unreachable: iree_loop_inline_enqueue rejects unknown commands.
+ break;
+ }
+}
+
+// Aborts all operations in the ring, leaving it in its initial (empty) state.
+static void iree_loop_inline_abort_all(iree_loop_inline_ring_t* ring) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Issue the completion callback of each op to notify it of the abort.
+  // To prevent enqueuing more work while aborting we pass in a NULL loop.
+  // We can't do anything with the errors so we ignore them.
+  while (!iree_loop_inline_ring_is_empty(ring)) {
+    uint8_t aborted_slot = ring->read_head;
+    ring->read_head = (ring->read_head + 1) & IREE_LOOP_INLINE_RING_MASK;
+    iree_loop_callback_t aborted_callback = ring->ops[aborted_slot].callback;
+    iree_status_ignore(
+        aborted_callback.fn(aborted_callback.user_data, iree_loop_null(),
+                            iree_make_status(IREE_STATUS_ABORTED)));
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+static void iree_loop_inline_emit_error(iree_loop_t loop,
+ iree_status_t status) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(
+ z0, iree_status_code_string(iree_status_code(status)));
+
+ iree_loop_inline_ring_t* ring = (iree_loop_inline_ring_t*)loop.self;
+ if (ring->status_ptr && iree_status_is_ok(*ring->status_ptr)) {
+ *ring->status_ptr = status;
+ } else {
+ iree_status_ignore(status);
+ }
+
+ iree_loop_inline_abort_all(ring);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Runs |ring| until it is empty, including any operations enqueued
+// reentrantly while running.
+static iree_status_t iree_loop_inline_run_all(iree_loop_inline_ring_t* ring) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Callers enqueue at least one op before invoking us so the first iteration
+  // always has work (matches the original do/while shape).
+  for (;;) {
+    iree_loop_inline_dequeue_and_run_next(ring);
+    if (iree_loop_inline_ring_is_empty(ring)) break;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_inline_ctl functions
+//===----------------------------------------------------------------------===//
+
+// Control function for stack-based inline loops. |self| is the user-provided
+// iree_status_t* used as a sticky error slot across top-level operations.
+// |inout_ptr| is unused by this implementation.
+IREE_API_EXPORT iree_status_t iree_loop_inline_ctl(void* self,
+ iree_loop_command_t command,
+ const void* params,
+ void** inout_ptr) {
+ IREE_ASSERT_ARGUMENT(self);
+
+ if (command == IREE_LOOP_COMMAND_DRAIN) {
+ // We don't really do anything with this; if called non-reentrantly then
+ // there is no work to drain.
+ return iree_ok_status();
+ }
+
+ iree_status_t* status_ptr = (iree_status_t*)self;
+
+ // Initialize a new execution context on the stack.
+ iree_loop_inline_ring_t stack_ring;
+ iree_loop_inline_ring_initialize(status_ptr, &stack_ring);
+
+ // Enqueue the initial command; we'll dequeue it right away but this keeps
+ // the code size smaller.
+ IREE_RETURN_IF_ERROR(iree_loop_inline_enqueue(&stack_ring, command, params));
+
+ // If the status is not OK then we bail immediately; this allows for sticky
+ // errors that mimic the abort behavior of an actual loop. Inline loops never
+ // run work from multiple scopes as they don't persist beyond the loop
+ // operation.
+ if (iree_status_is_ok(*status_ptr)) {
+ // Run until the ring is empty or we fail.
+ return iree_loop_inline_run_all(&stack_ring); // tail
+ } else {
+ // Abort all ops (the just-enqueued op still receives an ABORTED callback).
+ iree_loop_inline_abort_all(&stack_ring);
+ return iree_ok_status();
+ }
+}
+
+// Control function for inline loops backed by caller-provided storage.
+// |self| is the iree_loop_inline_storage_t whose status field acts as the
+// sticky error slot; note it is NOT reset here, so an error from a prior
+// top-level operation persists until iree_loop_inline_initialize/
+// iree_loop_inline_deinitialize clears it. |inout_ptr| is unused.
+IREE_API_EXPORT iree_status_t
+iree_loop_inline_using_storage_ctl(void* self, iree_loop_command_t command,
+ const void* params, void** inout_ptr) {
+ if (command == IREE_LOOP_COMMAND_DRAIN) {
+ // We don't really do anything with this; if called non-reentrantly then
+ // there is no work to drain.
+ return iree_ok_status();
+ }
+
+ iree_loop_inline_storage_t* storage = (iree_loop_inline_storage_t*)self;
+ iree_loop_inline_ring_t* ring = (iree_loop_inline_ring_t*)storage->opaque;
+
+ // Top-level call using external storage; run until the ring is empty or
+ // we fail. Note that the storage contents are undefined and we have to
+ // ensure the list is ready for use.
+ iree_loop_inline_ring_initialize(&storage->status, ring);
+
+ IREE_RETURN_IF_ERROR(iree_loop_inline_enqueue(ring, command, params));
+
+ // If the status is not OK then we bail immediately; this allows for sticky
+ // errors that mimic the abort behavior of an actual loop. Inline loops never
+ // run work from multiple scopes as they don't persist beyond the loop
+ // operation.
+ if (iree_status_is_ok(storage->status)) {
+ // Run until the ring is empty or we fail.
+ return iree_loop_inline_run_all(ring); // tail
+ } else {
+ // Abort all ops (the just-enqueued op still receives an ABORTED callback).
+ iree_loop_inline_abort_all(ring);
+ return iree_ok_status();
+ }
+}
+
+// Control function used when callbacks enqueue more work while the loop is
+// already running: commands are buffered into the active ringbuffer and
+// executed by the top-level control call that is currently draining it.
+static iree_status_t iree_loop_inline_reentrant_ctl(void* self,
+                                                    iree_loop_command_t command,
+                                                    const void* params,
+                                                    void** inout_ptr) {
+  if (command == IREE_LOOP_COMMAND_DRAIN) {
+    // Draining is implicit: each top-level op fully drains before returning.
+    return iree_ok_status();
+  }
+
+  // Buffer the command for the top-level control call to execute; tail call.
+  return iree_loop_inline_enqueue((iree_loop_inline_ring_t*)self, command,
+                                  params);
+}
diff --git a/runtime/src/iree/base/loop_inline.h b/runtime/src/iree/base/loop_inline.h
new file mode 100644
index 0000000..79a1dc1
--- /dev/null
+++ b/runtime/src/iree/base/loop_inline.h
@@ -0,0 +1,95 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_LOOP_INLINE_H_
+#define IREE_BASE_LOOP_INLINE_H_
+
+#include <inttypes.h>
+
+#include "iree/base/loop.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_loop_inline
+//===----------------------------------------------------------------------===//
+
+// Loop control function for stack-allocated inline loops; |self| is the
+// iree_status_t* sticky error slot provided to iree_loop_inline.
+IREE_API_EXPORT iree_status_t iree_loop_inline_ctl(void* self,
+ iree_loop_command_t command,
+ const void* params,
+ void** inout_ptr);
+// Loop control function for inline loops backed by a caller-provided
+// iree_loop_inline_storage_t; |self| is the storage pointer.
+IREE_API_EXPORT iree_status_t
+iree_loop_inline_using_storage_ctl(void* self, iree_loop_command_t command,
+ const void* params, void** inout_ptr);
+
+// Returns a loop that doesn't really loop.
+// All operations are run as they are enqueued on the stack. This uses no
+// additional memory and ensures that everything completes upon return to the
+// user but does eliminate the ability for pipelining and overlapping work from
+// multiple subprograms. This approach limits the amount of work that can be
+// reentrantly scheduled and should only be used when in the tiniest of
+// environments with programs tested to be compatible with it.
+//
+// Reentrant enqueuing is possible and can be used to create tail call chains
+// (or recursion) that executes roughly in order.
+//
+// Caveats:
+// - Reentrant enqueuing of operations is limited to some small number (~4).
+// - Waits are performed as they are enqueued and the loop must be able to
+//   make forward progress on each.
+// - Execution deadlines are ignored in order to fully drain on each operation.
+// - Errors propagate immediately to the top-level caller and abort all pending
+//   operations.
+//
+// |out_status| receives the first error raised while executing and must
+// remain live for the lifetime of the returned loop.
+//
+// Thread-compatible: stateless and executes all work on the calling thread.
+static inline iree_loop_t iree_loop_inline(iree_status_t* out_status) {
+  iree_loop_t loop = {
+      .self = out_status,
+      .ctl = iree_loop_inline_ctl,
+  };
+  return loop;
+}
+
+// Minimum size in bytes required for iree_loop_inline_storage_t.
+// If we wanted to shrink this size to the absolute minimum we'd just expose the
+// structures here; not the worst thing but messy (as this is a public API).
+// (loop_inline.c statically asserts its ring structure fits in this size.)
+#define IREE_LOOP_INLINE_STORAGE_SIZE 512
+
+// Storage for an inline loop.
+// May be either allocated on the stack or on the heap and only needs to remain
+// valid for the lifetime of the iree_loop_t referencing it.
+typedef iree_alignas(iree_max_align_t) struct iree_loop_inline_storage_t {
+ // Opaque ringbuffer storage interpreted by the loop implementation.
+ uint8_t opaque[IREE_LOOP_INLINE_STORAGE_SIZE];
+ // Sticky status: receives the first error raised during execution and must
+ // be consumed (or ignored) by the caller to avoid leaks.
+ iree_status_t status;
+} iree_loop_inline_storage_t;
+
+// Returns an inline loop that uses an external |storage| instead of the stack.
+// The storage will only be used while executing and can be reused if the caller
+// knows it is safe (not reentrantly inside of a loop execution). Errors that
+// arise will be set in the storage status field and must be checked (or
+// ignored) by the caller to avoid leaks.
+//
+// See iree_loop_inline for details on the execution behavior.
+static inline iree_loop_t iree_loop_inline_initialize(
+    iree_loop_inline_storage_t* storage) {
+  // Clear any stale status from a prior use before handing out the loop.
+  storage->status = iree_ok_status();
+  iree_loop_t loop = {
+      .self = storage,
+      .ctl = iree_loop_inline_using_storage_ctl,
+  };
+  return loop;
+}
+
+// Releases any sticky status captured in |storage| and resets it for reuse.
+// Must be called (or the status otherwise consumed) to avoid leaking an
+// allocated iree_status_t payload. No-op when |storage| is NULL.
+// FIX: marked `inline` like every other helper defined in this header; a
+// plain `static` function in a public header triggers -Wunused-function in
+// translation units that include but don't use it.
+static inline void iree_loop_inline_deinitialize(
+    iree_loop_inline_storage_t* storage) {
+  if (!storage) return;
+  iree_status_ignore(storage->status);
+  storage->status = iree_ok_status();
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_LOOP_INLINE_H_
diff --git a/runtime/src/iree/base/loop_inline_test.cc b/runtime/src/iree/base/loop_inline_test.cc
new file mode 100644
index 0000000..0df5c83
--- /dev/null
+++ b/runtime/src/iree/base/loop_inline_test.cc
@@ -0,0 +1,51 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+// Contains the test definitions applied to all loop implementations:
+#include "iree/base/loop_test.h"
+
+// Hook for the shared loop_test.h suite: provides the loop under test.
+// The inline loop needs no heap allocation so |allocator| is unused;
+// |out_status| doubles as the loop's sticky error slot.
+void AllocateLoop(iree_status_t* out_status, iree_allocator_t allocator,
+ iree_loop_t* out_loop) {
+ *out_loop = iree_loop_inline(out_status);
+}
+
+// Hook for the shared loop_test.h suite: nothing to free for inline loops.
+void FreeLoop(iree_allocator_t allocator, iree_loop_t loop) {}
+
+// Tests usage of external storage for the inline ringbuffer.
+// The standard tests all use loop allocated stack storage while this one uses
+// the storage we control. Real applications could put that storage in .rwdata
+// somewhere or alias it with other storage (arenas/etc).
+TEST(LoopInlineTest, ExternalStorage) {
+ IREE_TRACE_SCOPE();
+
+ // First storage byte set to 0xCD - presumably a poison fill so that
+ // initialization can't silently rely on zeroed memory; confirm intent.
+ iree_loop_inline_storage_t storage = {{0xCD}, iree_ok_status()};
+ auto loop = iree_loop_inline_initialize(&storage);
+
+ // Issue a call that adds 1 to a counter until it reaches kCountUpTo.
+ static const int kCountUpTo = 128;
+ struct user_data_t {
+ int counter = 0;
+ } user_data;
+ // Captureless lambda converted to a C function pointer; it may reference
+ // callback_fn recursively because the pointer is only read at call time,
+ // after initialization has completed.
+ static const iree_loop_callback_fn_t callback_fn =
+ +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+ auto* user_data = reinterpret_cast<user_data_t*>(user_data_ptr);
+ if (++user_data->counter < kCountUpTo) {
+ return iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT, callback_fn,
+ user_data);
+ }
+ return iree_ok_status();
+ };
+ IREE_ASSERT_OK(iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT, callback_fn,
+ &user_data));
+ // All reentrant calls must have completed synchronously upon return.
+ EXPECT_EQ(user_data.counter, kCountUpTo);
+ IREE_ASSERT_OK(storage.status);
+
+ iree_loop_inline_deinitialize(&storage);
+}
diff --git a/runtime/src/iree/base/loop_sync.c b/runtime/src/iree/base/loop_sync.c
new file mode 100644
index 0000000..8de715d
--- /dev/null
+++ b/runtime/src/iree/base/loop_sync.c
@@ -0,0 +1,1101 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/loop_sync.h"
+
+#include "iree/base/internal/math.h"
+#include "iree/base/internal/wait_handle.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_loop_sync_t utilities
+//===----------------------------------------------------------------------===//
+
+// Amount of time that can remain in a wait-until while still retiring.
+// This prevents additional system sleeps when the remaining time before the
+// deadline is less than the granularity the system is likely able to sleep for.
+// Some platforms may have as much as 10-15ms of potential slop and sleeping for
+// 1ms may result in 10-15ms.
+#define IREE_LOOP_SYNC_DELAY_SLOP_NS (2 /*ms*/ * 1000000)
+
+// NOTE: all callbacks should be at offset 0. This allows for easily zipping
+// through the params lists and issuing callbacks (the op unions below alias
+// the callback member over whichever params struct is active).
+static_assert(offsetof(iree_loop_call_params_t, callback) == 0,
+ "callback must be at offset 0");
+static_assert(offsetof(iree_loop_dispatch_params_t, callback) == 0,
+ "callback must be at offset 0");
+static_assert(offsetof(iree_loop_wait_until_params_t, callback) == 0,
+ "callback must be at offset 0");
+static_assert(offsetof(iree_loop_wait_one_params_t, callback) == 0,
+ "callback must be at offset 0");
+static_assert(offsetof(iree_loop_wait_multi_params_t, callback) == 0,
+ "callback must be at offset 0");
+
+// Forward declaration; defined later in this file.
+static void iree_loop_sync_abort_scope(iree_loop_sync_t* loop_sync,
+ iree_loop_sync_scope_t* scope);
+
+//===----------------------------------------------------------------------===//
+// iree_loop_run_ring_t
+//===----------------------------------------------------------------------===//
+
+// Represents an operation in the loop run ringbuffer.
+// Note that the storage may be reallocated at any time and all pointers must be
+// external to the storage in order to remain valid.
+typedef struct iree_loop_run_op_t {
+ union {
+ // Aliases the leading callback field of the active params struct (layout
+ // asserted at offset 0 above) so generic code can issue completion
+ // callbacks without switching on |command|.
+ iree_loop_callback_t callback; // asserted at offset 0 above
+ union {
+ iree_loop_call_params_t call;
+ iree_loop_dispatch_params_t dispatch;
+ } params;
+ };
+ iree_loop_command_t command;
+ // Scope tracking pending work: pending_count is incremented on enqueue and
+ // decremented on dequeue.
+ iree_loop_sync_scope_t* scope;
+
+ // Set on calls when we are issuing a callback for an operation.
+ // Unlike other pointers in the params this is owned by the ring.
+ iree_status_t status;
+} iree_loop_run_op_t;
+
+// Ringbuffer containing pending ready to run callback operations.
+//
+// Generally this works as a FIFO but we allow for head-of-ring replacement
+// for high priority tail calls. New operations are appended to the ring and
+// removed as drained; if the ringbuffer capacity is exceeded then the storage
+// will be reallocated up to the maximum capacity specified at creation time.
+// NOTE(review): as implemented iree_loop_run_ring_enqueue fails with
+// RESOURCE_EXHAUSTED when full and no reallocation happens - confirm which
+// behavior is intended.
+typedef iree_alignas(iree_max_align_t) struct iree_loop_run_ring_t {
+ // Current storage capacity of |ops|. Assumed to be a power of two (see
+ // iree_loop_run_ring_mask) - TODO confirm callers validate max_queue_depth.
+ uint32_t capacity;
+ // Index into |ops| where the next operation to be dequeued is located.
+ uint32_t read_head;
+ // Index into |ops| where the last operation to be enqueued is located.
+ uint32_t write_head;
+ // Ringbuffer storage; zero-length trailing array (GNU extension) sized at
+ // allocation time via iree_loop_run_ring_storage_size.
+ iree_loop_run_op_t ops[0];
+} iree_loop_run_ring_t;
+
+// Returns the total allocation size in bytes for a run ring with
+// |options.max_queue_depth| operation slots (header plus trailing op array).
+static iree_host_size_t iree_loop_run_ring_storage_size(
+    iree_loop_sync_options_t options) {
+  iree_host_size_t ops_size =
+      options.max_queue_depth * sizeof(iree_loop_run_op_t);
+  return sizeof(iree_loop_run_ring_t) + ops_size;
+}
+
+// Returns the bitmask used to wrap ring indices into [0, capacity).
+static inline uint32_t iree_loop_run_ring_mask(
+    const iree_loop_run_ring_t* run_ring) {
+  return run_ring->capacity - 1u;
+}
+
+// Returns the number of operations currently pending in |run_ring|.
+static iree_host_size_t iree_loop_run_ring_size(
+    const iree_loop_run_ring_t* run_ring) {
+  if (run_ring->write_head >= run_ring->read_head) {
+    return run_ring->write_head - run_ring->read_head;
+  }
+  // Write head has wrapped around behind the read head.
+  return run_ring->write_head + run_ring->capacity - run_ring->read_head;
+}
+
+// Returns true if no operations are pending (read caught up to write).
+static bool iree_loop_run_ring_is_empty(const iree_loop_run_ring_t* run_ring) {
+  return run_ring->write_head == run_ring->read_head;
+}
+
+// Returns true if no free slots remain; one slot is reserved to keep the
+// full and empty states distinguishable.
+static bool iree_loop_run_ring_is_full(const iree_loop_run_ring_t* run_ring) {
+  const uint32_t mask = iree_loop_run_ring_mask(run_ring);
+  const uint32_t used = (run_ring->write_head - run_ring->read_head) & mask;
+  return used == mask;
+}
+
+// Initializes |out_run_ring| to an empty state with capacity taken from
+// |options|. Slot storage is left untouched until written.
+static void iree_loop_run_ring_initialize(iree_loop_sync_options_t options,
+                                          iree_loop_run_ring_t* out_run_ring) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  out_run_ring->read_head = 0;
+  out_run_ring->write_head = 0;
+  out_run_ring->capacity = (uint32_t)options.max_queue_depth;
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Deinitializes |run_ring|.
+// Callers are expected to have aborted or drained all pending ops first (see
+// iree_loop_run_ring_abort_all); the assert only fires in debug builds.
+static void iree_loop_run_ring_deinitialize(iree_loop_run_ring_t* run_ring) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Expected abort to be called.
+ IREE_ASSERT(iree_loop_run_ring_is_empty(run_ring));
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Enqueues |op| into |run_ring|, failing when no capacity remains.
+// |op| is copied by value; op.scope must outlive the operation as its
+// pending_count is adjusted on enqueue/dequeue.
+static iree_status_t iree_loop_run_ring_enqueue(iree_loop_run_ring_t* run_ring,
+ iree_loop_run_op_t op) {
+ if (iree_loop_run_ring_is_full(run_ring)) {
+ return iree_make_status(
+ IREE_STATUS_RESOURCE_EXHAUSTED,
+ "run ringbuffer capacity %u exceeded; reduce the amount of concurrent "
+ "work or use a full loop implementation",
+ run_ring->capacity);
+ }
+
+ // Plot the pre-enqueue depth so queue growth is visible in traces.
+ IREE_TRACE_PLOT_VALUE_I64("iree_loop_queue_depth",
+ iree_loop_run_ring_size(run_ring));
+
+ // Reserve a slot for the new operation.
+ uint32_t slot = run_ring->write_head;
+ run_ring->write_head =
+ (run_ring->write_head + 1) & iree_loop_run_ring_mask(run_ring);
+
+ // Copy the operation in; the params are on the stack and won't be valid after
+ // the caller returns.
+ run_ring->ops[slot] = op;
+
+ // The local copy shares the scope pointer so this updates the live scope.
+ ++op.scope->pending_count;
+
+ IREE_TRACE_PLOT_VALUE_I64("iree_loop_queue_depth",
+ iree_loop_run_ring_size(run_ring));
+ return iree_ok_status();
+}
+
+// Dequeues the next operation into |out_op|, returning false if the ring is
+// empty. Decrements the op's scope pending_count.
+static bool iree_loop_run_ring_dequeue(iree_loop_run_ring_t* run_ring,
+ iree_loop_run_op_t* out_op) {
+ if (iree_loop_run_ring_is_empty(run_ring)) return false;
+
+ // Plot the pre-dequeue depth so queue drain is visible in traces.
+ IREE_TRACE_PLOT_VALUE_I64("iree_loop_queue_depth",
+ iree_loop_run_ring_size(run_ring));
+
+ // Acquire the next operation.
+ uint32_t slot = run_ring->read_head;
+ run_ring->read_head =
+ (run_ring->read_head + 1) & iree_loop_run_ring_mask(run_ring);
+
+ // Copy out the parameters; the operation we execute may overwrite them by
+ // enqueuing more work.
+ *out_op = run_ring->ops[slot];
+
+ --out_op->scope->pending_count;
+
+ IREE_TRACE_PLOT_VALUE_I64("iree_loop_queue_depth",
+ iree_loop_run_ring_size(run_ring));
+ return true;
+}
+
+// Aborts all ops that are part of |scope|.
+// A NULL |scope| indicates all work from all scopes should be aborted.
+static void iree_loop_run_ring_abort_scope(iree_loop_run_ring_t* run_ring,
+ iree_loop_sync_scope_t* scope) {
+ if (iree_loop_run_ring_is_empty(run_ring)) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Do a single pass over the ring and abort all ops matching the scope.
+ // To keep things simple and preserve dense ordered ops in the ringbuffer we
+ // dequeue all ops and re-enqueue any that don't match. When complete the ring
+ // may be at a different offset but will contain only those ops we didn't
+ // abort in their original order.
+ iree_host_size_t count = iree_loop_run_ring_size(run_ring);
+ for (iree_host_size_t i = 0; i < count; ++i) {
+ iree_loop_run_op_t op;
+ if (!iree_loop_run_ring_dequeue(run_ring, &op)) break;
+ if (scope && op.scope != scope) {
+ // Not part of the scope we are aborting; re-enqueue to the ring.
+ // (dequeue decremented pending_count; enqueue restores it: net zero.)
+ iree_status_ignore(iree_loop_run_ring_enqueue(run_ring, op));
+ } else {
+ // Part of the scope to abort.
+ // NOTE(review): iree_loop_run_ring_dequeue above already decremented
+ // pending_count for this op; this second decrement looks like
+ // double-counting - confirm against scope teardown expectations.
+ --op.scope->pending_count;
+ iree_status_ignore(op.status);
+ iree_status_ignore(op.callback.fn(op.callback.user_data, iree_loop_null(),
+ iree_make_status(IREE_STATUS_ABORTED)));
+ }
+ }
+
+ IREE_TRACE_PLOT_VALUE_I64("iree_loop_queue_depth",
+ iree_loop_run_ring_size(run_ring));
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Aborts all ops from all scopes.
+// Callbacks are issued synchronously with IREE_STATUS_ABORTED.
+static void iree_loop_run_ring_abort_all(iree_loop_run_ring_t* run_ring) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // A NULL scope matches every op in the ring.
+  iree_loop_run_ring_abort_scope(run_ring, /*scope=*/NULL);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_wait_list_t
+//===----------------------------------------------------------------------===//
+
+// Represents an operation in the loop wait list.
+// Note that the storage may be reallocated at any time and all pointers must be
+// external to the storage in order to remain valid.
+typedef struct iree_loop_wait_op_t {
+  union {
+    // Common callback view of the params union; presumably each params struct
+    // begins with an iree_loop_callback_t ("asserted at offset 0 above") so
+    // the callback can be read without switching on |command| - TODO confirm.
+    iree_loop_callback_t callback;  // asserted at offset 0 above
+    union {
+      iree_loop_wait_until_params_t wait_until;
+      iree_loop_wait_one_params_t wait_one;
+      iree_loop_wait_multi_params_t wait_multi;
+    } params;
+  };
+  // Which IREE_LOOP_COMMAND_WAIT_* variant |params| holds.
+  iree_loop_command_t command;
+  // Owning scope; used for pending_count accounting and scoped aborts.
+  iree_loop_sync_scope_t* scope;
+} iree_loop_wait_op_t;
+
+// Dense list of pending wait operations.
+// We don't care about the order here as we put them all into a wait set for
+// multi-wait anyway. iree_wait_set_t should really be rewritten such that this
+// is not required (custom data on registered handles, etc).
+typedef iree_alignas(iree_max_align_t) struct iree_loop_wait_list_t {
+  // System wait set used to perform multi-waits.
+  iree_wait_set_t* wait_set;
+  // Current storage capacity of |ops|.
+  uint32_t capacity;
+  // Current count of valid |ops|.
+  uint32_t count;
+  // Pending wait operations.
+  // Zero-length trailing array (compiler extension): |capacity| entries are
+  // allocated inline immediately after this struct (see storage_size below).
+  iree_loop_wait_op_t ops[0];
+} iree_loop_wait_list_t;
+
+// Computes the total allocation size in bytes of an iree_loop_wait_list_t
+// including inline storage for options.max_wait_count pending wait ops.
+static iree_host_size_t iree_loop_wait_list_storage_size(
+    iree_loop_sync_options_t options) {
+  const iree_host_size_t header_size = sizeof(iree_loop_wait_list_t);
+  const iree_host_size_t ops_size =
+      options.max_wait_count * sizeof(iree_loop_wait_op_t);
+  return header_size + ops_size;
+}
+
+// Returns true if |wait_list| has no pending wait operations.
+static bool iree_loop_wait_list_is_empty(iree_loop_wait_list_t* wait_list) {
+  return !wait_list->count;
+}
+
+// Initializes |out_wait_list| (storage provided by the caller as part of the
+// loop allocation) and allocates the system wait set used for multi-waits.
+// The wait set is sized to the same max_wait_count as the op list.
+static iree_status_t iree_loop_wait_list_initialize(
+    iree_loop_sync_options_t options, iree_allocator_t allocator,
+    iree_loop_wait_list_t* out_wait_list) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  out_wait_list->capacity = (uint32_t)options.max_wait_count;
+  out_wait_list->count = 0;
+
+  iree_status_t status = iree_wait_set_allocate(
+      options.max_wait_count, allocator, &out_wait_list->wait_set);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases wait list resources. All pending waits must have been retired or
+// aborted before this is called (asserted below).
+static void iree_loop_wait_list_deinitialize(iree_loop_wait_list_t* wait_list) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Expected abort to be called.
+  IREE_ASSERT(iree_loop_wait_list_is_empty(wait_list));
+
+  iree_wait_set_free(wait_list->wait_set);
+  wait_list->wait_set = NULL;
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Registers |wait_source| with the wait list's system wait set.
+// Immediate sources are no-ops and delay sources fail precondition (delays are
+// handled by wait-until scanning, not the wait set). Sources that are not
+// already wait handles are exported to a system wait primitive and
+// |wait_source| is rewritten in place to wrap that primitive so wakes can be
+// routed back later without re-exporting.
+static iree_status_t iree_loop_wait_list_register_wait_source(
+    iree_loop_wait_list_t* wait_list, iree_wait_source_t* wait_source) {
+  if (iree_wait_source_is_immediate(*wait_source)) {
+    // Task has been neutered and is treated as an immediately resolved wait.
+    return iree_ok_status();
+  } else if (iree_wait_source_is_delay(*wait_source)) {
+    // We can't easily support delays as registered wait sources; we need to be
+    // able to snoop the tasks to find the earliest sleep time and can't easily
+    // do that if we tried to put them in the wait set.
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "delays must come from wait-until ops");
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status = iree_ok_status();
+
+  // Acquire a wait handle and insert it into the wait set.
+  // We swap out the wait source with the handle so that we don't export it
+  // again and can find it on wake.
+  iree_wait_handle_t wait_handle = iree_wait_handle_immediate();
+  iree_wait_handle_t* wait_handle_ptr =
+      iree_wait_handle_from_source(wait_source);
+  if (wait_handle_ptr) {
+    // Already a wait handle - can directly insert it.
+    wait_handle = *wait_handle_ptr;
+  } else {
+    iree_wait_primitive_t wait_primitive = iree_wait_primitive_immediate();
+    status = iree_wait_source_export(*wait_source, IREE_WAIT_PRIMITIVE_TYPE_ANY,
+                                     iree_immediate_timeout(), &wait_primitive);
+    if (iree_status_is_ok(status)) {
+      // Swap the wait handle with the exported handle so we can wake it later.
+      // It'd be ideal if we retained the wait handle separate so that we could
+      // still do fast queries for local wait sources.
+      iree_wait_handle_wrap_primitive(wait_primitive.type, wait_primitive.value,
+                                      &wait_handle);
+      status = iree_wait_source_import(wait_primitive, wait_source);
+    }
+  }
+
+  if (iree_status_is_ok(status)) {
+    status = iree_wait_set_insert(wait_list->wait_set, wait_handle);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Removes |wait_source| from the wait set (if registered) and neuters it to an
+// immediate wait so that repeated unregistration is a harmless no-op.
+static void iree_loop_wait_list_unregister_wait_source(
+    iree_loop_wait_list_t* wait_list, iree_wait_source_t* wait_source) {
+  if (iree_wait_source_is_immediate(*wait_source) ||
+      iree_wait_source_is_delay(*wait_source)) {
+    // Not registered or it's already been unregistered.
+    return;
+  }
+  // Registration rewrote the source to wrap a wait handle; recover it so the
+  // matching wait set entry can be erased.
+  iree_wait_handle_t* wait_handle = iree_wait_handle_from_source(wait_source);
+  if (wait_handle) {
+    iree_wait_set_erase(wait_list->wait_set, *wait_handle);
+  }
+  *wait_source = iree_wait_source_immediate();
+}
+
+// Unregisters every wait source referenced by |op| based on its command.
+// WAIT_UNTIL ops are pure delays with no wait sources and are ignored.
+static void iree_loop_wait_list_unregister_wait_sources(
+    iree_loop_wait_list_t* wait_list, iree_loop_wait_op_t* op) {
+  switch (op->command) {
+    case IREE_LOOP_COMMAND_WAIT_ONE:
+      iree_loop_wait_list_unregister_wait_source(
+          wait_list, &op->params.wait_one.wait_source);
+      break;
+    case IREE_LOOP_COMMAND_WAIT_ANY:
+    case IREE_LOOP_COMMAND_WAIT_ALL:
+      // Both multi variants share the same params layout.
+      for (iree_host_size_t i = 0; i < op->params.wait_multi.count; ++i) {
+        iree_loop_wait_list_unregister_wait_source(
+            wait_list, &op->params.wait_multi.wait_sources[i]);
+      }
+      break;
+    default:
+    case IREE_LOOP_COMMAND_WAIT_UNTIL:
+      break;
+  }
+}
+
+// Appends a pending wait op to |wait_list| and registers its wait source(s)
+// with the system wait set. On success the owning scope's pending_count is
+// incremented; the matching decrement happens on wake or abort.
+static iree_status_t iree_loop_wait_list_insert(
+    iree_loop_wait_list_t* wait_list, iree_loop_wait_op_t op) {
+  // NOTE(review): the `count + 1 >=` form leaves one slot of |capacity|
+  // permanently unused - confirm whether that headroom is intentional.
+  if (wait_list->count + 1 >= wait_list->capacity) {
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "wait list capacity %u reached",
+                            wait_list->capacity);
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_PLOT_VALUE_I64("iree_loop_wait_depth", wait_list->count);
+
+  uint32_t slot = wait_list->count++;
+  wait_list->ops[slot] = op;
+
+  iree_status_t status = iree_ok_status();
+  switch (op.command) {
+    case IREE_LOOP_COMMAND_WAIT_UNTIL:
+      // No entry in the wait set; we just need it in the list in order to scan.
+      break;
+    case IREE_LOOP_COMMAND_WAIT_ONE: {
+      status = iree_loop_wait_list_register_wait_source(
+          wait_list, &op.params.wait_one.wait_source);
+      break;
+    }
+    case IREE_LOOP_COMMAND_WAIT_ALL:
+    case IREE_LOOP_COMMAND_WAIT_ANY: {
+      for (iree_host_size_t i = 0;
+           i < op.params.wait_multi.count && iree_status_is_ok(status); ++i) {
+        status = iree_loop_wait_list_register_wait_source(
+            wait_list, &op.params.wait_multi.wait_sources[i]);
+      }
+      break;
+    }
+    default:
+      IREE_ASSERT_UNREACHABLE("unhandled wait list command");
+      break;
+  }
+
+  // NOTE(review): on registration failure the op remains in |ops| (count was
+  // already incremented) without a pending_count increment - verify callers
+  // treat a failed insert as fatal for the loop.
+  if (iree_status_is_ok(status)) {
+    ++op.scope->pending_count;
+  }
+
+  IREE_TRACE_PLOT_VALUE_I64("iree_loop_wait_depth", wait_list->count);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Retires the wait op at index |i|: unregisters its wait handles, removes it
+// from the list (swap-with-last; list order is not preserved), and enqueues
+// its completion callback on |run_ring| carrying |status| so it is sequenced
+// with other runnable work rather than invoked inline.
+static iree_status_t iree_loop_wait_list_notify_wake(
+    iree_loop_wait_list_t* wait_list, iree_loop_run_ring_t* run_ring,
+    iree_host_size_t i, iree_status_t status) {
+  IREE_TRACE_PLOT_VALUE_I64("iree_loop_wait_depth", wait_list->count);
+
+  // Unregister all wait handles from the wait set.
+  iree_loop_wait_list_unregister_wait_sources(wait_list, &wait_list->ops[i]);
+
+  // Since we make no guarantees about the order of the lists we can just swap
+  // with the last value. Note that we need to preserve the callback.
+  iree_loop_sync_scope_t* scope = wait_list->ops[i].scope;
+  --scope->pending_count;
+  iree_loop_callback_t callback = wait_list->ops[i].callback;
+  int tail_index = (int)wait_list->count - 1;
+  if (tail_index > i) {
+    memcpy(&wait_list->ops[i], &wait_list->ops[tail_index],
+           sizeof(*wait_list->ops));
+  }
+  --wait_list->count;
+
+  IREE_TRACE_PLOT_VALUE_I64("iree_loop_wait_depth", wait_list->count);
+
+  // Enqueue the callback on the run ring - this ensures it gets sequenced with
+  // other runnable work and keeps ordering easier to reason about.
+  // NOTE: the enqueue re-increments scope->pending_count for the new CALL op.
+  return iree_loop_run_ring_enqueue(
+      run_ring, (iree_loop_run_op_t){
+                    .command = IREE_LOOP_COMMAND_CALL,
+                    .scope = scope,
+                    .params =
+                        {
+                            .call =
+                                {
+                                    .callback = callback,
+                                    // TODO(benvanik): elevate callback priority
+                                    // to reduce latency?
+                                    .priority = IREE_LOOP_PRIORITY_DEFAULT,
+                                },
+                        },
+                    .status = status,
+                });
+}
+
+// Returns DEFERRED if unresolved, OK if resolved, and an error otherwise.
+// If resolved (successful or not) the caller must erase the wait.
+static iree_status_t iree_loop_wait_list_scan_wait_until(
+    iree_loop_wait_list_t* wait_list, iree_loop_wait_until_params_t* params,
+    iree_time_t now_ns, iree_time_t* earliest_deadline_ns) {
+  // A wait-until is a pure delay: it resolves once the current time reaches
+  // its deadline (within the slop window, which lets us treat near-expired
+  // delays as done instead of sleeping for tiny remainders). If we wake early
+  // because another wait resolved the delay may still be satisfied by the
+  // time this check runs again.
+  const bool deadline_reached =
+      params->deadline_ns <= now_ns + IREE_LOOP_SYNC_DELAY_SLOP_NS;
+  if (deadline_reached) return iree_ok_status();
+
+  // Still waiting; fold this delay into the earliest deadline so the main
+  // loop knows the longest it may block in the system wait.
+  *earliest_deadline_ns = iree_min(*earliest_deadline_ns, params->deadline_ns);
+  return iree_status_from_code(IREE_STATUS_DEFERRED);
+}
+
+// Returns DEFERRED if unresolved, OK if resolved, and an error otherwise.
+// If resolved (successful or not) the caller must erase the wait.
+static iree_status_t iree_loop_wait_list_scan_wait_one(
+    iree_loop_wait_list_t* wait_list, iree_loop_wait_one_params_t* params,
+    iree_time_t now_ns, iree_time_t* earliest_deadline_ns) {
+  // Query the status.
+  iree_status_code_t wait_status_code = IREE_STATUS_OK;
+  IREE_RETURN_IF_ERROR(
+      iree_wait_source_query(params->wait_source, &wait_status_code));
+
+  if (wait_status_code != IREE_STATUS_OK) {
+    if (params->deadline_ns <= now_ns) {
+      // Deadline reached without having resolved.
+      return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+    } else {
+      // Still waiting.
+      *earliest_deadline_ns =
+          iree_min(*earliest_deadline_ns, params->deadline_ns);
+    }
+  }
+
+  // OK when resolved; otherwise whatever code the query reported (commonly
+  // DEFERRED while unresolved) propagates to the scan loop.
+  return iree_status_from_code(wait_status_code);
+}
+
+// Returns DEFERRED if unresolved, OK if resolved, and an error otherwise.
+// If resolved (successful or not) the caller must erase the wait.
+// A wait-any resolves as soon as any single source has resolved.
+static iree_status_t iree_loop_wait_list_scan_wait_any(
+    iree_loop_wait_list_t* wait_list, iree_loop_wait_multi_params_t* params,
+    iree_time_t now_ns, iree_time_t* earliest_deadline_ns) {
+  for (iree_host_size_t i = 0; i < params->count; ++i) {
+    iree_status_code_t wait_status_code = IREE_STATUS_OK;
+    IREE_RETURN_IF_ERROR(
+        iree_wait_source_query(params->wait_sources[i], &wait_status_code));
+    if (wait_status_code == IREE_STATUS_OK) {
+      return iree_ok_status();  // one resolved, wait-any satisfied
+    }
+  }
+  if (params->deadline_ns <= now_ns) {
+    // Deadline reached without having resolved any.
+    return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  } else {
+    // Still waiting.
+    *earliest_deadline_ns =
+        iree_min(*earliest_deadline_ns, params->deadline_ns);
+  }
+  return iree_status_from_code(IREE_STATUS_DEFERRED);  // none resolved
+}
+
+// Returns DEFERRED if unresolved, OK if resolved, and an error otherwise.
+// If resolved (successful or not) the caller must erase the wait.
+// A wait-all resolves only when every source has resolved; sources that
+// resolve early are neutered in place so subsequent scans skip them and the
+// wait set no longer blocks on them.
+static iree_status_t iree_loop_wait_list_scan_wait_all(
+    iree_loop_wait_list_t* wait_list, iree_loop_wait_multi_params_t* params,
+    iree_time_t now_ns, iree_time_t* earliest_deadline_ns) {
+  bool any_unresolved = false;
+  for (iree_host_size_t i = 0; i < params->count; ++i) {
+    // Already-resolved (neutered) sources from prior scans are skipped.
+    if (iree_wait_source_is_immediate(params->wait_sources[i])) continue;
+    iree_status_code_t wait_status_code = IREE_STATUS_OK;
+    IREE_RETURN_IF_ERROR(
+        iree_wait_source_query(params->wait_sources[i], &wait_status_code));
+    if (wait_status_code == IREE_STATUS_OK) {
+      // Wait resolved; remove it from the wait set so that we don't wait on it
+      // again. We do this by neutering the handle.
+      iree_wait_handle_t* wait_handle =
+          iree_wait_handle_from_source(&params->wait_sources[i]);
+      if (wait_handle) {
+        iree_wait_set_erase(wait_list->wait_set, *wait_handle);
+      }
+      params->wait_sources[i] = iree_wait_source_immediate();
+    } else {
+      // Wait not yet resolved.
+      if (params->deadline_ns <= now_ns) {
+        // Deadline reached without having resolved all.
+        return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+      } else {
+        // Still waiting.
+        *earliest_deadline_ns =
+            iree_min(*earliest_deadline_ns, params->deadline_ns);
+        any_unresolved = true;
+      }
+    }
+  }
+  return any_unresolved ? iree_status_from_code(IREE_STATUS_DEFERRED)
+                        : iree_ok_status();
+}
+
+// Routes a wait set wake on |wake_handle| back to the pending wait ops.
+// Currently a stub: woken ops are instead retired by the per-source query
+// pass in the next iree_loop_wait_list_scan, at the cost of an extra syscall
+// per source (see TODO below).
+static void iree_loop_wait_list_handle_wake(iree_loop_wait_list_t* wait_list,
+                                            iree_loop_run_ring_t* run_ring,
+                                            iree_wait_handle_t wake_handle) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // TODO(benvanik): scan the list. We need a way to map wake_handle back to
+  // the zero or more tasks that match it but don't currently store the
+  // handle. Ideally we'd have the wait set tell us precisely which things
+  // woke - possibly by having a bitmap of original insertions that match the
+  // handle - but for now we just eat the extra query syscall.
+  int woken_tasks = 0;
+
+  (void)woken_tasks;
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, woken_tasks);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Scans all pending wait ops, retiring any that have resolved (or failed) by
+// enqueuing their callbacks on |run_ring|, and computes the earliest deadline
+// of those still pending into |out_earliest_deadline_ns|. When any callback
+// was enqueued the deadline is forced to IREE_TIME_INFINITE_PAST so the
+// caller pumps the run ring before blocking in a system wait.
+static iree_status_t iree_loop_wait_list_scan(
+    iree_loop_wait_list_t* wait_list, iree_loop_run_ring_t* run_ring,
+    iree_time_t* out_earliest_deadline_ns) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  *out_earliest_deadline_ns = IREE_TIME_INFINITE_FUTURE;
+
+  // Snapshot the time once; all per-op deadline checks use the same instant.
+  iree_time_t now_ns = iree_time_now();
+  iree_status_t scan_status = iree_ok_status();
+  for (iree_host_size_t i = 0;
+       i < wait_list->count && iree_status_is_ok(scan_status); ++i) {
+    iree_status_t wait_status = iree_ok_status();
+    switch (wait_list->ops[i].command) {
+      case IREE_LOOP_COMMAND_WAIT_UNTIL:
+        wait_status = iree_loop_wait_list_scan_wait_until(
+            wait_list, &wait_list->ops[i].params.wait_until, now_ns,
+            out_earliest_deadline_ns);
+        break;
+      case IREE_LOOP_COMMAND_WAIT_ONE:
+        wait_status = iree_loop_wait_list_scan_wait_one(
+            wait_list, &wait_list->ops[i].params.wait_one, now_ns,
+            out_earliest_deadline_ns);
+        break;
+      case IREE_LOOP_COMMAND_WAIT_ANY:
+        wait_status = iree_loop_wait_list_scan_wait_any(
+            wait_list, &wait_list->ops[i].params.wait_multi, now_ns,
+            out_earliest_deadline_ns);
+        break;
+      case IREE_LOOP_COMMAND_WAIT_ALL:
+        wait_status = iree_loop_wait_list_scan_wait_all(
+            wait_list, &wait_list->ops[i].params.wait_multi, now_ns,
+            out_earliest_deadline_ns);
+        break;
+    }
+    if (!iree_status_is_deferred(wait_status)) {
+      // Wait completed/failed - erase from the wait set and op list.
+      scan_status =
+          iree_loop_wait_list_notify_wake(wait_list, run_ring, i, wait_status);
+      --i;  // item i removed (swap-with-last); re-test the swapped-in op
+
+      // Don't commit the wait if we woke something; we want the callback to be
+      // issued ASAP and will let the main loop pump again to actually wait if
+      // needed.
+      *out_earliest_deadline_ns = IREE_TIME_INFINITE_PAST;
+    }
+  }
+
+  IREE_TRACE_PLOT_VALUE_I64("iree_loop_wait_depth", wait_list->count);
+  IREE_TRACE_ZONE_END(z0);
+  return scan_status;
+}
+
+// Commits the pending waits: blocks the caller until a registered wait handle
+// is signaled or |deadline_ns| is reached. With no registered wait handles
+// this degrades to a sleep until |deadline_ns|. Wake routing and per-op
+// deadline handling are deferred to the next scan; wait failures are
+// currently dropped (see TODO below) and OK is returned.
+static iree_status_t iree_loop_wait_list_commit(
+    iree_loop_wait_list_t* wait_list, iree_loop_run_ring_t* run_ring,
+    iree_time_t deadline_ns) {
+  // NOTE: the condition must be a truthy empty check - inverting it would
+  // sleep while real handles are pending and multi-wait on an empty set.
+  if (iree_wait_set_is_empty(wait_list->wait_set)) {
+    // No wait handles; this is a sleep.
+    IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree_loop_wait_list_commit_sleep");
+    iree_status_t status =
+        iree_wait_until(deadline_ns)
+            ? iree_ok_status()
+            : iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+
+  // Real system wait.
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, (int64_t)wait_list->count);
+
+  // Enter the system wait API.
+  iree_wait_handle_t wake_handle = iree_wait_handle_immediate();
+  iree_status_t status =
+      iree_wait_any(wait_list->wait_set, deadline_ns, &wake_handle);
+  if (iree_status_is_ok(status)) {
+    // One or more waiters is ready. We don't support multi-wake right now so
+    // we'll just take the one we got back and try again.
+    //
+    // To avoid extra syscalls we scan the list and mark whatever tasks were
+    // using the handle the wait set reported waking as completed. On the next
+    // scan they'll be retired immediately. Ideally we'd have the wait set be
+    // able to tell us this precise list.
+    if (iree_wait_handle_is_immediate(wake_handle)) {
+      // No-op wait - ignore.
+      IREE_TRACE_ZONE_APPEND_TEXT(z0, "nop");
+    } else {
+      // Route to zero or more tasks using this handle.
+      IREE_TRACE_ZONE_APPEND_TEXT(z0, "task(s)");
+      iree_loop_wait_list_handle_wake(wait_list, run_ring, wake_handle);
+    }
+  } else if (iree_status_is_deadline_exceeded(status)) {
+    // Indicates nothing was woken within the deadline. We gracefully bail here
+    // and let the scan check for per-op deadline exceeded events or delay
+    // completion.
+    IREE_TRACE_ZONE_APPEND_TEXT(z0, "deadline exceeded");
+  } else {
+    // (Spurious?) error during wait.
+    // TODO(#4026): propagate failure to all scopes involved.
+    // Failures during waits are serious: ignoring them could lead to live-lock
+    // as tasks further in the pipeline expect them to have completed or - even
+    // worse - user code/other processes/drivers/etc may expect them to
+    // complete.
+    // NOTE: |status| is known non-OK on this path so it must not be asserted
+    // OK here; it is dropped until the TODO above lands proper propagation.
+    IREE_TRACE_ZONE_APPEND_TEXT(z0, "failure");
+    iree_status_ignore(status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Aborts all waits that are part of |scope|.
+// A NULL |scope| indicates all work from all scopes should be aborted.
+static void iree_loop_wait_list_abort_scope(iree_loop_wait_list_t* wait_list,
+                                            iree_loop_sync_scope_t* scope) {
+  if (!wait_list->count) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_PLOT_VALUE_I64("iree_loop_wait_depth", wait_list->count);
+
+  // Issue the completion callback of each op to notify it of the abort.
+  // To prevent enqueuing more work while aborting we pass in a NULL loop.
+  // We can't do anything with the errors so we ignore them.
+  for (iree_host_size_t i = 0; i < wait_list->count; ++i) {
+    if (scope && wait_list->ops[i].scope != scope) continue;
+
+    --wait_list->ops[i].scope->pending_count;
+    iree_loop_callback_t callback = wait_list->ops[i].callback;
+    iree_status_t status = callback.fn(callback.user_data, iree_loop_null(),
+                                       iree_make_status(IREE_STATUS_ABORTED));
+    iree_status_ignore(status);
+
+    // Since we make no guarantees about the order of the lists we can just swap
+    // with the last value.
+    int tail_index = (int)wait_list->count - 1;
+    if (tail_index > i) {
+      memcpy(&wait_list->ops[i], &wait_list->ops[tail_index],
+             sizeof(*wait_list->ops));
+    }
+    // Re-test index i on the next iteration since it now holds the swapped-in
+    // (previously last) op.
+    --wait_list->count;
+    --i;
+  }
+
+  IREE_TRACE_PLOT_VALUE_I64("iree_loop_wait_depth", wait_list->count);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Aborts all waits from all scopes.
+// Callbacks are issued synchronously with IREE_STATUS_ABORTED.
+static void iree_loop_wait_list_abort_all(iree_loop_wait_list_t* wait_list) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // A NULL scope matches every pending wait op.
+  iree_loop_wait_list_abort_scope(wait_list, /*scope=*/NULL);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_sync_scope_t
+//===----------------------------------------------------------------------===//
+
+// Initializes |out_scope| for tracking work enqueued on |loop_sync|.
+// |error_fn| (optional; may be NULL) receives asynchronous errors attributed
+// to the scope along with |error_user_data|; when NULL errors are ignored.
+IREE_API_EXPORT void iree_loop_sync_scope_initialize(
+    iree_loop_sync_t* loop_sync, iree_loop_sync_error_fn_t error_fn,
+    void* error_user_data, iree_loop_sync_scope_t* out_scope) {
+  memset(out_scope, 0, sizeof(*out_scope));
+  out_scope->loop_sync = loop_sync;
+  out_scope->pending_count = 0;  // redundant after the memset; kept for clarity
+  out_scope->error_fn = error_fn;
+  out_scope->error_user_data = error_user_data;
+}
+
+// Deinitializes |scope|, aborting any of its operations still pending on the
+// owning loop (abort callbacks fire synchronously with IREE_STATUS_ABORTED).
+IREE_API_EXPORT void iree_loop_sync_scope_deinitialize(
+    iree_loop_sync_scope_t* scope) {
+  IREE_ASSERT_ARGUMENT(scope);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Tolerate scopes that were never attached to a loop.
+  if (scope->loop_sync) {
+    iree_loop_sync_abort_scope(scope->loop_sync, scope);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_sync_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_loop_sync_t {
+  // Allocator the loop was allocated from; used to free it again.
+  iree_allocator_t allocator;
+
+  // Ringbuffer of runnable ops (calls/dispatches); points into trailing data.
+  // Set to NULL during teardown to reject new work.
+  iree_loop_run_ring_t* run_ring;
+  // List of pending wait ops; points into trailing data.
+  // Set to NULL during teardown to reject new work.
+  iree_loop_wait_list_t* wait_list;
+
+  // Trailing data:
+  // + iree_loop_run_ring_storage_size
+  // + iree_loop_wait_list_storage_size
+} iree_loop_sync_t;
+
+// Allocates a synchronous loop and its run ring/wait list in one block.
+// |options.max_queue_depth| is rounded up to a power of two (required by the
+// ring's masking); both depths are capped at UINT16_MAX.
+IREE_API_EXPORT iree_status_t iree_loop_sync_allocate(
+    iree_loop_sync_options_t options, iree_allocator_t allocator,
+    iree_loop_sync_t** out_loop_sync) {
+  IREE_ASSERT_ARGUMENT(out_loop_sync);
+
+  // The run queue must be a power of two due to the ringbuffer masking
+  // technique we use.
+  options.max_queue_depth =
+      iree_math_round_up_to_pow2_u32((uint32_t)options.max_queue_depth);
+  if (options.max_queue_depth > UINT16_MAX) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "queue depth exceeds maximum");
+  }
+
+  // Wait sets also have a handle limit but we may want to allow more
+  // outstanding wait operations even if we can't wait on them all
+  // simultaneously.
+  if (IREE_UNLIKELY(options.max_wait_count > UINT16_MAX)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "wait list depth exceeds maximum");
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Single allocation: [loop_sync | run ring storage | wait list storage],
+  // each region padded to iree_max_align_t.
+  const iree_host_size_t loop_sync_size =
+      iree_host_align(sizeof(iree_loop_sync_t), iree_max_align_t);
+  const iree_host_size_t run_ring_size = iree_host_align(
+      iree_loop_run_ring_storage_size(options), iree_max_align_t);
+  const iree_host_size_t wait_list_size = iree_host_align(
+      iree_loop_wait_list_storage_size(options), iree_max_align_t);
+  const iree_host_size_t total_storage_size =
+      loop_sync_size + run_ring_size + wait_list_size;
+
+  uint8_t* storage = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0,
+      iree_allocator_malloc(allocator, total_storage_size, (void**)&storage));
+  iree_loop_sync_t* loop_sync = (iree_loop_sync_t*)storage;
+  loop_sync->allocator = allocator;
+  loop_sync->run_ring = (iree_loop_run_ring_t*)(storage + loop_sync_size);
+  loop_sync->wait_list =
+      (iree_loop_wait_list_t*)(storage + loop_sync_size + run_ring_size);
+
+  iree_status_t status = iree_ok_status();
+  if (iree_status_is_ok(status)) {
+    iree_loop_run_ring_initialize(options, loop_sync->run_ring);
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_loop_wait_list_initialize(options, allocator,
+                                            loop_sync->wait_list);
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_loop_sync = loop_sync;
+  } else {
+    // Tears down whatever was initialized and releases |storage|.
+    iree_loop_sync_free(loop_sync);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees |loop_sync|, aborting all pending operations first.
+IREE_API_EXPORT void iree_loop_sync_free(iree_loop_sync_t* loop_sync) {
+  IREE_ASSERT_ARGUMENT(loop_sync);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // Snapshot before freeing the struct the allocator handle lives in.
+  iree_allocator_t allocator = loop_sync->allocator;
+
+  // Abort all pending operations.
+  // This will issue callbacks for each operation that was aborted directly
+  // with IREE_STATUS_ABORTED.
+  // To ensure we don't enqueue more work while aborting we NULL out the lists
+  // (iree_loop_sync_ctl rejects new work when run_ring is NULL).
+  iree_loop_run_ring_t* run_ring = loop_sync->run_ring;
+  iree_loop_wait_list_t* wait_list = loop_sync->wait_list;
+  loop_sync->run_ring = NULL;
+  loop_sync->wait_list = NULL;
+  iree_loop_wait_list_abort_all(wait_list);
+  iree_loop_run_ring_abort_all(run_ring);
+
+  // After all operations are cleared we can release the data structures.
+  iree_loop_run_ring_deinitialize(run_ring);
+  iree_loop_wait_list_deinitialize(wait_list);
+  iree_allocator_free(allocator, loop_sync);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Aborts all operations in the loop attributed to |scope|.
+// Abort callbacks fire synchronously with IREE_STATUS_ABORTED.
+static void iree_loop_sync_abort_scope(iree_loop_sync_t* loop_sync,
+                                       iree_loop_sync_scope_t* scope) {
+  // Waits are aborted before runnable ops so their callbacks (invoked with a
+  // NULL loop) cannot add to a ring that has already been drained.
+  iree_loop_wait_list_abort_scope(loop_sync->wait_list, scope);
+  iree_loop_run_ring_abort_scope(loop_sync->run_ring, scope);
+}
+
+// Emits |status| to the given |loop| scope and aborts associated operations.
+// Ownership of |status| transfers to the scope's error_fn (or it is ignored
+// when no handler is registered).
+static void iree_loop_sync_emit_error(iree_loop_t loop, iree_status_t status) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_TEXT(
+      z0, iree_status_code_string(iree_status_code(status)));
+
+  // |loop.self| is the scope the failing op was enqueued under (see
+  // iree_loop_sync_ctl/drain which construct loops this way).
+  iree_loop_sync_scope_t* scope = (iree_loop_sync_scope_t*)loop.self;
+  iree_loop_sync_t* loop_sync = scope->loop_sync;
+
+  if (scope->error_fn) {
+    scope->error_fn(scope->error_user_data, status);
+  } else {
+    iree_status_ignore(status);
+  }
+
+  // An error in any op poisons the whole scope: drop its remaining work.
+  iree_loop_sync_abort_scope(loop_sync, scope);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Executes a CALL op by invoking its callback with |op_status| (the status the
+// op was enqueued with, e.g. from a completed wait). A non-OK callback result
+// is routed to the scope's error handler and aborts the scope's work.
+static void iree_loop_sync_run_call(iree_loop_sync_t* loop_sync,
+                                    iree_loop_t loop,
+                                    const iree_loop_call_params_t params,
+                                    iree_status_t op_status) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_t status =
+      params.callback.fn(params.callback.user_data, loop, op_status);
+  if (!iree_status_is_ok(status)) {
+    iree_loop_sync_emit_error(loop, status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Executes a DISPATCH op by synchronously invoking workgroup_fn for every
+// workgroup in the XYZ grid, then firing the completion callback exactly once
+// with either success or the first workgroup failure.
+static void iree_loop_sync_run_dispatch(
+    iree_loop_sync_t* loop_sync, iree_loop_t loop,
+    const iree_loop_dispatch_params_t params) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_t status = iree_ok_status();
+
+  // We run all workgroups before issuing the completion callback.
+  // If any workgroup fails we exit early and pass the failing status back to
+  // the completion handler exactly once.
+  uint32_t workgroup_count_x = params.workgroup_count_xyz[0];
+  uint32_t workgroup_count_y = params.workgroup_count_xyz[1];
+  uint32_t workgroup_count_z = params.workgroup_count_xyz[2];
+  iree_status_t workgroup_status = iree_ok_status();
+  // z-major iteration; x is the innermost (fastest-varying) dimension.
+  for (uint32_t z = 0; z < workgroup_count_z; ++z) {
+    for (uint32_t y = 0; y < workgroup_count_y; ++y) {
+      for (uint32_t x = 0; x < workgroup_count_x; ++x) {
+        workgroup_status =
+            params.workgroup_fn(params.callback.user_data, loop, x, y, z);
+        if (!iree_status_is_ok(workgroup_status)) goto workgroup_failed;
+      }
+    }
+  }
+workgroup_failed:
+
+  // Fire the completion callback with either success or the first error hit by
+  // a workgroup.
+  status =
+      params.callback.fn(params.callback.user_data, loop, workgroup_status);
+  if (!iree_status_is_ok(status)) {
+    iree_loop_sync_emit_error(loop, status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Drains work from the loop until all work in |scope| has completed.
+// A NULL |scope| indicates all work from all scopes should be drained.
+// Runs one runnable op per iteration (so re-entrant enqueues and the deadline
+// are honored) and otherwise scans/commits waits until |deadline_ns|.
+static iree_status_t iree_loop_sync_drain_scope(iree_loop_sync_t* loop_sync,
+                                                iree_loop_sync_scope_t* scope,
+                                                iree_time_t deadline_ns) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  do {
+    // If we are draining a particular scope we can bail whenever there's no
+    // more work remaining.
+    if (scope && !scope->pending_count) break;
+
+    // Run an op from the runnable queue.
+    // We dequeue operations here so that re-entrant enqueuing works.
+    // We only want to run one op at a time before checking our deadline so that
+    // we don't get into infinite loops or exceed the deadline (too much).
+    iree_loop_run_op_t run_op;
+    if (iree_loop_run_ring_dequeue(loop_sync->run_ring, &run_op)) {
+      iree_loop_t loop = {
+          .self = run_op.scope,
+          .ctl = iree_loop_sync_ctl,
+      };
+      switch (run_op.command) {
+        case IREE_LOOP_COMMAND_CALL:
+          iree_loop_sync_run_call(loop_sync, loop, run_op.params.call,
+                                  run_op.status);
+          break;
+        case IREE_LOOP_COMMAND_DISPATCH:
+          iree_loop_sync_run_dispatch(loop_sync, loop, run_op.params.dispatch);
+          break;
+      }
+      continue;  // loop back around only if under the deadline
+    }
+
+    // -- if here then the run ring is currently empty --
+
+    // If there are no pending waits then the drain has completed.
+    if (iree_loop_wait_list_is_empty(loop_sync->wait_list)) {
+      break;
+    }
+
+    // Scan the wait list and check for resolved ops.
+    // If there are any waiting ops the next earliest timeout is returned. An
+    // immediate timeout indicates that there's work in the run ring and we
+    // shouldn't perform a wait operation this go around the loop.
+    iree_time_t earliest_deadline_ns = IREE_TIME_INFINITE_FUTURE;
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_loop_wait_list_scan(loop_sync->wait_list, loop_sync->run_ring,
+                                     &earliest_deadline_ns));
+    // NOTE: an INFINITE_FUTURE wait list deadline (waits pending with no
+    // deadline of their own) must still commit a wait bounded by the caller
+    // |deadline_ns|; skipping it would busy-spin scan/query until the drain
+    // deadline. Only INFINITE_PAST (callbacks pending) skips the wait.
+    if (earliest_deadline_ns != IREE_TIME_INFINITE_PAST) {
+      // Commit the wait operation, waiting up until the minimum of the user
+      // specified and wait list derived values.
+      iree_time_t wait_deadline_ns = earliest_deadline_ns < deadline_ns
+                                         ? earliest_deadline_ns
+                                         : deadline_ns;
+      IREE_RETURN_AND_END_ZONE_IF_ERROR(
+          z0, iree_loop_wait_list_commit(
+                  loop_sync->wait_list, loop_sync->run_ring, wait_deadline_ns));
+    }
+  } while (iree_time_now() < deadline_ns);
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Drains work from all scopes until no work remains or |timeout| elapses.
+IREE_API_EXPORT iree_status_t
+iree_loop_sync_wait_idle(iree_loop_sync_t* loop_sync, iree_timeout_t timeout) {
+  IREE_ASSERT_ARGUMENT(loop_sync);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // Convert the relative/absolute timeout once up front.
+  iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+  iree_status_t status =
+      iree_loop_sync_drain_scope(loop_sync, /*scope=*/NULL, deadline_ns);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Control function for the synchronous loop.
+// |self| must be an iree_loop_sync_scope_t.
+// Enqueue commands copy |params| by value (the caller's storage is stack
+// allocated); DRAIN executes inline. Fails precondition during teardown.
+IREE_API_EXPORT iree_status_t iree_loop_sync_ctl(void* self,
+                                                 iree_loop_command_t command,
+                                                 const void* params,
+                                                 void** inout_ptr) {
+  IREE_ASSERT_ARGUMENT(self);
+  iree_loop_sync_scope_t* scope = (iree_loop_sync_scope_t*)self;
+  iree_loop_sync_t* loop_sync = scope->loop_sync;
+
+  // run_ring is NULLed out by iree_loop_sync_free while aborting.
+  if (IREE_UNLIKELY(!loop_sync->run_ring)) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "new work cannot be enqueued while the loop is shutting down");
+  }
+
+  // NOTE: we return immediately to make this all (hopefully) tail calls.
+  switch (command) {
+    case IREE_LOOP_COMMAND_CALL:
+      return iree_loop_run_ring_enqueue(
+          loop_sync->run_ring,
+          (iree_loop_run_op_t){
+              .command = command,
+              .scope = scope,
+              .params =
+                  {
+                      .call = *(const iree_loop_call_params_t*)params,
+                  },
+          });
+    case IREE_LOOP_COMMAND_DISPATCH:
+      return iree_loop_run_ring_enqueue(
+          loop_sync->run_ring,
+          (iree_loop_run_op_t){
+              .command = command,
+              .scope = scope,
+              .params =
+                  {
+                      .dispatch = *(const iree_loop_dispatch_params_t*)params,
+                  },
+          });
+    case IREE_LOOP_COMMAND_WAIT_UNTIL:
+      return iree_loop_wait_list_insert(
+          loop_sync->wait_list,
+          (iree_loop_wait_op_t){
+              .command = command,
+              .scope = scope,
+              .params =
+                  {
+                      .wait_until =
+                          *(const iree_loop_wait_until_params_t*)params,
+                  },
+          });
+    case IREE_LOOP_COMMAND_WAIT_ONE:
+      return iree_loop_wait_list_insert(
+          loop_sync->wait_list,
+          (iree_loop_wait_op_t){
+              .command = command,
+              .scope = scope,
+              .params =
+                  {
+                      .wait_one = *(const iree_loop_wait_one_params_t*)params,
+                  },
+          });
+    case IREE_LOOP_COMMAND_WAIT_ALL:
+    case IREE_LOOP_COMMAND_WAIT_ANY:
+      // Both variants share the wait_multi params layout; |command| is stored
+      // on the op to distinguish them during scanning.
+      return iree_loop_wait_list_insert(
+          loop_sync->wait_list,
+          (iree_loop_wait_op_t){
+              .command = command,
+              .scope = scope,
+              .params =
+                  {
+                      .wait_multi =
+                          *(const iree_loop_wait_multi_params_t*)params,
+                  },
+          });
+    case IREE_LOOP_COMMAND_DRAIN:
+      // Executes inline on the caller's thread until idle or the deadline.
+      return iree_loop_sync_drain_scope(
+          loop_sync, scope,
+          ((const iree_loop_drain_params_t*)params)->deadline_ns);
+    default:
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "unimplemented loop command");
+  }
+}
diff --git a/runtime/src/iree/base/loop_sync.h b/runtime/src/iree/base/loop_sync.h
new file mode 100644
index 0000000..12811bd
--- /dev/null
+++ b/runtime/src/iree/base/loop_sync.h
@@ -0,0 +1,109 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_LOOP_SYNC_H_
+#define IREE_BASE_LOOP_SYNC_H_
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_loop_sync_t
+//===----------------------------------------------------------------------===//
+
+// Configuration options for the synchronous loop implementation.
+typedef struct iree_loop_sync_options_t {
+  // Specifies the maximum operation queue depth in number of operations.
+  // Growth is not currently supported and if the capacity is reached during
+  // execution then IREE_STATUS_RESOURCE_EXHAUSTED will be returned when new
+  // operations are enqueued.
+  iree_host_size_t max_queue_depth;
+
+  // Specifies how many pending waits are allowed at the same time.
+  // Growth is not currently supported and if the capacity is reached during
+  // execution then IREE_STATUS_RESOURCE_EXHAUSTED will be returned when new
+  // waits are enqueued.
+  iree_host_size_t max_wait_count;
+} iree_loop_sync_options_t;
+
+// A lightweight loop that greedily runs operations as they are available.
+// This does not require any system threading support and has deterministic
+// behavior unless multi-waits are used.
+//
+// Thread-compatible: the loop only performs work when iree_loop_drain is
+// called and must not be used from multiple threads concurrently.
+typedef struct iree_loop_sync_t iree_loop_sync_t;
+
+// Allocates a synchronous loop using |allocator| stored into |out_loop_sync|.
+IREE_API_EXPORT iree_status_t iree_loop_sync_allocate(
+    iree_loop_sync_options_t options, iree_allocator_t allocator,
+    iree_loop_sync_t** out_loop_sync);
+
+// Frees a synchronous |loop_sync|, aborting all pending operations.
+IREE_API_EXPORT void iree_loop_sync_free(iree_loop_sync_t* loop_sync);
+
+// Waits until the loop is idle (all operations in all scopes have retired).
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if |timeout| is reached before the
+// loop is idle.
+IREE_API_EXPORT iree_status_t
+iree_loop_sync_wait_idle(iree_loop_sync_t* loop_sync, iree_timeout_t timeout);
+
+// Handles scope errors returned from loop callback operations.
+// Ownership of |status| is passed to the handler and must be freed.
+// All operations of the same scope will be aborted.
+typedef void(IREE_API_PTR* iree_loop_sync_error_fn_t)(void* user_data,
+                                                      iree_status_t status);
+
+// A scope of execution within a loop.
+// Each scope has a dedicated error handler that is notified when an error
+// propagates from a loop operation scheduled against the scope. When an error
+// arises all other operations in the same scope will be aborted.
+typedef struct iree_loop_sync_scope_t {
+  // Target loop for execution.
+  iree_loop_sync_t* loop_sync;
+
+  // Total number of pending operations in the scope.
+  // When 0 the scope is considered idle.
+  int32_t pending_count;
+
+  // Optional function used to report errors that occur during execution.
+  iree_loop_sync_error_fn_t error_fn;
+  void* error_user_data;
+} iree_loop_sync_scope_t;
+
+// Initializes a loop scope that runs operations against |loop_sync|.
+IREE_API_EXPORT void iree_loop_sync_scope_initialize(
+    iree_loop_sync_t* loop_sync, iree_loop_sync_error_fn_t error_fn,
+    void* error_user_data, iree_loop_sync_scope_t* out_scope);
+
+// Deinitializes a loop |scope| and aborts any pending operations.
+IREE_API_EXPORT void iree_loop_sync_scope_deinitialize(
+    iree_loop_sync_scope_t* scope);
+
+// Loop control function servicing iree_loop_command_t requests against the
+// iree_loop_sync_scope_t passed as |self|; used as the ctl function of loops
+// returned by iree_loop_sync_scope.
+IREE_API_EXPORT iree_status_t iree_loop_sync_ctl(void* self,
+                                                 iree_loop_command_t command,
+                                                 const void* params,
+                                                 void** inout_ptr);
+
+// Returns a loop that schedules operations against |scope|.
+// The scope must remain valid until all operations scheduled against it have
+// completed.
+static inline iree_loop_t iree_loop_sync_scope(iree_loop_sync_scope_t* scope) {
+  iree_loop_t loop = {
+      scope,
+      iree_loop_sync_ctl,
+  };
+  return loop;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_LOOP_SYNC_H_
diff --git a/runtime/src/iree/base/loop_sync_test.cc b/runtime/src/iree/base/loop_sync_test.cc
new file mode 100644
index 0000000..893ed40
--- /dev/null
+++ b/runtime/src/iree/base/loop_sync_test.cc
@@ -0,0 +1,52 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/loop_sync.h"
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+// Contains the test definitions applied to all loop implementations:
+#include "iree/base/loop_test.h"
+
+// Test hook declared in loop_test.h: allocates the loop under test.
+// Creates a sync loop plus a heap-allocated scope whose error handler stores
+// the first failure into |out_status|; both are reclaimed by FreeLoop().
+void AllocateLoop(iree_status_t* out_status, iree_allocator_t allocator,
+                  iree_loop_t* out_loop) {
+  iree_loop_sync_options_t options = {0};
+  options.max_queue_depth = 128;
+  options.max_wait_count = 32;
+
+  iree_loop_sync_t* loop_sync = NULL;
+  IREE_CHECK_OK(iree_loop_sync_allocate(options, allocator, &loop_sync));
+
+  // The scope must outlive the returned loop so it is heap-allocated here and
+  // freed in FreeLoop().
+  iree_loop_sync_scope_t* scope = NULL;
+  IREE_CHECK_OK(
+      iree_allocator_malloc(allocator, sizeof(*scope), (void**)&scope));
+  iree_loop_sync_scope_initialize(
+      loop_sync,
+      +[](void* user_data, iree_status_t status) {
+        // Keep only the first error; subsequent errors are freed and dropped.
+        iree_status_t* status_ptr = (iree_status_t*)user_data;
+        if (iree_status_is_ok(*status_ptr)) {
+          *status_ptr = status;
+        } else {
+          iree_status_ignore(status);
+        }
+      },
+      out_status, scope);
+  *out_loop = iree_loop_sync_scope(scope);
+}
+
+// Test hook declared in loop_test.h: tears down a loop made by AllocateLoop().
+// Releases the scope (aborting any still-pending operations) before freeing
+// the loop that backed it.
+void FreeLoop(iree_allocator_t allocator, iree_loop_t loop) {
+  iree_loop_sync_scope_t* scope = (iree_loop_sync_scope_t*)loop.self;
+  iree_loop_sync_t* loop_sync = scope->loop_sync;
+
+  iree_loop_sync_scope_deinitialize(scope);
+  iree_allocator_free(allocator, scope);
+
+  iree_loop_sync_free(loop_sync);
+}
+
+// TODO(benvanik): test multiple scopes and scoped abort behavior.
diff --git a/runtime/src/iree/base/loop_test.h b/runtime/src/iree/base/loop_test.h
new file mode 100644
index 0000000..66b439a
--- /dev/null
+++ b/runtime/src/iree/base/loop_test.h
@@ -0,0 +1,980 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <chrono>
+#include <thread>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/wait_handle.h"
+#include "iree/base/tracing.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+// NOTE: this file is meant to be included inside of a _test.cc source file.
+// The file must define these functions to allocate/free the loop.
+// |out_status| should receive the last global error encountered in the loop.
+void AllocateLoop(iree_status_t* out_status, iree_allocator_t allocator,
+ iree_loop_t* out_loop);
+void FreeLoop(iree_allocator_t allocator, iree_loop_t loop);
+
+namespace iree {
+namespace testing {
+
+// Fixture giving each test a freshly allocated loop.
+struct LoopTest : public ::testing::Test {
+  iree_allocator_t allocator = iree_allocator_system();
+  // Loop under test; valid between SetUp() and TearDown().
+  iree_loop_t loop;
+  // Receives the first scope-level error reported by the loop.
+  iree_status_t loop_status = iree_ok_status();
+
+  void SetUp() override {
+    IREE_TRACE_SCOPE();
+    AllocateLoop(&loop_status, allocator, &loop);
+  }
+  void TearDown() override {
+    IREE_TRACE_SCOPE();
+    FreeLoop(allocator, loop);
+    // Tests assert on loop_status themselves; ignore here to avoid leaking an
+    // allocated status when a test chose not to consume it.
+    iree_status_ignore(loop_status);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// iree_loop_call
+//===----------------------------------------------------------------------===//
+
+// Tests the simple call interface for running work.
+TEST_F(LoopTest, Call) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    // Starts as a sentinel failure so a skipped callback is detected.
+    iree_status_t call_status = iree_status_from_code(IREE_STATUS_DATA_LOSS);
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_call(
+      loop, IREE_LOOP_PRIORITY_DEFAULT,
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->call_status = status;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  IREE_ASSERT_OK(user_data.call_status);
+}
+
+// Tests a call that forks into two other calls.
+TEST_F(LoopTest, CallFork) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    bool called_a = false;
+    bool called_b = false;
+    bool called_c = false;
+  } user_data;
+
+  // A -> [B, C]
+  IREE_ASSERT_OK(iree_loop_call(
+      loop, IREE_LOOP_PRIORITY_DEFAULT,
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->called_a = true;
+
+        // B
+        IREE_EXPECT_OK(iree_loop_call(
+            loop, IREE_LOOP_PRIORITY_DEFAULT,
+            +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+              IREE_TRACE_SCOPE();
+              IREE_EXPECT_OK(status);
+              auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+              user_data->called_b = true;
+              return iree_ok_status();
+            },
+            user_data));
+
+        // C
+        IREE_EXPECT_OK(iree_loop_call(
+            loop, IREE_LOOP_PRIORITY_DEFAULT,
+            +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+              IREE_TRACE_SCOPE();
+              IREE_EXPECT_OK(status);
+              auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+              user_data->called_c = true;
+              return iree_ok_status();
+            },
+            user_data));
+
+        return iree_ok_status();
+      },
+      &user_data));
+
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.called_a);
+  EXPECT_TRUE(user_data.called_b);
+  EXPECT_TRUE(user_data.called_c);
+}
+
+// Tests a repeating call - since the loops are intended to be stackless we
+// should in theory be able to issue calls forever. This test ensures we can do
+// a really large amount without blowing the native stack.
+struct CallRepeatedData {
+  int remaining = 2 * 1024;
+};
+static iree_status_t CallRepeatedFn(void* user_data_ptr, iree_loop_t loop,
+                                    iree_status_t status) {
+  IREE_TRACE_SCOPE();
+  IREE_EXPECT_OK(status);
+  auto* user_data = reinterpret_cast<CallRepeatedData*>(user_data_ptr);
+  // Re-enqueue ourselves instead of recursing so the native stack stays
+  // bounded regardless of the remaining count.
+  if (--user_data->remaining) {
+    IREE_RETURN_IF_ERROR(iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT,
+                                        CallRepeatedFn, user_data));
+  }
+  return iree_ok_status();
+}
+TEST_F(LoopTest, CallRepeated) {
+  IREE_TRACE_SCOPE();
+  CallRepeatedData user_data;
+  IREE_ASSERT_OK(iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT,
+                                CallRepeatedFn, &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_EQ(user_data.remaining, 0);
+}
+
+// Tests a call that results in failure.
+TEST_F(LoopTest, CallFailure) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    bool completed = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_call(
+      loop, IREE_LOOP_PRIORITY_DEFAULT,
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        EXPECT_FALSE(user_data->completed);
+        user_data->completed = true;
+        // The failure returned here is expected to surface in loop_status via
+        // the scope error handler.
+        return iree_status_from_code(IREE_STATUS_DATA_LOSS);
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_EXPECT_STATUS_IS(IREE_STATUS_DATA_LOSS, loop_status);
+}
+
+// Tests that a failure will abort other pending tasks.
+TEST_F(LoopTest, CallFailureAborts) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    bool did_call_callback = false;
+    bool did_wait_callback = false;
+  } user_data;
+
+  // Issue the call that will fail.
+  IREE_ASSERT_OK(iree_loop_call(
+      loop, IREE_LOOP_PRIORITY_DEFAULT,
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        EXPECT_FALSE(user_data->did_call_callback);
+        user_data->did_call_callback = true;
+        return iree_status_from_code(IREE_STATUS_DATA_LOSS);
+      },
+      &user_data));
+
+  // Enqueue a wait that will never complete - if it runs it means we didn't
+  // correctly abort it.
+  IREE_ASSERT_OK(iree_loop_wait_until(
+      loop, iree_make_timeout_ms(1 * 60 * 1000),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        // Aborted waits are notified with ABORTED; consume the status so it
+        // does not leak.
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_ABORTED, status);
+        iree_status_ignore(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        EXPECT_FALSE(user_data->did_wait_callback);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_EXPECT_STATUS_IS(IREE_STATUS_DATA_LOSS, loop_status);
+  EXPECT_TRUE(user_data.did_call_callback);
+  EXPECT_TRUE(user_data.did_wait_callback);
+}
+
+// Tests that a failure will abort other pending tasks, including those enqueued
+// from within the failing call itself.
+TEST_F(LoopTest, CallFailureAbortsNested) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    bool did_call_callback = false;
+    bool did_wait_callback = false;
+  } user_data;
+
+  // Issue the call that will fail.
+  IREE_ASSERT_OK(iree_loop_call(
+      loop, IREE_LOOP_PRIORITY_DEFAULT,
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        EXPECT_FALSE(user_data->did_call_callback);
+        user_data->did_call_callback = true;
+
+        // Enqueue a wait that will never complete - if it runs it means we
+        // didn't correctly abort it. We are enqueuing it reentrantly as a user
+        // would before we encounter the error below.
+        IREE_EXPECT_OK(iree_loop_wait_until(
+            loop, iree_make_timeout_ms(1 * 60 * 1000),
+            +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+              IREE_TRACE_SCOPE();
+              IREE_EXPECT_STATUS_IS(IREE_STATUS_ABORTED, status);
+              iree_status_ignore(status);
+              auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+              EXPECT_FALSE(user_data->did_wait_callback);
+              user_data->did_wait_callback = true;
+              return iree_ok_status();
+            },
+            user_data));
+
+        return iree_status_from_code(IREE_STATUS_DATA_LOSS);
+      },
+      &user_data));
+
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_EXPECT_STATUS_IS(IREE_STATUS_DATA_LOSS, loop_status);
+  EXPECT_TRUE(user_data.did_call_callback);
+  EXPECT_TRUE(user_data.did_wait_callback);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_dispatch
+//===----------------------------------------------------------------------===//
+
+// Tests a grid dispatch operation with an empty grid.
+// The completion callback should still be issued but no workgroups.
+TEST_F(LoopTest, DispatchEmpty) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    std::atomic<int> workgroup_count = {0};
+    bool completed = false;
+  } user_data;
+  // The y extent is 0 so the grid contains no workgroups at all.
+  const uint32_t xyz[3] = {1, 0, 0};
+  IREE_ASSERT_OK(iree_loop_dispatch(
+      loop, xyz,
+      +[](void* user_data_ptr, iree_loop_t loop, uint32_t workgroup_x,
+          uint32_t workgroup_y, uint32_t workgroup_z) {
+        IREE_TRACE_SCOPE();
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        ++user_data->workgroup_count;
+        return iree_ok_status();
+      },
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        EXPECT_FALSE(user_data->completed);
+        user_data->completed = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_EQ(user_data.workgroup_count, 0);
+  EXPECT_TRUE(user_data.completed);
+}
+
+// Tests a grid dispatch operation and ensures all workgroups are issued.
+TEST_F(LoopTest, DispatchGrid) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    std::atomic<int> workgroup_count = {0};
+    bool completed = false;
+  } user_data;
+  const uint32_t xyz[3] = {4, 2, 1};
+  IREE_ASSERT_OK(iree_loop_dispatch(
+      loop, xyz,
+      +[](void* user_data_ptr, iree_loop_t loop, uint32_t workgroup_x,
+          uint32_t workgroup_y, uint32_t workgroup_z) {
+        IREE_TRACE_SCOPE();
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        ++user_data->workgroup_count;
+        return iree_ok_status();
+      },
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        EXPECT_FALSE(user_data->completed);
+        user_data->completed = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  // Every (x, y, z) coordinate must have been issued exactly once.
+  EXPECT_EQ(user_data.workgroup_count, xyz[0] * xyz[1] * xyz[2]);
+  EXPECT_TRUE(user_data.completed);
+}
+
+// Tests a grid dispatch operation with a workgroup failure.
+TEST_F(LoopTest, DispatchWorkgroupFailure) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    bool completed = false;
+  } user_data;
+  const uint32_t xyz[3] = {4, 2, 1};
+  IREE_ASSERT_OK(iree_loop_dispatch(
+      loop, xyz,
+      +[](void* user_data_ptr, iree_loop_t loop, uint32_t workgroup_x,
+          uint32_t workgroup_y, uint32_t workgroup_z) {
+        IREE_TRACE_SCOPE();
+        return iree_status_from_code(IREE_STATUS_DATA_LOSS);
+      },
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        // The workgroup failure arrives at the completion callback; consuming
+        // it here keeps it from propagating into loop_status.
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_DATA_LOSS, status);
+        iree_status_ignore(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        EXPECT_FALSE(user_data->completed);
+        user_data->completed = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.completed);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_wait_until
+//===----------------------------------------------------------------------===//
+
+// Tests a wait-until delay with an immediate timeout.
+TEST_F(LoopTest, WaitUntilImmediate) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    // Sentinel failure so a skipped callback is detected.
+    iree_status_t wait_status = iree_status_from_code(IREE_STATUS_DATA_LOSS);
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_until(
+      loop, iree_immediate_timeout(),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->wait_status = status;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  IREE_ASSERT_OK(user_data.wait_status);
+}
+
+// Tests a wait-until delay with an actual delay.
+TEST_F(LoopTest, WaitUntil) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    iree_time_t start_ns = iree_time_now();
+    iree_time_t end_ns = IREE_TIME_INFINITE_FUTURE;
+    iree_status_t wait_status = iree_status_from_code(IREE_STATUS_DATA_LOSS);
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_until(
+      loop, iree_make_timeout_ms(50),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->end_ns = iree_time_now();
+        user_data->wait_status = status;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  IREE_ASSERT_OK(user_data.wait_status);
+  // Not checking exact timing as some devices may not have clocks.
+  EXPECT_GE(user_data.end_ns, user_data.start_ns);
+}
+
+// Tests that multiple wait-until's can be active at once.
+// NOTE: loops are not required to wake in any particular order.
+TEST_F(LoopTest, MultiWaitUntil) {
+  IREE_TRACE_SCOPE();
+  struct UserData {
+    bool woke_a = false;
+    bool woke_b = false;
+  } user_data;
+
+  IREE_ASSERT_OK(iree_loop_wait_until(
+      loop, iree_make_timeout_ms(25),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->woke_a = true;
+        return iree_ok_status();
+      },
+      &user_data));
+
+  IREE_ASSERT_OK(iree_loop_wait_until(
+      loop, iree_make_timeout_ms(50),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->woke_b = true;
+        return iree_ok_status();
+      },
+      &user_data));
+
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.woke_a);
+  EXPECT_TRUE(user_data.woke_b);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_wait_one
+//===----------------------------------------------------------------------===//
+
+// Tests a wait-one with an immediate timeout.
+// The handle is never resolved and if we didn't bail immediately we'd hang.
+TEST_F(LoopTest, WaitOneImmediate) {
+  IREE_TRACE_SCOPE();
+
+  // An event that never resolves.
+  iree_event_t event;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+  iree_wait_source_t wait_source = iree_event_await(&event);
+
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_one(
+      loop, wait_source, iree_immediate_timeout(),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        // NOTE(review): unlike the ABORTED case in CallFailureAborts, |status|
+        // is not iree_status_ignore()d here - fine for a code-only status but
+        // would leak if an implementation passed an annotated one; confirm.
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED, status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event);
+}
+
+// Tests a wait-one with a non-immediate timeout.
+TEST_F(LoopTest, WaitOneTimeout) {
+  IREE_TRACE_SCOPE();
+
+  // An event that never resolves.
+  iree_event_t event;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+  iree_wait_source_t wait_source = iree_event_await(&event);
+
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_one(
+      loop, wait_source, iree_make_timeout_ms(10),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED, status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event);
+}
+
+// Tests a wait-one that times out does not abort other loop ops.
+// The deadline exceeded status passed to the callback is sufficient.
+TEST_F(LoopTest, WaitOneTimeoutNoAbort) {
+  IREE_TRACE_SCOPE();
+
+  // An event that never resolves.
+  iree_event_t event;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+  iree_wait_source_t wait_source = iree_event_await(&event);
+
+  struct UserData {
+    bool did_wait_callback = false;
+    bool did_call_callback = false;
+  } user_data;
+
+  // Wait that will time out.
+  IREE_ASSERT_OK(iree_loop_wait_one(
+      loop, wait_source, iree_make_timeout_ms(10),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED, status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+
+        // Call that should still be issued correctly.
+        // Note that we queue it here as if we did it outside the wait we'd
+        // immediately execute it on out-of-order implementations.
+        IREE_EXPECT_OK(iree_loop_call(
+            loop, IREE_LOOP_PRIORITY_DEFAULT,
+            +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+              IREE_TRACE_SCOPE();
+              IREE_EXPECT_OK(status);
+              auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+              EXPECT_FALSE(user_data->did_call_callback);
+              user_data->did_call_callback = true;
+              return iree_ok_status();
+            },
+            user_data));
+
+        return iree_ok_status();
+      },
+      &user_data));
+
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+  EXPECT_TRUE(user_data.did_call_callback);
+
+  iree_event_deinitialize(&event);
+}
+
+// Tests a wait-one with an already signaled wait source.
+TEST_F(LoopTest, WaitOneSignaled) {
+  IREE_TRACE_SCOPE();
+
+  // An event that is resolved immediately.
+  iree_event_t event;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &event));
+  iree_wait_source_t wait_source = iree_event_await(&event);
+
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_one(
+      loop, wait_source, iree_make_timeout_ms(10),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event);
+}
+
+// Tests a wait-one on a wait handle signaled out-of-band.
+TEST_F(LoopTest, WaitOneBlocking) {
+  IREE_TRACE_SCOPE();
+
+  // Initially unsignaled.
+  iree_event_t event;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+  iree_wait_source_t wait_source = iree_event_await(&event);
+
+  // Spin up the thread to signal the event after a short delay.
+  // We need to do this before we issue the wait so that loops which perform the
+  // wait inline can still make forward progress even if they block.
+  std::thread thread([&]() {
+    IREE_TRACE_SCOPE();
+    std::this_thread::sleep_for(std::chrono::milliseconds(50));
+    iree_event_set(&event);
+  });
+
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_one(
+      loop, wait_source, iree_make_timeout_ms(200),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  thread.join();
+  iree_event_deinitialize(&event);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_wait_any
+//===----------------------------------------------------------------------===//
+
+// Tests a wait-any with a immediate timeout (a poll).
+TEST_F(LoopTest, WaitAnyImmediate) {
+  IREE_TRACE_SCOPE();
+
+  // Events that are never resolved such that we time out.
+  iree_event_t event_a;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event_a));
+  iree_wait_source_t wait_source_a = iree_event_await(&event_a);
+  iree_event_t event_b;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event_b));
+  iree_wait_source_t wait_source_b = iree_event_await(&event_b);
+
+  iree_wait_source_t wait_sources[2] = {
+      wait_source_a,
+      wait_source_b,
+  };
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_any(
+      loop, IREE_ARRAYSIZE(wait_sources), wait_sources,
+      iree_immediate_timeout(),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED, status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event_a);
+  iree_event_deinitialize(&event_b);
+}
+
+// Tests a wait-any with a non-immediate timeout.
+TEST_F(LoopTest, WaitAnyTimeout) {
+  IREE_TRACE_SCOPE();
+
+  // Events that are never resolved such that we time out.
+  iree_event_t event_a;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event_a));
+  iree_wait_source_t wait_source_a = iree_event_await(&event_a);
+  iree_event_t event_b;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event_b));
+  iree_wait_source_t wait_source_b = iree_event_await(&event_b);
+
+  iree_wait_source_t wait_sources[2] = {
+      wait_source_a,
+      wait_source_b,
+  };
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_any(
+      loop, IREE_ARRAYSIZE(wait_sources), wait_sources,
+      iree_make_timeout_ms(10),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED, status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event_a);
+  iree_event_deinitialize(&event_b);
+}
+
+// Tests a wait-any with an already-resolved wait handle.
+TEST_F(LoopTest, WaitAnySignaled) {
+  IREE_TRACE_SCOPE();
+
+  // An event that is resolved immediately.
+  iree_event_t event;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &event));
+  iree_wait_source_t wait_source = iree_event_await(&event);
+
+  // Always unsignaled so we test the wait-any behavior.
+  iree_event_t unresolved_event;
+  IREE_ASSERT_OK(
+      iree_event_initialize(/*initial_state=*/false, &unresolved_event));
+  iree_wait_source_t unresolved_wait_source =
+      iree_event_await(&unresolved_event);
+
+  iree_wait_source_t wait_sources[2] = {
+      wait_source,
+      unresolved_wait_source,
+  };
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_any(
+      loop, IREE_ARRAYSIZE(wait_sources), wait_sources,
+      iree_make_timeout_ms(10),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        // One resolved source is enough for wait-any to succeed.
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event);
+  iree_event_deinitialize(&unresolved_event);
+}
+
+// Tests a wait-any with a wait handle signaled out-of-band.
+TEST_F(LoopTest, WaitAnyBlocking) {
+  IREE_TRACE_SCOPE();
+
+  // Initially unsignaled.
+  iree_event_t event;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+  iree_wait_source_t wait_source = iree_event_await(&event);
+
+  // Always unsignaled so we test the wait-any behavior.
+  iree_event_t unresolved_event;
+  IREE_ASSERT_OK(
+      iree_event_initialize(/*initial_state=*/false, &unresolved_event));
+  iree_wait_source_t unresolved_wait_source =
+      iree_event_await(&unresolved_event);
+
+  // Spin up the thread to signal the event after a short delay.
+  // We need to do this before we issue the wait so that loops which perform the
+  // wait inline can still make forward progress even if they block.
+  std::thread thread([&]() {
+    IREE_TRACE_SCOPE();
+    std::this_thread::sleep_for(std::chrono::milliseconds(50));
+    iree_event_set(&event);
+  });
+
+  iree_wait_source_t wait_sources[2] = {
+      wait_source,
+      unresolved_wait_source,
+  };
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  IREE_ASSERT_OK(iree_loop_wait_any(
+      loop, IREE_ARRAYSIZE(wait_sources), wait_sources,
+      iree_make_timeout_ms(200),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  thread.join();
+  iree_event_deinitialize(&event);
+  iree_event_deinitialize(&unresolved_event);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_loop_wait_all
+//===----------------------------------------------------------------------===//
+
+// Tests a wait-all with an immediate timeout (a poll).
+TEST_F(LoopTest, WaitAllImmediate) {
+  IREE_TRACE_SCOPE();
+
+  // One unresolved and one resolved event (should fail the wait-all).
+  iree_event_t event_a;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event_a));
+  iree_wait_source_t wait_source_a = iree_event_await(&event_a);
+  iree_event_t event_b;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &event_b));
+  iree_wait_source_t wait_source_b = iree_event_await(&event_b);
+
+  iree_wait_source_t wait_sources[2] = {
+      wait_source_a,
+      wait_source_b,
+  };
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  // Polling: event_a is unsignaled so the wait-all cannot be satisfied and the
+  // callback must fire with DEADLINE_EXCEEDED rather than blocking.
+  IREE_ASSERT_OK(iree_loop_wait_all(
+      loop, IREE_ARRAYSIZE(wait_sources), wait_sources,
+      iree_immediate_timeout(),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED, status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event_a);
+  iree_event_deinitialize(&event_b);
+}
+
+// Tests a wait-all with a non-immediate timeout.
+TEST_F(LoopTest, WaitAllTimeout) {
+  IREE_TRACE_SCOPE();
+
+  // One unresolved and one resolved event (should fail the wait-all).
+  iree_event_t event_a;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event_a));
+  iree_wait_source_t wait_source_a = iree_event_await(&event_a);
+  iree_event_t event_b;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &event_b));
+  iree_wait_source_t wait_source_b = iree_event_await(&event_b);
+
+  iree_wait_source_t wait_sources[2] = {
+      wait_source_a,
+      wait_source_b,
+  };
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  // event_a is never signaled so the 10ms deadline must expire and the
+  // callback must observe DEADLINE_EXCEEDED.
+  IREE_ASSERT_OK(iree_loop_wait_all(
+      loop, IREE_ARRAYSIZE(wait_sources), wait_sources,
+      iree_make_timeout_ms(10),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_STATUS_IS(IREE_STATUS_DEADLINE_EXCEEDED, status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event_a);
+  iree_event_deinitialize(&event_b);
+}
+
+// Tests a wait-all with already-resolved wait handles.
+TEST_F(LoopTest, WaitAllSignaled) {
+  IREE_TRACE_SCOPE();
+
+  // Signaled events so the wait-all succeeds.
+  iree_event_t event_a;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &event_a));
+  iree_wait_source_t wait_source_a = iree_event_await(&event_a);
+  iree_event_t event_b;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/true, &event_b));
+  iree_wait_source_t wait_source_b = iree_event_await(&event_b);
+
+  iree_wait_source_t wait_sources[2] = {
+      wait_source_a,
+      wait_source_b,
+  };
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  // Both events are already signaled: the wait-all should resolve OK well
+  // within the 10ms timeout (ideally without blocking at all).
+  IREE_ASSERT_OK(iree_loop_wait_all(
+      loop, IREE_ARRAYSIZE(wait_sources), wait_sources,
+      iree_make_timeout_ms(10),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  iree_event_deinitialize(&event_a);
+  iree_event_deinitialize(&event_b);
+}
+
+// Tests a wait-all with wait handles signaled out-of-band.
+TEST_F(LoopTest, WaitAllBlocking) {
+  IREE_TRACE_SCOPE();
+
+  // Initially unsignaled; a helper thread signals it below.
+  iree_event_t event;
+  IREE_ASSERT_OK(iree_event_initialize(/*initial_state=*/false, &event));
+  iree_wait_source_t wait_source = iree_event_await(&event);
+
+  // Already signaled so the wait-all only remains blocked on |event| above.
+  iree_event_t resolved_event;
+  IREE_ASSERT_OK(
+      iree_event_initialize(/*initial_state=*/true, &resolved_event));
+  iree_wait_source_t resolved_wait_source = iree_event_await(&resolved_event);
+
+  // Spin up the thread to signal the event after a short delay.
+  // We need to do this before we issue the wait so that loops which perform the
+  // wait inline can still make forward progress even if they block.
+  std::thread thread([&]() {
+    IREE_TRACE_SCOPE();
+    std::this_thread::sleep_for(std::chrono::milliseconds(50));
+    iree_event_set(&event);
+  });
+
+  iree_wait_source_t wait_sources[2] = {
+      wait_source,
+      resolved_wait_source,
+  };
+  struct UserData {
+    bool did_wait_callback = false;
+  } user_data;
+  // 200ms timeout is deliberately much larger than the 50ms signal delay so
+  // the wait should complete via the signaled event, not the deadline.
+  IREE_ASSERT_OK(iree_loop_wait_all(
+      loop, IREE_ARRAYSIZE(wait_sources), wait_sources,
+      iree_make_timeout_ms(200),
+      +[](void* user_data_ptr, iree_loop_t loop, iree_status_t status) {
+        IREE_TRACE_SCOPE();
+        IREE_EXPECT_OK(status);
+        auto* user_data = reinterpret_cast<UserData*>(user_data_ptr);
+        user_data->did_wait_callback = true;
+        return iree_ok_status();
+      },
+      &user_data));
+  IREE_ASSERT_OK(iree_loop_drain(loop, iree_infinite_timeout()));
+
+  IREE_ASSERT_OK(loop_status);
+  EXPECT_TRUE(user_data.did_wait_callback);
+
+  thread.join();
+  iree_event_deinitialize(&event);
+  iree_event_deinitialize(&resolved_event);
+}
+
+} // namespace testing
+} // namespace iree
diff --git a/runtime/src/iree/base/status.c b/runtime/src/iree/base/status.c
new file mode 100644
index 0000000..d71ba53
--- /dev/null
+++ b/runtime/src/iree/base/status.c
@@ -0,0 +1,832 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/status.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/alignment.h"
+#include "iree/base/allocator.h"
+#include "iree/base/assert.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// C11 aligned_alloc compatibility shim
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_PLATFORM_WINDOWS)
+// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-malloc
+// NOTE: MSVC's _aligned_malloc takes (size, alignment) - the argument order is
+// swapped relative to C11 aligned_alloc, hence this adapter macro.
+#define iree_aligned_alloc(alignment, size) _aligned_malloc(size, alignment)
+#define iree_aligned_free(p) _aligned_free(p)
+#elif defined(_ISOC11_SOURCE)
+// https://en.cppreference.com/w/c/memory/aligned_alloc
+#define iree_aligned_alloc(alignment, size) aligned_alloc(alignment, size)
+#define iree_aligned_free(p) free(p)
+#elif _POSIX_C_SOURCE >= 200112L
+// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html
+// posix_memalign returns an error code instead of setting errno; on failure we
+// normalize to NULL like the other implementations.
+static inline void* iree_aligned_alloc(size_t alignment, size_t size) {
+  void* ptr = NULL;
+  return posix_memalign(&ptr, alignment, size) == 0 ? ptr : NULL;
+}
+#define iree_aligned_free(p) free(p)
+#else
+// Emulates alignment with normal malloc. We overallocate by at least the
+// alignment + the size of a pointer, store the base pointer at p[-1], and
+// return the aligned pointer. This lets us easily get the base pointer in free
+// to pass back to the system.
+static inline void* iree_aligned_alloc(size_t alignment, size_t size) {
+  void* base_ptr = malloc(size + alignment + sizeof(uintptr_t));
+  if (!base_ptr) return NULL;
+  // Align past the stashed base pointer; aligned_ptr[-1] always stays inside
+  // the allocation because we reserved sizeof(uintptr_t) + alignment slack.
+  uintptr_t* aligned_ptr = (uintptr_t*)iree_host_align(
+      (uintptr_t)base_ptr + sizeof(uintptr_t), alignment);
+  aligned_ptr[-1] = (uintptr_t)base_ptr;
+  return aligned_ptr;
+}
+static inline void iree_aligned_free(void* p) {
+  if (IREE_UNLIKELY(!p)) return;
+  // Recover the original malloc'ed base pointer stashed just below |p|.
+  uintptr_t* aligned_ptr = (uintptr_t*)p;
+  void* base_ptr = (void*)aligned_ptr[-1];
+  free(base_ptr);
+}
+#endif  // IREE_PLATFORM_WINDOWS
+
+//===----------------------------------------------------------------------===//
+// iree_status_t canonical errors
+//===----------------------------------------------------------------------===//
+
+// Maps a POSIX errno value to the closest canonical iree_status_code_t.
+// errno constants that are not defined on every supported platform are wrapped
+// in #ifdef guards so this compiles everywhere; anything unmapped collapses to
+// IREE_STATUS_UNKNOWN.
+IREE_API_EXPORT iree_status_code_t
+iree_status_code_from_errno(int error_number) {
+  switch (error_number) {
+    case 0:
+      return IREE_STATUS_OK;
+    case EINVAL:        // Invalid argument
+    case ENAMETOOLONG:  // Filename too long
+    case E2BIG:         // Argument list too long
+    case EDESTADDRREQ:  // Destination address required
+    case EDOM:          // Mathematics argument out of domain of function
+    case EFAULT:        // Bad address
+    case EILSEQ:        // Illegal byte sequence
+    case ENOPROTOOPT:   // Protocol not available
+    case ENOSTR:        // Not a STREAM
+    case ENOTSOCK:      // Not a socket
+    case ENOTTY:        // Inappropriate I/O control operation
+    case EPROTOTYPE:    // Protocol wrong type for socket
+    case ESPIPE:        // Invalid seek
+      return IREE_STATUS_INVALID_ARGUMENT;
+    case ETIMEDOUT:  // Connection timed out
+    case ETIME:      // Timer expired
+      return IREE_STATUS_DEADLINE_EXCEEDED;
+    case ENODEV:  // No such device
+    case ENOENT:  // No such file or directory
+#ifdef ENOMEDIUM
+    case ENOMEDIUM:  // No medium found
+#endif
+    case ENXIO:  // No such device or address
+    case ESRCH:  // No such process
+      return IREE_STATUS_NOT_FOUND;
+    case EEXIST:         // File exists
+    case EADDRNOTAVAIL:  // Address not available
+    case EALREADY:       // Connection already in progress
+#ifdef ENOTUNIQ
+    case ENOTUNIQ:  // Name not unique on network
+#endif
+      return IREE_STATUS_ALREADY_EXISTS;
+    case EPERM:   // Operation not permitted
+    case EACCES:  // Permission denied
+#ifdef ENOKEY
+    case ENOKEY:  // Required key not available
+#endif
+    case EROFS:  // Read only file system
+      return IREE_STATUS_PERMISSION_DENIED;
+    case ENOTEMPTY:   // Directory not empty
+    case EISDIR:      // Is a directory
+    case ENOTDIR:     // Not a directory
+    case EADDRINUSE:  // Address already in use
+    case EBADF:       // Invalid file descriptor
+#ifdef EBADFD
+    case EBADFD:  // File descriptor in bad state
+#endif
+    case EBUSY:    // Device or resource busy
+    case ECHILD:   // No child processes
+    case EISCONN:  // Socket is connected
+#ifdef EISNAM
+    case EISNAM:  // Is a named type file
+#endif
+#ifdef ENOTBLK
+    case ENOTBLK:  // Block device required
+#endif
+    case ENOTCONN:  // The socket is not connected
+    case EPIPE:     // Broken pipe
+#ifdef ESHUTDOWN
+    case ESHUTDOWN:  // Cannot send after transport endpoint shutdown
+#endif
+    case ETXTBSY:  // Text file busy
+#ifdef EUNATCH
+    case EUNATCH:  // Protocol driver not attached
+#endif
+      return IREE_STATUS_FAILED_PRECONDITION;
+    case ENOSPC:  // No space left on device
+#ifdef EDQUOT
+    case EDQUOT:  // Disk quota exceeded
+#endif
+    case EMFILE:   // Too many open files
+    case EMLINK:   // Too many links
+    case ENFILE:   // Too many open files in system
+    case ENOBUFS:  // No buffer space available
+    case ENODATA:  // No message is available on the STREAM read queue
+    case ENOMEM:   // Not enough space
+    case ENOSR:    // No STREAM resources
+#ifdef EUSERS
+    case EUSERS:  // Too many users
+#endif
+      return IREE_STATUS_RESOURCE_EXHAUSTED;
+#ifdef ECHRNG
+    case ECHRNG:  // Channel number out of range
+#endif
+    case EFBIG:      // File too large
+    case EOVERFLOW:  // Value too large to be stored in data type
+    case ERANGE:     // Result too large
+      return IREE_STATUS_OUT_OF_RANGE;
+#ifdef ENOPKG
+    case ENOPKG:  // Package not installed
+#endif
+    case ENOSYS:        // Function not implemented
+    case ENOTSUP:       // Operation not supported
+    case EAFNOSUPPORT:  // Address family not supported
+#ifdef EPFNOSUPPORT
+    case EPFNOSUPPORT:  // Protocol family not supported
+#endif
+    case EPROTONOSUPPORT:  // Protocol not supported
+#ifdef ESOCKTNOSUPPORT
+    case ESOCKTNOSUPPORT:  // Socket type not supported
+#endif
+    case EXDEV:  // Improper link
+      return IREE_STATUS_UNIMPLEMENTED;
+    case EAGAIN:  // Resource temporarily unavailable
+#ifdef ECOMM
+    case ECOMM:  // Communication error on send
+#endif
+    case ECONNREFUSED:  // Connection refused
+    case ECONNABORTED:  // Connection aborted
+    case ECONNRESET:    // Connection reset
+    case EINTR:         // Interrupted function call
+#ifdef EHOSTDOWN
+    case EHOSTDOWN:  // Host is down
+#endif
+    case EHOSTUNREACH:  // Host is unreachable
+    case ENETDOWN:      // Network is down
+    case ENETRESET:     // Connection aborted by network
+    case ENETUNREACH:   // Network unreachable
+    case ENOLCK:        // No locks available
+    case ENOLINK:       // Link has been severed
+#ifdef ENONET
+    case ENONET:  // Machine is not on the network
+#endif
+      return IREE_STATUS_UNAVAILABLE;
+    case EDEADLK:  // Resource deadlock avoided
+#ifdef ESTALE
+    case ESTALE:  // Stale file handle
+#endif
+      return IREE_STATUS_ABORTED;
+    case ECANCELED:  // Operation cancelled
+      return IREE_STATUS_CANCELLED;
+    default:
+      return IREE_STATUS_UNKNOWN;
+  }
+}
+
+#if defined(IREE_PLATFORM_WINDOWS)
+// Maps a Win32 GetLastError()-style error code to the closest canonical
+// iree_status_code_t; unmapped values become IREE_STATUS_UNKNOWN.
+IREE_API_EXPORT iree_status_code_t
+iree_status_code_from_win32_error(uint32_t error) {
+  switch (error) {
+    case ERROR_SUCCESS:
+      return IREE_STATUS_OK;
+    case ERROR_FILE_NOT_FOUND:
+    case ERROR_PATH_NOT_FOUND:
+      return IREE_STATUS_NOT_FOUND;
+    case ERROR_TOO_MANY_OPEN_FILES:
+    case ERROR_OUTOFMEMORY:
+    case ERROR_HANDLE_DISK_FULL:
+    case ERROR_HANDLE_EOF:
+      return IREE_STATUS_RESOURCE_EXHAUSTED;
+    case ERROR_ACCESS_DENIED:
+      return IREE_STATUS_PERMISSION_DENIED;
+    case ERROR_INVALID_HANDLE:
+      return IREE_STATUS_INVALID_ARGUMENT;
+    case ERROR_NOT_READY:
+    case ERROR_READ_FAULT:
+      return IREE_STATUS_UNAVAILABLE;
+    case ERROR_WRITE_FAULT:
+      return IREE_STATUS_DATA_LOSS;
+    case ERROR_NOT_SUPPORTED:
+      return IREE_STATUS_UNIMPLEMENTED;
+    default:
+      return IREE_STATUS_UNKNOWN;
+  }
+}
+#endif  // IREE_PLATFORM_WINDOWS
+
+//===----------------------------------------------------------------------===//
+// iree_status_t
+//===----------------------------------------------------------------------===//
+
+// Returns the canonical string form of |code| (e.g. "NOT_FOUND").
+// Returns an empty string (never NULL) for out-of-range codes so callers can
+// pass the result straight to printf-style formatting.
+IREE_API_EXPORT const char* iree_status_code_string(iree_status_code_t code) {
+  switch (code) {
+    case IREE_STATUS_OK:
+      return "OK";
+    case IREE_STATUS_CANCELLED:
+      return "CANCELLED";
+    case IREE_STATUS_UNKNOWN:
+      return "UNKNOWN";
+    case IREE_STATUS_INVALID_ARGUMENT:
+      return "INVALID_ARGUMENT";
+    case IREE_STATUS_DEADLINE_EXCEEDED:
+      return "DEADLINE_EXCEEDED";
+    case IREE_STATUS_NOT_FOUND:
+      return "NOT_FOUND";
+    case IREE_STATUS_ALREADY_EXISTS:
+      return "ALREADY_EXISTS";
+    case IREE_STATUS_PERMISSION_DENIED:
+      return "PERMISSION_DENIED";
+    case IREE_STATUS_RESOURCE_EXHAUSTED:
+      return "RESOURCE_EXHAUSTED";
+    case IREE_STATUS_FAILED_PRECONDITION:
+      return "FAILED_PRECONDITION";
+    case IREE_STATUS_ABORTED:
+      return "ABORTED";
+    case IREE_STATUS_OUT_OF_RANGE:
+      return "OUT_OF_RANGE";
+    case IREE_STATUS_UNIMPLEMENTED:
+      return "UNIMPLEMENTED";
+    case IREE_STATUS_INTERNAL:
+      return "INTERNAL";
+    case IREE_STATUS_UNAVAILABLE:
+      return "UNAVAILABLE";
+    case IREE_STATUS_DATA_LOSS:
+      return "DATA_LOSS";
+    case IREE_STATUS_UNAUTHENTICATED:
+      return "UNAUTHENTICATED";
+    case IREE_STATUS_DEFERRED:
+      return "DEFERRED";
+    default:
+      return "";
+  }
+}
+
+// TODO(#55): move payload methods/types to header when API is stabilized.
+
+// Opaque status handle. A status is a tagged pointer: the status code is
+// packed into the low bits of the (aligned) storage pointer, see
+// iree_status_storage() below.
+struct iree_status_handle_t {
+  uintptr_t value;
+};
+
+// Defines the type of an iree_status_payload_t.
+typedef enum iree_status_payload_type_e {
+  // Opaque; payload may still be formatted by a formatter but is not possible
+  // to retrieve by the programmatic APIs.
+  IREE_STATUS_PAYLOAD_TYPE_OPAQUE = 0,
+  // A string message annotation of type iree_status_payload_message_t.
+  IREE_STATUS_PAYLOAD_TYPE_MESSAGE = 1,
+  // Starting type ID for user payloads. IREE reserves all payloads with types
+  // less than this.
+  IREE_STATUS_PAYLOAD_TYPE_MIN_USER = 0x70000000u,
+} iree_status_payload_type_t;
+
+typedef struct iree_status_payload_t iree_status_payload_t;
+
+// Function that formats a payload into a human-readable string form for logs.
+typedef void(IREE_API_PTR* iree_status_payload_formatter_t)(
+    const iree_status_payload_t* payload, iree_host_size_t buffer_capacity,
+    char* buffer, iree_host_size_t* out_buffer_length);
+
+// Header for optional status payloads.
+// Each status may have zero or more payloads associated with it that can later
+// be used to produce more detailed logging or programmatically query
+// information about an error.
+struct iree_status_payload_t {
+  // Next payload in the status payload linked list.
+  struct iree_status_payload_t* next;
+  // Payload type identifier used for programmatic access to payloads. May be
+  // IREE_STATUS_PAYLOAD_TYPE_OPAQUE if the payload cannot be accessed directly.
+  iree_status_payload_type_t type;
+  // Allocator used for the payload and associated resources.
+  iree_allocator_t allocator;
+  // String formatter callback used to write the payload into a string buffer.
+  // If not present then the payload will be mentioned but not dumped when the
+  // status is logged.
+  iree_status_payload_formatter_t formatter;
+};
+
+// A string message (IREE_STATUS_PAYLOAD_TYPE_MESSAGE).
+typedef struct iree_status_payload_message_t {
+  iree_status_payload_t header;
+  // String data reference. May point to an address immediately following this
+  // struct (if copied) or a constant string reference in rodata.
+  iree_string_view_t message;
+} iree_status_payload_message_t;
+
+// Allocated storage for an iree_status_t.
+// Only statuses that have either source information or payloads will have
+// storage allocated for them.
+typedef struct iree_status_storage_t {
+  // Optional singly-linked list of payloads associated with the status; the
+  // tail pointer allows O(1) appends in iree_status_append_payload.
+  // Head = first added, tail = last added.
+  iree_status_payload_t* payload_head;
+  iree_status_payload_t* payload_tail;
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_SOURCE_LOCATION) != 0
+  // __FILE__ of the originating status allocation.
+  const char* file;
+  // __LINE__ of the originating status allocation.
+  uint32_t line;
+#endif  // has IREE_STATUS_FEATURE_SOURCE_LOCATION
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS) != 0
+  // Optional message that is allocated either as a constant string in rodata or
+  // present as a suffix on the storage.
+  iree_string_view_t message;
+#endif  // has IREE_STATUS_FEATURE_ANNOTATIONS
+} iree_status_storage_t;
+
+// Strips the packed code bits from a status to recover the storage pointer;
+// evaluates to NULL when the status carries no storage (code-only statuses).
+#define iree_status_storage(status) \
+  ((iree_status_storage_t*)(((uintptr_t)(status) & ~IREE_STATUS_CODE_MASK)))
+
+// Appends |payload| to the tail of |storage|'s payload list in O(1).
+// Ownership of |payload| transfers to the storage (freed in iree_status_free
+// via payload->allocator). Returns |status| unchanged for call chaining.
+static iree_status_t iree_status_append_payload(
+    iree_status_t status, iree_status_storage_t* storage,
+    iree_status_payload_t* payload) {
+  if (!storage->payload_tail) {
+    // Empty list: payload becomes both head and tail.
+    storage->payload_head = payload;
+  } else {
+    storage->payload_tail->next = payload;
+  }
+  storage->payload_tail = payload;
+  return status;
+}
+
+// Formats an iree_status_payload_message_t to the given output |buffer|.
+// |out_buffer_length| will be set to the number of characters written excluding
+// NUL. If |buffer| is omitted then |out_buffer_length| will be set to the
+// total number of characters in |buffer_capacity| required to contain the
+// entire message.
+static void iree_status_payload_message_formatter(
+    const iree_status_payload_t* payload, iree_host_size_t buffer_capacity,
+    char* buffer, iree_host_size_t* out_buffer_length) {
+  iree_status_payload_message_t* message_payload =
+      (iree_status_payload_message_t*)payload;
+  if (!buffer) {
+    // Query mode: report the capacity required (excluding NUL).
+    *out_buffer_length = message_payload->message.size;
+    return;
+  }
+  // Truncate so the NUL terminator stays within |buffer_capacity|; previously
+  // a message of exactly |buffer_capacity| characters wrote its NUL one byte
+  // past the end of the caller's buffer.
+  iree_host_size_t n = message_payload->message.size;
+  if (n >= buffer_capacity) {
+    n = buffer_capacity > 0 ? buffer_capacity - 1 : 0;
+  }
+  memcpy(buffer, message_payload->message.data, n);
+  if (buffer_capacity > 0) buffer[n] = '\0';
+  *out_buffer_length = n;
+}
+
+// Captures the current stack and attaches it to the status storage.
+// A count of |skip_frames| will be skipped from the top of the stack.
+// Setting |skip_frames|=0 will include the caller in the stack while
+// |skip_frames|=1 will exclude it.
+// Currently a no-op in all configurations: the stack-trace feature body below
+// is unimplemented, so both parameters are intentionally unused.
+static void iree_status_attach_stack_trace(iree_status_storage_t* storage,
+                                           int skip_frames) {
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_STACK_TRACE) != 0
+  // TODO(#55): backtrace or other magic.
+#endif  // has IREE_STATUS_FEATURE_STACK_TRACE
+}
+
+// Allocates status storage recording |code|, source location, and a constant
+// |message| (referenced, not copied - must be rodata/outlive the status).
+// Returns a tagged pointer with |code| packed into the low bits; falls back to
+// a code-only status if features are disabled or allocation fails.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t
+iree_status_allocate(iree_status_code_t code, const char* file, uint32_t line,
+                     iree_string_view_t message) {
+#if IREE_STATUS_FEATURES == 0
+  // More advanced status code features like source location and messages are
+  // disabled. All statuses are just the codes.
+  return iree_status_from_code(code);
+#else
+  // No-op for OK statuses; we won't get these from the macros but may be called
+  // with this from marshaling code.
+  if (IREE_UNLIKELY(code == IREE_STATUS_OK)) return iree_ok_status();
+
+  // Allocate storage with the appropriate alignment such that we can pack the
+  // code in the lower bits of the pointer. Since failed statuses are rare and
+  // likely have much larger costs (like string formatting) the extra bytes for
+  // alignment are worth being able to avoid pointer dereferences and other
+  // things during the normal code paths that just check codes.
+  //
+  // Note that we are using the CRT allocation function here, as we can't trust
+  // our allocator system to work when we are throwing errors (as we may be
+  // allocating this error from a failed allocation!).
+  size_t storage_alignment = (IREE_STATUS_CODE_MASK + 1);
+  size_t storage_size =
+      iree_host_align(sizeof(iree_status_storage_t), storage_alignment);
+  iree_status_storage_t* storage = (iree_status_storage_t*)iree_aligned_alloc(
+      storage_alignment, storage_size);
+  // Allocation failure: degrade gracefully to a code-only status.
+  if (IREE_UNLIKELY(!storage)) return iree_status_from_code(code);
+  memset(storage, 0, sizeof(*storage));
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_SOURCE_LOCATION) != 0
+  storage->file = file;
+  storage->line = line;
+#endif  // has IREE_STATUS_FEATURE_SOURCE_LOCATION
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS) != 0
+  // NOTE: messages are rodata strings here and not retained.
+  storage->message = message;
+#endif  // has IREE_STATUS_FEATURE_ANNOTATIONS
+
+  iree_status_attach_stack_trace(storage, /*skip_frames=*/1);
+  // Tagged pointer: storage is aligned so the low bits are free for the code.
+  return (iree_status_t)((uintptr_t)storage | (code & IREE_STATUS_CODE_MASK));
+#endif  // has any IREE_STATUS_FEATURES
+}
+
+// printf-style variant of iree_status_allocate; formats and copies the message
+// into the status storage.
+// Two va_lists are started because the formatting routine walks the arguments
+// twice (once to size, once to write) and a va_list may only be traversed once.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t
+iree_status_allocate_f(iree_status_code_t code, const char* file, uint32_t line,
+                       const char* format, ...) {
+  va_list varargs_0, varargs_1;
+  va_start(varargs_0, format);
+  va_start(varargs_1, format);
+  iree_status_t ret =
+      iree_status_allocate_vf(code, file, line, format, varargs_0, varargs_1);
+  va_end(varargs_0);
+  va_end(varargs_1);
+  return ret;
+}
+
+// va_list variant of iree_status_allocate_f. Requires two independently
+// started va_lists because the arguments are walked twice: |varargs_0| to size
+// the message and |varargs_1| to write it.
+// Falls back to a code-only status when annotations are disabled or any
+// formatting/allocation step fails.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t iree_status_allocate_vf(
+    iree_status_code_t code, const char* file, uint32_t line,
+    const char* format, va_list varargs_0, va_list varargs_1) {
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS) == 0
+  // Annotations disabled; ignore the format string/args.
+  return iree_status_allocate(code, file, line, iree_string_view_empty());
+#else
+  // No-op for OK statuses; we won't get these from the macros but may be called
+  // with this from marshaling code.
+  if (IREE_UNLIKELY(code == IREE_STATUS_OK)) return iree_ok_status();
+
+  // Compute the total number of bytes (including NUL) required to store the
+  // message.
+  int message_size =
+      vsnprintf(/*buffer=*/NULL, /*buffer_count=*/0, format, varargs_0);
+  if (message_size < 0) return iree_status_from_code(code);
+  ++message_size;  // NUL byte
+
+  // Allocate storage with the additional room to store the formatted message.
+  // This avoids additional allocations for the common case of a message coming
+  // only from the original status error site.
+  size_t storage_alignment = (IREE_STATUS_CODE_MASK + 1);
+  size_t storage_size = iree_host_align(
+      sizeof(iree_status_storage_t) + message_size, storage_alignment);
+  iree_status_storage_t* storage = (iree_status_storage_t*)iree_aligned_alloc(
+      storage_alignment, storage_size);
+  if (IREE_UNLIKELY(!storage)) return iree_status_from_code(code);
+  memset(storage, 0, sizeof(*storage));
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_SOURCE_LOCATION) != 0
+  storage->file = file;
+  storage->line = line;
+#endif  // has IREE_STATUS_FEATURE_SOURCE_LOCATION
+
+  // vsnprintf directly into message buffer (the bytes trailing the struct).
+  storage->message.size = message_size - 1;
+  storage->message.data = (const char*)storage + sizeof(iree_status_storage_t);
+  int ret =
+      vsnprintf((char*)storage->message.data, message_size, format, varargs_1);
+  if (IREE_UNLIKELY(ret < 0)) {
+    iree_aligned_free(storage);
+    // Use the same code-only constructor as the other failure paths above
+    // (previously this raw-cast the code directly).
+    return iree_status_from_code(code);
+  }
+
+  iree_status_attach_stack_trace(storage, /*skip_frames=*/1);
+  return (iree_status_t)((uintptr_t)storage | (code & IREE_STATUS_CODE_MASK));
+#endif  // has IREE_STATUS_FEATURE_ANNOTATIONS
+}
+
+// Returns a deep copy of |status| that the caller owns and must free.
+// The source location and message are copied; NOTE(review): attached payloads
+// are not carried over to the clone - only the storage message survives.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t
+iree_status_clone(iree_status_t status) {
+#if IREE_STATUS_FEATURES == 0
+  // Statuses are just codes; nothing to do.
+  return status;
+#else
+  iree_status_storage_t* storage = iree_status_storage(status);
+  // Code-only statuses have no storage and can be copied by value.
+  if (!storage) return status;
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_SOURCE_LOCATION) != 0
+  const char* file = storage->file;
+  uint32_t line = storage->line;
+#else
+  const char* file = NULL;
+  uint32_t line = 0;
+#endif  // has IREE_STATUS_FEATURE_SOURCE_LOCATION
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS) != 0
+  iree_string_view_t message = storage->message;
+#else
+  iree_string_view_t message = iree_string_view_empty();
+#endif  // has IREE_STATUS_FEATURE_ANNOTATIONS
+
+  // Always copy the message by performing the formatting as we don't know
+  // whether the original status has ownership or not.
+  return iree_status_allocate_f(iree_status_code(status), file, line, "%.*s",
+                                (int)message.size, message.data);
+#endif  // has no IREE_STATUS_FEATURES
+}
+
+// Frees the storage (if any) behind |status|, including all attached payloads.
+// Safe to call on code-only statuses (no storage pointer bits set).
+IREE_API_EXPORT void iree_status_free(iree_status_t status) {
+#if IREE_STATUS_FEATURES != 0
+  iree_status_storage_t* storage = iree_status_storage(status);
+  if (!storage) return;
+  // Walk the payload list; each payload is freed with the allocator it was
+  // created with.
+  iree_status_payload_t* payload = storage->payload_head;
+  while (payload) {
+    iree_status_payload_t* next = payload->next;
+    iree_allocator_free(payload->allocator, payload);
+    payload = next;
+  }
+  // Storage itself came from iree_aligned_alloc in iree_status_allocate*.
+  iree_aligned_free(storage);
+#endif  // has any IREE_STATUS_FEATURES
+}
+
+// Explicitly discards |status| (freeing any storage) and returns OK.
+IREE_API_EXPORT iree_status_t iree_status_ignore(iree_status_t status) {
+  // We can set an 'ignored' flag on the status so that we can otherwise assert
+  // in iree_status_free when statuses are freed without this being called.
+  // Hoping with the C++ Status wrapper we won't hit that often so that
+  // complexity is skipped for now.
+  iree_status_free(status);
+  return iree_ok_status();
+}
+
+// Combines two statuses, keeping the first failure: if |base_status| already
+// failed, |new_status| is freed and |base_status| is returned; otherwise
+// ownership of |new_status| passes to the caller.
+IREE_API_EXPORT iree_status_t iree_status_join(iree_status_t base_status,
+                                               iree_status_t new_status) {
+  // TODO(benvanik): annotate |base_status| with |new_status| so we see it?
+  // This is intended for failure handling and usually the first failure is the
+  // root cause and most important to see.
+  if (!iree_status_is_ok(base_status)) {
+    iree_status_ignore(new_status);
+    return base_status;
+  }
+  return new_status;
+}
+
+// Prints |status| to stderr and aborts the process; never returns.
+// Asserts that the status is a failure - aborting on OK is a caller bug.
+IREE_API_EXPORT IREE_ATTRIBUTE_NORETURN void iree_status_abort(
+    iree_status_t status) {
+  iree_status_fprint(stderr, status);
+  IREE_ASSERT(!iree_status_is_ok(status),
+              "only valid to call with failing status codes");
+  // Freeing is largely cosmetic (we are about to abort) but keeps leak
+  // checkers quiet in death tests.
+  iree_status_free(status);
+  abort();
+}
+
+// Returns the status code of |status| and frees any associated storage.
+// Use when only the code matters and the status will not be propagated.
+IREE_API_EXPORT iree_status_code_t
+iree_status_consume_code(iree_status_t status) {
+  iree_status_code_t code = iree_status_code(status);
+  iree_status_free(status);
+  return code;
+}
+
+#if IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS
+
+// Annotates |base_status| with a constant |message| (referenced, not copied -
+// must be rodata/outlive the status). OK statuses and empty messages pass
+// through unchanged. The message lands in the cheapest available slot:
+// freshly allocated storage, the storage's empty message field, or an
+// appended message payload.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t
+iree_status_annotate(iree_status_t base_status, iree_string_view_t message) {
+  if (iree_status_is_ok(base_status) || iree_string_view_is_empty(message)) {
+    return base_status;
+  }
+
+  // If there's no storage yet we can just reuse normal allocation. Both that
+  // and this do not copy |message|.
+  iree_status_storage_t* storage = iree_status_storage(base_status);
+  if (!storage) {
+    return iree_status_allocate(iree_status_code(base_status), NULL, 0,
+                                message);
+  } else if (iree_string_view_is_empty(storage->message)) {
+    // Storage exists but has no message yet: fill the inline slot for free.
+    storage->message = message;
+    return base_status;
+  }
+
+  // Inline slot taken: append the message as a payload instead.
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_status_payload_message_t* payload = NULL;
+  iree_status_ignore(
+      iree_allocator_malloc(allocator, sizeof(*payload), (void**)&payload));
+  // Best-effort: on allocation failure drop the annotation, keep the status.
+  if (IREE_UNLIKELY(!payload)) return base_status;
+  memset(payload, 0, sizeof(*payload));
+  payload->header.type = IREE_STATUS_PAYLOAD_TYPE_MESSAGE;
+  payload->header.allocator = allocator;
+  payload->header.formatter = iree_status_payload_message_formatter;
+  payload->message = message;
+  return iree_status_append_payload(base_status, storage,
+                                    (iree_status_payload_t*)payload);
+}
+
+// va_list form of iree_status_annotate_f. Requires two independently started
+// va_lists because the arguments are walked twice: |varargs_0| to size the
+// message and |varargs_1| to write it. Best-effort: any failure returns
+// |base_status| unannotated.
+static IREE_MUST_USE_RESULT iree_status_t
+iree_status_annotate_vf(iree_status_t base_status, const char* format,
+                        va_list varargs_0, va_list varargs_1) {
+  if (iree_status_is_ok(base_status)) return base_status;
+
+  // If there's no storage yet we can just reuse normal allocation. Both that
+  // and this do not copy |message|.
+  iree_status_storage_t* storage = iree_status_storage(base_status);
+  if (!storage) {
+    return iree_status_allocate_vf(iree_status_code(base_status), NULL, 0,
+                                   format, varargs_0, varargs_1);
+  }
+
+  // Compute the total number of bytes (including NUL) required to store the
+  // message.
+  int message_size =
+      vsnprintf(/*buffer=*/NULL, /*buffer_count=*/0, format, varargs_0);
+  if (message_size < 0) return base_status;
+  ++message_size;  // NUL byte
+
+  // Allocate the payload with the additional room to store the formatted
+  // message so a single allocation covers both.
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_status_payload_message_t* payload = NULL;
+  iree_status_ignore(iree_allocator_malloc(
+      allocator, sizeof(*payload) + message_size, (void**)&payload));
+  if (IREE_UNLIKELY(!payload)) return base_status;
+  memset(payload, 0, sizeof(*payload));
+  payload->header.type = IREE_STATUS_PAYLOAD_TYPE_MESSAGE;
+  payload->header.allocator = allocator;
+  payload->header.formatter = iree_status_payload_message_formatter;
+
+  // vsnprintf directly into message buffer (the bytes trailing the struct).
+  payload->message.size = message_size - 1;
+  payload->message.data =
+      (const char*)payload + sizeof(iree_status_payload_message_t);
+  int ret = vsnprintf((char*)payload->message.data, payload->message.size + 1,
+                      format, varargs_1);
+  if (IREE_UNLIKELY(ret < 0)) {
+    // Must free with the same allocator that produced the payload; the
+    // previous iree_aligned_free here mismatched iree_allocator_malloc and
+    // invoked undefined behavior (see the matching free in iree_status_free).
+    iree_allocator_free(allocator, payload);
+    return base_status;
+  }
+  return iree_status_append_payload(base_status, storage,
+                                    (iree_status_payload_t*)payload);
+}
+
+// Annotates |base_status| with a printf-style formatted message.
+// Returns |base_status| unchanged when it is OK (see iree_status_annotate_vf).
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t IREE_PRINTF_ATTRIBUTE(2, 3)
+    iree_status_annotate_f(iree_status_t base_status, const char* format, ...) {
+  // We walk the lists twice and as each va_list can only be walked once we
+  // need to double-up. iree_status_annotate_vf could use va_copy to clone the
+  // single list however the proper management of va_end is trickier and this
+  // works.
+  va_list varargs_0, varargs_1;
+  va_start(varargs_0, format);
+  va_start(varargs_1, format);
+  iree_status_t ret =
+      iree_status_annotate_vf(base_status, format, varargs_0, varargs_1);
+  va_end(varargs_0);
+  va_end(varargs_1);
+  return ret;
+}
+
+#endif // has IREE_STATUS_FEATURE_ANNOTATIONS
+
+// Formats |status| into |buffer| (when non-NULL) and reports the total length
+// via |out_buffer_length|. Mirrors snprintf sizing: if |buffer| is NULL or too
+// small the function still returns true and reports the length that would
+// have been written (excluding the NUL terminator) so callers can size a
+// buffer and call again.
+IREE_API_EXPORT bool iree_status_format(iree_status_t status,
+                                        iree_host_size_t buffer_capacity,
+                                        char* buffer,
+                                        iree_host_size_t* out_buffer_length) {
+  *out_buffer_length = 0;
+
+  // Grab storage which may have a message and zero or more payloads.
+  // (IREE_ATTRIBUTE_UNUSED: with all status features compiled out the
+  // variable is never read below.)
+  iree_status_storage_t* storage IREE_ATTRIBUTE_UNUSED =
+      iree_status_storage(status);
+
+  // Prefix with source location and status code string (may be 'OK').
+  iree_host_size_t buffer_length = 0;
+  iree_status_code_t status_code = iree_status_code(status);
+  int n = 0;
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_SOURCE_LOCATION) != 0
+  if (storage && storage->file) {
+    n = snprintf(buffer ? buffer + buffer_length : NULL,
+                 buffer ? buffer_capacity - buffer_length : 0, "%s:%d: %s",
+                 storage->file, storage->line,
+                 iree_status_code_string(status_code));
+  } else {
+    n = snprintf(buffer ? buffer + buffer_length : NULL,
+                 buffer ? buffer_capacity - buffer_length : 0, "%s",
+                 iree_status_code_string(status_code));
+  }
+#else
+  n = snprintf(buffer ? buffer + buffer_length : NULL,
+               buffer ? buffer_capacity - buffer_length : 0, "%s",
+               iree_status_code_string(status_code));
+#endif  // has IREE_STATUS_FEATURE_SOURCE_LOCATION
+  if (IREE_UNLIKELY(n < 0)) {
+    return false;
+  } else if (buffer && n >= buffer_capacity - buffer_length) {
+    // Out of room: drop to measurement-only mode but keep accumulating the
+    // total length required so it can be reported to the caller.
+    buffer = NULL;
+  }
+  buffer_length += n;
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS) != 0
+  // Append base storage message.
+  if (storage && !iree_string_view_is_empty(storage->message)) {
+    n = snprintf(buffer ? buffer + buffer_length : NULL,
+                 buffer ? buffer_capacity - buffer_length : 0, "; %.*s",
+                 (int)storage->message.size, storage->message.data);
+    if (IREE_UNLIKELY(n < 0)) {
+      return false;
+    } else if (buffer && n >= buffer_capacity - buffer_length) {
+      buffer = NULL;
+    }
+    buffer_length += n;
+  }
+#endif  // has IREE_STATUS_FEATURE_ANNOTATIONS
+
+#if IREE_STATUS_FEATURES != 0
+  // Append each payload separated by a '; ' delimiter.
+  iree_status_payload_t* payload = storage ? storage->payload_head : NULL;
+  while (payload != NULL) {
+    // Skip payloads that have no textual representation.
+    if (!payload->formatter) {
+      payload = payload->next;
+      continue;
+    }
+
+    // Append '; ' separator to join with message above and other payloads.
+    // Needs room for the two separator characters plus the trailing NUL.
+    if (buffer) {
+      if (2 >= buffer_capacity - buffer_length) {
+        buffer = NULL;
+      } else {
+        buffer[buffer_length] = ';';
+        buffer[buffer_length + 1] = ' ';
+        buffer[buffer_length + 2] = '\0';
+      }
+    }
+    buffer_length += 2;  // '; '
+
+    // Append payload via custom formatter callback.
+    iree_host_size_t payload_buffer_length = 0;
+    payload->formatter(payload, buffer ? buffer_capacity - buffer_length : 0,
+                       buffer ? buffer + buffer_length : NULL,
+                       &payload_buffer_length);
+    if (buffer && payload_buffer_length >= buffer_capacity - buffer_length) {
+      buffer = NULL;
+    }
+    buffer_length += payload_buffer_length;
+
+    payload = payload->next;
+  }
+#endif  // has IREE_STATUS_FEATURES
+
+  *out_buffer_length = buffer_length;
+  return true;
+}
+
+// Converts the status to an allocated string value using the given allocator.
+// The caller must free the buffer with |allocator|.
+static bool iree_status_to_string(iree_status_t status,
+                                  iree_allocator_t allocator, char** out_buffer,
+                                  iree_host_size_t* out_buffer_length) {
+  *out_buffer_length = 0;
+
+  // First pass: measure the formatted length without writing anything.
+  iree_host_size_t required_length = 0;
+  if (IREE_UNLIKELY(!iree_status_format(status, /*buffer_capacity=*/0,
+                                        /*buffer=*/NULL, &required_length))) {
+    return false;
+  }
+
+  // Buffer capacity needs to be +1 for the NUL terminator (see snprintf).
+  char* string_buffer = NULL;
+  iree_status_t alloc_status = iree_allocator_malloc(
+      allocator, required_length + 1, (void**)&string_buffer);
+  if (!iree_status_is_ok(alloc_status)) {
+    iree_status_ignore(alloc_status);
+    return false;
+  }
+
+  // Second pass: format into the right-sized buffer.
+  if (!iree_status_format(status, required_length + 1, string_buffer,
+                          out_buffer_length)) {
+    iree_allocator_free(allocator, string_buffer);
+    return false;
+  }
+  *out_buffer = string_buffer;
+  return true;
+}
+
+IREE_API_EXPORT void iree_status_fprint(FILE* file, iree_status_t status) {
+  // TODO(benvanik): better support for colors/etc - possibly move to logging.
+  // TODO(benvanik): do this without allocation by streaming the status.
+  iree_allocator_t allocator = iree_allocator_system();
+  char* string_value = NULL;
+  iree_host_size_t string_length = 0;
+  bool formatted = iree_status_to_string(status, allocator, &string_value,
+                                         &string_length);
+  if (!formatted) {
+    // Formatting failed; emit a placeholder so failures are still visible.
+    fprintf(file, "(?)\n");
+    fflush(file);
+    return;
+  }
+  fprintf(file, "%.*s\n", (int)string_length, string_value);
+  iree_allocator_free(allocator, string_value);
+  fflush(file);
+}
diff --git a/runtime/src/iree/base/status.h b/runtime/src/iree/base/status.h
new file mode 100644
index 0000000..7192069
--- /dev/null
+++ b/runtime/src/iree/base/status.h
@@ -0,0 +1,505 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_STATUS_H_
+#define IREE_BASE_STATUS_H_
+
+#include <errno.h>
+#include <memory.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/attributes.h"
+#include "iree/base/config.h"
+#include "iree/base/string_view.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// IREE_STATUS_FEATURE flags and IREE_STATUS_MODE setting
+//===----------------------------------------------------------------------===//
+
+// Captures origin source information on a call to iree_make_status.
+// Status storage will be allocated and reference the __FILE__ and __LINE__
+// of where it is invoked.
+#define IREE_STATUS_FEATURE_SOURCE_LOCATION (1 << 0)
+
+// Captures annotation messages provided via iree_make_status or
+// iree_status_annotate.
+// Status storage will be allocated.
+#define IREE_STATUS_FEATURE_ANNOTATIONS (1 << 1)
+
+// Captures the current callstack on a call to iree_make_status.
+// Status storage will be allocated.
+#define IREE_STATUS_FEATURE_STACK_TRACE (1 << 2)
+
+// Set IREE_STATUS_FEATURES based on IREE_STATUS_MODE if the user hasn't
+// overridden it with more specific settings.
+//
+// IREE_STATUS_MODE = 0: statuses are just integers
+// IREE_STATUS_MODE = 1: statuses have source location of error
+// IREE_STATUS_MODE = 2: statuses also have custom annotations
+// IREE_STATUS_MODE = 3: statuses also have stack traces of the error site
+#if !defined(IREE_STATUS_FEATURES)
+#if defined(IREE_STATUS_MODE) && IREE_STATUS_MODE == 1
+#define IREE_STATUS_FEATURES (IREE_STATUS_FEATURE_SOURCE_LOCATION)
+#elif defined(IREE_STATUS_MODE) && IREE_STATUS_MODE == 2
+#define IREE_STATUS_FEATURES \
+ (IREE_STATUS_FEATURE_SOURCE_LOCATION | IREE_STATUS_FEATURE_ANNOTATIONS)
+#elif defined(IREE_STATUS_MODE) && IREE_STATUS_MODE == 3
+#define IREE_STATUS_FEATURES \
+ (IREE_STATUS_FEATURE_SOURCE_LOCATION | IREE_STATUS_FEATURE_ANNOTATIONS | \
+ IREE_STATUS_FEATURE_STACK_TRACE)
+#else
+#define IREE_STATUS_FEATURES 0
+#endif // IREE_STATUS_MODE
+#endif // !IREE_STATUS_FEATURES
+
+//===----------------------------------------------------------------------===//
+// iree_status_t and error reporting
+//===----------------------------------------------------------------------===//
+
+// Well-known status codes matching iree::StatusCode.
+// Note that any code within IREE_STATUS_CODE_MASK is valid even if not
+// enumerated here. Always check for unhandled errors/have default conditions.
+typedef enum iree_status_code_e {
+  // Successful operation.
+  IREE_STATUS_OK = 0,
+
+  // Operation was cancelled by the caller.
+  IREE_STATUS_CANCELLED = 1,
+
+  // Unknown error, or error that could not be mapped to this enum.
+  IREE_STATUS_UNKNOWN = 2,
+
+  // The caller provided an invalid argument and future calls with the same
+  // arguments will fail. If the failure is predicated on system state that may
+  // change prefer IREE_STATUS_OUT_OF_RANGE.
+  IREE_STATUS_INVALID_ARGUMENT = 3,
+
+  // A deadline was exceeded before the call could complete.
+  // This can be returned even if the operation would have completed
+  // successfully had the deadline not been met.
+  IREE_STATUS_DEADLINE_EXCEEDED = 4,
+
+  // A referenced resource could not be found or was unavailable to all
+  // requesters. IREE_STATUS_PERMISSION_DENIED should be used if only an
+  // individual requester is denied access.
+  IREE_STATUS_NOT_FOUND = 5,
+
+  // The resource the caller attempted to create already exists.
+  IREE_STATUS_ALREADY_EXISTS = 6,
+
+  // The caller does not have permission to execute the operation or have access
+  // to the requested resources.
+  IREE_STATUS_PERMISSION_DENIED = 7,
+
+  // Some resource type has been exhausted and the operation is unable to
+  // reserve what it requires, either by quota or underlying system exhaustion.
+  IREE_STATUS_RESOURCE_EXHAUSTED = 8,
+
+  // The operation was rejected because the system is not in a state required
+  // for the operation's execution.
+  //
+  // Use IREE_STATUS_UNAVAILABLE if the caller can retry the operation.
+  // Use IREE_STATUS_ABORTED if the caller should restart their transaction
+  // (the entire sequence of operations is invalid).
+  // Use IREE_STATUS_FAILED_PRECONDITION if the caller should not retry until
+  // the system state has been explicitly fixed.
+  IREE_STATUS_FAILED_PRECONDITION = 9,
+
+  // The operation was aborted by the system.
+  // If responding to a caller-requested cancellation use IREE_STATUS_CANCELLED.
+  IREE_STATUS_ABORTED = 10,
+
+  // The operation was attempted past the valid range (of a resource, etc).
+  // Indicates the operation can be retried if the system state is fixed.
+  IREE_STATUS_OUT_OF_RANGE = 11,
+
+  // Operation has not been implemented or is not supported.
+  IREE_STATUS_UNIMPLEMENTED = 12,
+
+  // An internal error has occurred and some invariants expected by an
+  // underlying system have been violated. This error code is reserved for
+  // serious errors.
+  IREE_STATUS_INTERNAL = 13,
+
+  // The system used to perform the operation is currently (and transiently)
+  // unavailable. Callers can retry with backoff.
+  IREE_STATUS_UNAVAILABLE = 14,
+
+  // A serious unrecoverable data loss or corruption has occurred.
+  // Indicates that an underlying system or resource has failed in such a way
+  // that all related operations may produce incorrect results.
+  IREE_STATUS_DATA_LOSS = 15,
+
+  // The requested operation does not have proper authentication.
+  // Callers can correct this and retry.
+  IREE_STATUS_UNAUTHENTICATED = 16,
+
+  // The operation has been deferred and must be resumed at a future point.
+  // Used by resumable operations as part of scheduling and execution systems.
+  // Callers that do not handle deferred execution can treat this as a failure.
+  IREE_STATUS_DEFERRED = 17,
+
+  // Bitmask covering the low 5 bits of an iree_status_t value where the
+  // status code is stored (see the iree_status_t documentation below).
+  IREE_STATUS_CODE_MASK = 0x1Fu,
+} iree_status_code_t;
+
+// Opaque status structure containing an iree_status_code_t and optional status
+// object with more detailed information and payloads.
+//
+// The status value uses the lower 5 bits to store the iree_status_code_t and
+// the remaining uintptr_t bits to store an optional status payload pointer.
+// An OK status will always be bit-equivalent to 0 to make success/failure
+// checks as cheap as an integer non-zero comparison. As the payload is optional
+// it's legal to construct an iree_status_t from an iree_status_code_t directly
+// meaning `return iree_status_from_code(IREE_STATUS_INTERNAL);` (etc) is valid,
+// though not as useful as constructing via iree_make_status (which captures
+// additional info).
+typedef struct iree_status_handle_t* iree_status_t;
+
+// Returns an iree_status_t from an iree_status_code_t.
+// The resulting status has no allocated storage; only the code bits are set.
+#define iree_status_from_code(code)                       \
+  ((iree_status_t)((uintptr_t)((iree_status_code_t)(code)) & \
+                   IREE_STATUS_CODE_MASK))
+
+// Returns the iree_status_code_t from an iree_status_t.
+// Valid for both bare codes and statuses carrying storage, as the code
+// always occupies the low bits of the value.
+#define iree_status_code(value) \
+  ((iree_status_code_t)(((uintptr_t)(value)) & IREE_STATUS_CODE_MASK))
+
+// Macros to check the value of a status code.
+#define iree_status_is_ok(value) \
+ IREE_LIKELY((uintptr_t)(value) == IREE_STATUS_OK)
+#define iree_status_is_cancelled(value) \
+ (iree_status_code(value) == IREE_STATUS_CANCELLED)
+#define iree_status_is_unknown(value) \
+ (iree_status_code(value) == IREE_STATUS_UNKNOWN)
+#define iree_status_is_invalid_argument(value) \
+ (iree_status_code(value) == IREE_STATUS_INVALID_ARGUMENT)
+#define iree_status_is_deadline_exceeded(value) \
+ (iree_status_code(value) == IREE_STATUS_DEADLINE_EXCEEDED)
+#define iree_status_is_not_found(value) \
+ (iree_status_code(value) == IREE_STATUS_NOT_FOUND)
+#define iree_status_is_already_exists(value) \
+ (iree_status_code(value) == IREE_STATUS_ALREADY_EXISTS)
+#define iree_status_is_permission_denied(value) \
+ (iree_status_code(value) == IREE_STATUS_PERMISSION_DENIED)
+#define iree_status_is_resource_exhausted(value) \
+ (iree_status_code(value) == IREE_STATUS_RESOURCE_EXHAUSTED)
+#define iree_status_is_failed_precondition(value) \
+ (iree_status_code(value) == IREE_STATUS_FAILED_PRECONDITION)
+#define iree_status_is_aborted(value) \
+ (iree_status_code(value) == IREE_STATUS_ABORTED)
+#define iree_status_is_out_of_range(value) \
+ (iree_status_code(value) == IREE_STATUS_OUT_OF_RANGE)
+#define iree_status_is_unimplemented(value) \
+ (iree_status_code(value) == IREE_STATUS_UNIMPLEMENTED)
+#define iree_status_is_internal(value) \
+ (iree_status_code(value) == IREE_STATUS_INTERNAL)
+#define iree_status_is_unavailable(value) \
+ (iree_status_code(value) == IREE_STATUS_UNAVAILABLE)
+#define iree_status_is_data_loss(value) \
+ (iree_status_code(value) == IREE_STATUS_DATA_LOSS)
+#define iree_status_is_unauthenticated(value) \
+ (iree_status_code(value) == IREE_STATUS_UNAUTHENTICATED)
+#define iree_status_is_deferred(value) \
+ (iree_status_code(value) == IREE_STATUS_DEFERRED)
+
+// Token-pasting helpers; the two-level expansion ensures arguments (such as
+// __COUNTER__) are macro-expanded before being pasted.
+#define IREE_STATUS_IMPL_CONCAT_INNER_(x, y) x##y
+#define IREE_STATUS_IMPL_CONCAT_(x, y) IREE_STATUS_IMPL_CONCAT_INNER_(x, y)
+
+// Variadic plumbing: GET_EXPR_ peels off the first argument, GET_ARGS_ yields
+// the remainder, and GET_MACRO_ consumes 15 positional slots so that a
+// trailing list of handler macros can be selected by how many arguments the
+// caller passed. NOTE(review): the IDENTITY_ wrappers appear to force an
+// extra expansion pass for preprocessors that treat __VA_ARGS__ as a single
+// token (e.g. MSVC's traditional preprocessor) — confirm.
+#define IREE_STATUS_IMPL_IDENTITY_(...) __VA_ARGS__
+#define IREE_STATUS_IMPL_GET_EXPR_(expr, ...) expr
+#define IREE_STATUS_IMPL_GET_ARGS_(expr, ...) __VA_ARGS__
+#define IREE_STATUS_IMPL_GET_MACRO_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, \
+                                    _10, _11, _12, _13, _14, ...)           \
+  IREE_STATUS_IMPL_IDENTITY_(                                               \
+      IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_GET_EXPR_)(__VA_ARGS__))
+
+// Handlers for the three iree_make_status forms: code only, code + string
+// literal message, and code + printf-style format string with arguments.
+#define IREE_STATUS_IMPL_MAKE_EMPTY_(file, line, status_code, ...) \
+  iree_status_allocate(status_code, file, line, iree_string_view_empty())
+#define IREE_STATUS_IMPL_MAKE_ANNOTATE_(file, line, status_code, message) \
+  iree_status_allocate(status_code, file, line, iree_make_cstring_view(message))
+#define IREE_STATUS_IMPL_MAKE_ANNOTATE_F_(file, line, status_code, ...) \
+  iree_status_allocate_f(status_code, file, line, __VA_ARGS__)
+// Dispatches on argument count: code alone selects MAKE_EMPTY_, code + one
+// message argument selects MAKE_ANNOTATE_, and anything more (a format string
+// plus format arguments) selects MAKE_ANNOTATE_F_.
+#define IREE_STATUS_IMPL_MAKE_SWITCH_(file, line, ...)                       \
+  IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_IDENTITY_(                     \
+      IREE_STATUS_IMPL_GET_MACRO_)(                                          \
+      __VA_ARGS__, IREE_STATUS_IMPL_MAKE_ANNOTATE_F_,                        \
+      IREE_STATUS_IMPL_MAKE_ANNOTATE_F_, IREE_STATUS_IMPL_MAKE_ANNOTATE_F_,  \
+      IREE_STATUS_IMPL_MAKE_ANNOTATE_F_, IREE_STATUS_IMPL_MAKE_ANNOTATE_F_,  \
+      IREE_STATUS_IMPL_MAKE_ANNOTATE_F_, IREE_STATUS_IMPL_MAKE_ANNOTATE_F_,  \
+      IREE_STATUS_IMPL_MAKE_ANNOTATE_F_, IREE_STATUS_IMPL_MAKE_ANNOTATE_F_,  \
+      IREE_STATUS_IMPL_MAKE_ANNOTATE_F_, IREE_STATUS_IMPL_MAKE_ANNOTATE_F_,  \
+      IREE_STATUS_IMPL_MAKE_ANNOTATE_F_, IREE_STATUS_IMPL_MAKE_ANNOTATE_F_,  \
+      IREE_STATUS_IMPL_MAKE_ANNOTATE_, IREE_STATUS_IMPL_MAKE_EMPTY_))        \
+  (file, line, IREE_STATUS_IMPL_GET_EXPR_(__VA_ARGS__),                      \
+   IREE_STATUS_IMPL_GET_ARGS_(__VA_ARGS__))
+
+// Annotation handlers used by IREE_RETURN_IF_ERROR and friends: PASS_ returns
+// the status untouched, ANNOTATE_ attaches a string literal message, and
+// ANNOTATE_F_ attaches a printf-style formatted message.
+#define IREE_STATUS_IMPL_PASS_(var, ...) var
+#define IREE_STATUS_IMPL_ANNOTATE_(var, ...)                     \
+  IREE_STATUS_IMPL_IDENTITY_(iree_status_annotate(               \
+      var, iree_make_cstring_view(IREE_STATUS_IMPL_IDENTITY_(    \
+               IREE_STATUS_IMPL_GET_ARGS_)(__VA_ARGS__))))
+#define IREE_STATUS_IMPL_ANNOTATE_F_(var, ...)                   \
+  IREE_STATUS_IMPL_IDENTITY_(iree_status_annotate_f(             \
+      var,                                                       \
+      IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_GET_ARGS_)(__VA_ARGS__)))
+// Same argument-count dispatch as MAKE_SWITCH_ but for annotating an existing
+// status value.
+#define IREE_STATUS_IMPL_ANNOTATE_SWITCH_(...)                               \
+  IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_IDENTITY_(                     \
+      IREE_STATUS_IMPL_GET_MACRO_)(                                          \
+      __VA_ARGS__, IREE_STATUS_IMPL_ANNOTATE_F_, IREE_STATUS_IMPL_ANNOTATE_F_, \
+      IREE_STATUS_IMPL_ANNOTATE_F_, IREE_STATUS_IMPL_ANNOTATE_F_,            \
+      IREE_STATUS_IMPL_ANNOTATE_F_, IREE_STATUS_IMPL_ANNOTATE_F_,            \
+      IREE_STATUS_IMPL_ANNOTATE_F_, IREE_STATUS_IMPL_ANNOTATE_F_,            \
+      IREE_STATUS_IMPL_ANNOTATE_F_, IREE_STATUS_IMPL_ANNOTATE_F_,            \
+      IREE_STATUS_IMPL_ANNOTATE_F_, IREE_STATUS_IMPL_ANNOTATE_F_,            \
+      IREE_STATUS_IMPL_ANNOTATE_, IREE_STATUS_IMPL_PASS_))                   \
+  (IREE_STATUS_IMPL_GET_EXPR_(__VA_ARGS__),                                  \
+   IREE_STATUS_IMPL_GET_ARGS_(__VA_ARGS__))
+// Evaluates the expression into a uniquely named local (|var|) and
+// early-returns the (optionally annotated) status when it is not OK.
+#define IREE_STATUS_IMPL_RETURN_IF_API_ERROR_(var, ...)                      \
+  iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_(                           \
+      IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_GET_EXPR_)(__VA_ARGS__))); \
+  if (IREE_UNLIKELY(var)) {                                                  \
+    return IREE_STATUS_IMPL_ANNOTATE_SWITCH_(var, __VA_ARGS__);              \
+  }
+// As above but evaluates |tail_expr| (e.g. cleanup) before returning.
+#define IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_(tail_expr, var, ...)  \
+  iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_(                           \
+      IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_GET_EXPR_)(__VA_ARGS__))); \
+  if (IREE_UNLIKELY(var)) {                                                  \
+    (tail_expr);                                                             \
+    return IREE_STATUS_IMPL_ANNOTATE_SWITCH_(var, __VA_ARGS__);              \
+  }
+
+// Frees any storage attached to a failing status and discards it.
+#define IREE_STATUS_IMPL_IGNORE_ERROR_(var, expr) \
+  iree_status_t var = (expr);                     \
+  if (IREE_UNLIKELY(var)) iree_status_ignore(var);
+
+// Aborts the process via iree_status_abort when |expr| fails.
+#define IREE_STATUS_IMPL_CHECK_OK_(var, expr) \
+  iree_status_t var = (expr);                 \
+  if (IREE_UNLIKELY(var)) iree_status_abort(var);
+
+// We cut out all status storage code when not used.
+#if IREE_STATUS_FEATURES == 0
+#define IREE_STATUS_IMPL_MAKE_(code, ...) \
+ (iree_status_t)(uintptr_t)((code)&IREE_STATUS_CODE_MASK)
+#define IREE_STATUS_IMPL_MAKE_LOC_(file, line, code, ...) \
+ IREE_STATUS_IMPL_MAKE_(code)
+#undef IREE_STATUS_IMPL_RETURN_IF_API_ERROR_
+#define IREE_STATUS_IMPL_RETURN_IF_API_ERROR_(var, ...) \
+ iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_( \
+ IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_GET_EXPR_)(__VA_ARGS__))); \
+ if (IREE_UNLIKELY(var)) return var;
+#undef IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_
+#define IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_(tail_expr, var, ...) \
+ iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_( \
+ IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_GET_EXPR_)(__VA_ARGS__))); \
+ if (IREE_UNLIKELY(var)) { \
+ (tail_expr); \
+ return var; \
+ }
+#undef IREE_STATUS_IMPL_IGNORE_ERROR_
+#define IREE_STATUS_IMPL_IGNORE_ERROR_(var, expr) \
+ iree_status_t var = (expr); \
+ (void)(var);
+#undef IREE_STATUS_IMPL_CHECK_OK_
+#define IREE_STATUS_IMPL_CHECK_OK_(var, expr) \
+ iree_status_t var = (expr); \
+ if (IREE_UNLIKELY(!iree_status_is_ok(var))) abort();
+#else
+#define IREE_STATUS_IMPL_MAKE_(...) \
+ IREE_STATUS_IMPL_MAKE_SWITCH_(__FILE__, __LINE__, __VA_ARGS__)
+#define IREE_STATUS_IMPL_MAKE_LOC_(file, line, ...) \
+ IREE_STATUS_IMPL_MAKE_SWITCH_(file, line, __VA_ARGS__)
+#endif // !IREE_STATUS_FEATURES
+
+// Returns an IREE_STATUS_OK.
+#define iree_ok_status() iree_status_from_code(IREE_STATUS_OK)
+
+// Makes an iree_status_t with the given iree_status_code_t code and records
+// the current source location.
+//
+// Optionally either a message string literal or printf-style format string may
+// be associated with the status.
+//
+// Examples:
+// return iree_make_status(IREE_STATUS_CANCELLED);
+// return iree_make_status(IREE_STATUS_CANCELLED, "because reasons");
+// return iree_make_status(IREE_STATUS_CANCELLED, "because %d > %d", a, b);
+#define iree_make_status IREE_STATUS_IMPL_MAKE_
+
+// Makes an iree_status_t with the given iree_status_code_t code using the given
+// source location. Besides taking the file and line of the source location this
+// is the same as iree_make_status.
+//
+// Examples:
+// return iree_make_status_with_location(
+// "file.c", 40, IREE_STATUS_CANCELLED, "because %d > %d", a, b);
+#define iree_make_status_with_location IREE_STATUS_IMPL_MAKE_LOC_
+
+// Propagates the error returned by (expr) by returning from the current
+// function on non-OK status. Optionally annotates the status with additional
+// information (see iree_status_annotate for more information).
+//
+// Example:
+// iree_status_t OtherFunc(...);
+// iree_status_t MyFunc(...) {
+// IREE_RETURN_IF_ERROR(OtherFunc(...));
+// IREE_RETURN_IF_ERROR(OtherFunc(...), "with a message");
+// IREE_RETURN_IF_ERROR(OtherFunc(...), "with a value: %d", 5);
+// return iree_ok_status();
+// }
+#define IREE_RETURN_IF_ERROR(...) \
+ IREE_STATUS_IMPL_RETURN_IF_API_ERROR_( \
+ IREE_STATUS_IMPL_CONCAT_(__status_, __COUNTER__), \
+ IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_IDENTITY_(__VA_ARGS__)))
+
+// IREE_RETURN_IF_ERROR with a custom expression to evaluate before returning.
+#define IREE_RETURN_AND_EVAL_IF_ERROR(tail_expr, ...) \
+ IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_( \
+ tail_expr, IREE_STATUS_IMPL_CONCAT_(__status_, __COUNTER__), \
+ IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_IDENTITY_(__VA_ARGS__)))
+
+// Ignores the status result of (expr) regardless of its value.
+//
+// Example:
+// IREE_IGNORE_ERROR(some_fn_that_may_fail());
+#define IREE_IGNORE_ERROR(expr) \
+ IREE_STATUS_IMPL_IGNORE_ERROR_( \
+ IREE_STATUS_IMPL_CONCAT_(__status_, __COUNTER__), (expr))
+
+// Aborts the program if the result of (expr) is not IREE_STATUS_OK.
+//
+// WARNING: this should only be used when absolutely required and avoided in any
+// core IREE code. Aborting is a very user-hostile behavior and on some systems
+// can cause major issues. Prefer instead to properly handle errors and route
+// them through hosting application infrastructure in a way that preserves more
+// context than just an instruction pointer and a SIGABRT.
+//
+// Example:
+// IREE_CHECK_OK(some_fn_that_may_fail());
+#define IREE_CHECK_OK(expr) \
+ IREE_STATUS_IMPL_CHECK_OK_(IREE_STATUS_IMPL_CONCAT_(__status_, __COUNTER__), \
+ (expr))
+
+// Returns the canonical status code for the given errno value.
+// https://en.cppreference.com/w/cpp/error/errno_macros
+IREE_API_EXPORT iree_status_code_t
+iree_status_code_from_errno(int error_number);
+
+#if defined(_WIN32) || defined(_WIN64)
+// Returns the canonical status code for the given Win32 GetLastError code.
+// https://docs.microsoft.com/en-us/windows/win32/api/errhandlingapi/nf-errhandlingapi-getlasterror
+IREE_API_EXPORT iree_status_code_t
+iree_status_code_from_win32_error(uint32_t error);
+#endif // _WIN32 || _WIN64
+
+// Returns a NUL-terminated string constant for the given status code, such as
+// IREE_STATUS_UNAVAILABLE = "UNAVAILABLE". Do not rely on string-matching the
+// result as the exact text may change.
+IREE_API_EXPORT const char* iree_status_code_string(iree_status_code_t code);
+
+// Allocates a new status instance for a failing error |code|.
+// |file| and |line| should be populated with __FILE__ and __LINE__ at the call
+// site and an optional string |message| may be provided.
+//
+// The status will be allocated using the default system allocator and must be
+// freed using either iree_status_free or iree_status_ignore.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t
+iree_status_allocate(iree_status_code_t code, const char* file, uint32_t line,
+ iree_string_view_t message);
+
+// Allocates a new status instance for a failing error |code| and annotates it
+// with a printf-style format string. Roughly equivalent (though more efficient)
+// than iree_status_allocate + iree_status_annotate_f.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t IREE_PRINTF_ATTRIBUTE(4, 5)
+ iree_status_allocate_f(iree_status_code_t code, const char* file,
+ uint32_t line, const char* format, ...);
+
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t iree_status_allocate_vf(
+ iree_status_code_t code, const char* file, uint32_t line,
+ const char* format, va_list varargs_0, va_list varargs_1);
+
+// Clones |status| into a new status instance.
+// No payloads, if present, will be cloned.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t
+iree_status_clone(iree_status_t status);
+
+// Frees |status| if it has any associated storage.
+IREE_API_EXPORT void iree_status_free(iree_status_t status);
+
+// Ignores |status| regardless of its value and frees any associated payloads.
+// Returns an OK status that can be used when chaining.
+IREE_API_EXPORT iree_status_t iree_status_ignore(iree_status_t status);
+
+// Returns a new status that is |base_status| if not OK and otherwise returns
+// |new_status|. This allows for chaining failure handling code that may also
+// return statuses.
+//
+// Example:
+// iree_status_t status = do_something();
+// return iree_status_join(status, do_cleanup());
+IREE_API_EXPORT iree_status_t iree_status_join(iree_status_t base_status,
+ iree_status_t new_status);
+
+// Aborts the program with a failing |status|.
+// This will trigger a SIGABRT. It's best not to use this at all outside of
+// demos or tools.
+IREE_API_EXPORT IREE_ATTRIBUTE_NORETURN void iree_status_abort(
+ iree_status_t status);
+
+// Consumes the |status| by freeing its storage and returning its code.
+IREE_API_EXPORT iree_status_code_t
+iree_status_consume_code(iree_status_t status);
+
+// NOTE: varargs don't optimize well so we hard-no-op the functions when
+// annotations are not enabled.
+#if IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS
+
+// Annotates a status message with the given constant string message.
+// Ignored if |base_status| is OK.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t
+iree_status_annotate(iree_status_t base_status, iree_string_view_t message);
+
+// Annotates a status message with the given printf-style message.
+// Ignored if |base_status| is OK.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t IREE_PRINTF_ATTRIBUTE(2, 3)
+ iree_status_annotate_f(iree_status_t base_status, const char* format, ...);
+
+#else
+#define iree_status_annotate(base_status, ...) (base_status)
+#define iree_status_annotate_f(base_status, ...) (base_status)
+#endif // has IREE_STATUS_FEATURE_ANNOTATIONS
+
+// Formats the status as a multi-line string containing all associated payloads.
+// Note that this may contain PII such as file paths and must only be used for
+// presenting errors to users and not sent to a logs aggregation service.
+//
+// If |buffer_capacity| is insufficient, then |out_buffer_length| is the
+// number of characters that would have been written if |buffer_capacity|
+// had been sufficiently large, not counting the terminating null character.
+IREE_API_EXPORT bool iree_status_format(iree_status_t status,
+ iree_host_size_t buffer_capacity,
+ char* buffer,
+ iree_host_size_t* out_buffer_length);
+
+// Prints |status| to the given |file| as a string with all available
+// annotations. This will produce multiple lines of output and should be used
+// only when dumping a status on failure.
+IREE_API_EXPORT void iree_status_fprint(FILE* file, iree_status_t status);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_STATUS_H_
diff --git a/runtime/src/iree/base/status_cc.cc b/runtime/src/iree/base/status_cc.cc
new file mode 100644
index 0000000..edd207c
--- /dev/null
+++ b/runtime/src/iree/base/status_cc.cc
@@ -0,0 +1,65 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/status_cc.h"
+
+#include <cstddef>
+#include <cstdlib>
+#include <ostream>
+
+#include "iree/base/attributes.h"
+#include "iree/base/logging.h"
+
+namespace iree {
+
+// Streams the human-readable name of a status code.
+std::ostream& operator<<(std::ostream& os, const StatusCode& x) {
+  return os << StatusCodeToString(x);
+}
+
+// static
+// Formats |status| into a std::string; returns "OK" for success and "<!>"
+// when formatting itself fails.
+IREE_MUST_USE_RESULT std::string Status::ToString(iree_status_t status) {
+  if (iree_status_is_ok(status)) return "OK";
+  // Measure first, then format into a right-sized string. std::string keeps
+  // its own NUL so formatting may legally write result.size() + 1 bytes.
+  iree_host_size_t length = 0;
+  if (IREE_UNLIKELY(!iree_status_format(status, /*buffer_capacity=*/0,
+                                        /*buffer=*/NULL, &length))) {
+    return "<!>";
+  }
+  std::string result(length, '\0');
+  const bool formatted =
+      iree_status_format(status, result.size() + 1,
+                         const_cast<char*>(result.data()), &length);
+  if (IREE_UNLIKELY(!formatted)) return "<!>";
+  return result;
+}
+
+// Streams the full status message rendered via Status::ToString.
+std::ostream& operator<<(std::ostream& os, const Status& x) {
+  return os << x.ToString();
+}
+
+namespace status_impl {
+
+// Invoked when a StatusOr<T> is constructed from an OK status, which is a
+// programming error (per the message below a StatusOr must hold either a
+// value or a non-OK status). Logs, rewrites |status| to an internal error,
+// then aborts the process.
+void Helper::HandleInvalidStatusCtorArg(Status* status) {
+  const char* kMessage =
+      "An OK status is not a valid constructor argument to StatusOr<T>";
+  IREE_LOG(ERROR) << kMessage;
+  // Overwrite so any observer of |status| sees a failure before the abort.
+  *status = Status(StatusCode::kInternal, kMessage);
+  abort();
+}
+
+// Invoked when a value is fetched from an object holding an error status.
+// Logs at FATAL severity and aborts: this is unrecoverable API misuse.
+void Helper::Crash(const Status& status) {
+  IREE_LOG(FATAL) << "Attempting to fetch value instead of handling error "
+                  << status;
+  // abort() is unreachable if IREE_LOG(FATAL) terminates, but guarantees
+  // termination regardless of the logging implementation.
+  abort();
+}
+
+} // namespace status_impl
+
+} // namespace iree
diff --git a/runtime/src/iree/base/status_cc.h b/runtime/src/iree/base/status_cc.h
new file mode 100644
index 0000000..4795dda
--- /dev/null
+++ b/runtime/src/iree/base/status_cc.h
@@ -0,0 +1,944 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_STATUS_CC_H_
+#define IREE_BASE_STATUS_CC_H_
+
+#ifndef __cplusplus
+#error iree::Status is only usable in C++ code.
+#endif // !__cplusplus
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "iree/base/api.h"
+#include "iree/base/attributes.h"
+#include "iree/base/logging.h"
+#include "iree/base/target_platform.h"
+
+namespace iree {
+
+namespace status_impl {
+
+template <class T, class U = T>
+constexpr T exchange(T& obj, U&& new_value) {
+ T old_value = std::move(obj);
+ obj = std::forward<U>(new_value);
+ return old_value;
+}
+
+} // namespace status_impl
+
+//===----------------------------------------------------------------------===//
+// Status codes and source location utilities
+//===----------------------------------------------------------------------===//
+
+// Class representing a specific location in the source code of a program.
+class SourceLocation {
+ public:
+ // Avoid this constructor; it populates the object with dummy values.
+ constexpr SourceLocation() : line_(0), file_name_(nullptr) {}
+
+ // `file_name` must outlive all copies of the `iree::SourceLocation` object,
+ // so in practice it should be a string literal.
+ constexpr SourceLocation(std::uint_least32_t line, const char* file_name)
+ : line_(line), file_name_(file_name) {}
+
+ // The line number of the captured source location.
+ constexpr std::uint_least32_t line() const { return line_; }
+
+ // The file name of the captured source location.
+ constexpr const char* file_name() const { return file_name_; }
+
+ private:
+ std::uint_least32_t line_;
+ const char* file_name_;
+};
+
+// If a function takes an `iree::SourceLocation` parameter, pass this as the
+// argument.
+#if IREE_STATUS_FEATURES == 0
+#define IREE_LOC ::iree::SourceLocation(0, NULL)
+#else
+#define IREE_LOC ::iree::SourceLocation(__LINE__, __FILE__)
+#endif // IREE_STATUS_FEATURES == 0
+
+enum class StatusCode : uint32_t {
+ kOk = IREE_STATUS_OK,
+ kCancelled = IREE_STATUS_CANCELLED,
+ kUnknown = IREE_STATUS_UNKNOWN,
+ kInvalidArgument = IREE_STATUS_INVALID_ARGUMENT,
+ kDeadlineExceeded = IREE_STATUS_DEADLINE_EXCEEDED,
+ kNotFound = IREE_STATUS_NOT_FOUND,
+ kAlreadyExists = IREE_STATUS_ALREADY_EXISTS,
+ kPermissionDenied = IREE_STATUS_PERMISSION_DENIED,
+ kResourceExhausted = IREE_STATUS_RESOURCE_EXHAUSTED,
+ kFailedPrecondition = IREE_STATUS_FAILED_PRECONDITION,
+ kAborted = IREE_STATUS_ABORTED,
+ kOutOfRange = IREE_STATUS_OUT_OF_RANGE,
+ kUnimplemented = IREE_STATUS_UNIMPLEMENTED,
+ kInternal = IREE_STATUS_INTERNAL,
+ kUnavailable = IREE_STATUS_UNAVAILABLE,
+ kDataLoss = IREE_STATUS_DATA_LOSS,
+ kUnauthenticated = IREE_STATUS_UNAUTHENTICATED,
+ kDeferred = IREE_STATUS_DEFERRED,
+};
+
+static inline const char* StatusCodeToString(StatusCode code) {
+ return iree_status_code_string(static_cast<iree_status_code_t>(code));
+}
+
+// Prints a human-readable representation of `x` to `os`.
+std::ostream& operator<<(std::ostream& os, const StatusCode& x);
+
+//===----------------------------------------------------------------------===//
+// Status
+//===----------------------------------------------------------------------===//
+
+class IREE_MUST_USE_RESULT Status;
+
+// A Status value can be either OK or not-OK
+// * OK indicates that the operation succeeded.
+// * A not-OK value indicates that the operation failed and contains
+// details
+// about the error.
+class Status final {
+ public:
+ // Return a combination of the error code name and message.
+ static IREE_MUST_USE_RESULT std::string ToString(iree_status_t status);
+
+ // Creates an OK status with no message.
+ Status() = default;
+
+ // Takes ownership of a C API status instance.
+ Status(iree_status_t&& status) noexcept
+ : value_(status_impl::exchange(
+ status, iree_status_from_code(iree_status_code(status)))) {}
+
+ // Takes ownership of a C API status instance wrapped in a Status.
+ Status(Status& other) noexcept
+ : value_(status_impl::exchange(other.value_,
+ iree_status_from_code(other.code()))) {}
+ Status(Status&& other) noexcept
+ : value_(status_impl::exchange(other.value_,
+ iree_status_from_code(other.code()))) {}
+ Status& operator=(Status&& other) {
+ if (this != &other) {
+ if (IREE_UNLIKELY(value_)) iree_status_ignore(value_);
+ value_ = status_impl::exchange(other.value_,
+ iree_status_from_code(other.code()));
+ }
+ return *this;
+ }
+
+ Status(iree_status_code_t code) : value_(iree_status_from_code(code)) {}
+ Status& operator=(const iree_status_code_t& code) {
+ if (IREE_UNLIKELY(value_)) iree_status_ignore(value_);
+ value_ = iree_status_from_code(code);
+ return *this;
+ }
+
+ Status(StatusCode code) : value_(iree_status_from_code(code)) {}
+ Status& operator=(const StatusCode& code) {
+ if (IREE_UNLIKELY(value_)) iree_status_ignore(value_);
+ value_ = iree_status_from_code(code);
+ return *this;
+ }
+
+ // Creates a status with the specified code and error message.
+ // If `code` is kOk, `message` is ignored.
+ Status(StatusCode code, const char* message) {
+ if (IREE_UNLIKELY(code != StatusCode::kOk)) {
+ value_ = (!message || !strlen(message))
+ ? iree_status_from_code(code)
+ : iree_status_allocate(static_cast<iree_status_code_t>(code),
+ /*file=*/nullptr, /*line=*/0,
+ iree_make_cstring_view(message));
+ }
+ }
+ Status(StatusCode code, SourceLocation location, const char* message) {
+ if (IREE_UNLIKELY(code != StatusCode::kOk)) {
+ value_ = iree_status_allocate(static_cast<iree_status_code_t>(code),
+ location.file_name(), location.line(),
+ iree_make_cstring_view(message));
+ }
+ }
+
+ ~Status() {
+ if (IREE_UNLIKELY((uintptr_t)(value_) & ~IREE_STATUS_CODE_MASK)) {
+ iree_status_free(value_);
+ }
+ }
+
+ // Returns true if the Status is OK.
+ IREE_MUST_USE_RESULT bool ok() const { return iree_status_is_ok(value_); }
+
+ // Returns the error code.
+ IREE_MUST_USE_RESULT StatusCode code() const {
+ return static_cast<StatusCode>(iree_status_code(value_));
+ }
+
+ // Return a combination of the error code name and message.
+ IREE_MUST_USE_RESULT std::string ToString() const {
+ return Status::ToString(value_);
+ }
+
+ // Ignores any errors, potentially suppressing complaints from any tools.
+ void IgnoreError() { value_ = iree_status_ignore(value_); }
+
+ // Converts to a C API status instance and transfers ownership.
+ IREE_MUST_USE_RESULT operator iree_status_t() && {
+ return status_impl::exchange(
+ value_, iree_status_from_code(iree_status_code(value_)));
+ }
+
+ IREE_MUST_USE_RESULT iree_status_t release() {
+ return status_impl::exchange(value_, iree_ok_status());
+ }
+
+ friend bool operator==(const Status& lhs, const Status& rhs) {
+ return lhs.code() == rhs.code();
+ }
+ friend bool operator!=(const Status& lhs, const Status& rhs) {
+ return !(lhs == rhs);
+ }
+
+ friend bool operator==(const Status& lhs, const StatusCode& rhs) {
+ return lhs.code() == rhs;
+ }
+ friend bool operator!=(const Status& lhs, const StatusCode& rhs) {
+ return !(lhs == rhs);
+ }
+
+ friend bool operator==(const StatusCode& lhs, const Status& rhs) {
+ return lhs == rhs.code();
+ }
+ friend bool operator!=(const StatusCode& lhs, const Status& rhs) {
+ return !(lhs == rhs);
+ }
+
+ private:
+ iree_status_t value_ = iree_ok_status();
+};
+
+// Returns an OK status, equivalent to a default constructed instance.
+IREE_MUST_USE_RESULT static inline Status OkStatus() { return Status(); }
+
+// Prints a human-readable representation of `x` to `os`.
+std::ostream& operator<<(std::ostream& os, const Status& x);
+
+IREE_MUST_USE_RESULT static inline bool IsOk(const Status& status) {
+ return status.code() == StatusCode::kOk;
+}
+
+IREE_MUST_USE_RESULT static inline bool IsOk(const iree_status_t& status) {
+ return iree_status_is_ok(status);
+}
+
+//===----------------------------------------------------------------------===//
+// StatusOr<T>
+//===----------------------------------------------------------------------===//
+
+template <typename T>
+class IREE_MUST_USE_RESULT StatusOr;
+
+namespace status_impl {
+
+// https://en.cppreference.com/w/cpp/types/conjunction
+template <typename... Ts>
+struct conjunction : std::true_type {};
+template <typename T, typename... Ts>
+struct conjunction<T, Ts...>
+ : std::conditional<T::value, conjunction<Ts...>, T>::type {};
+template <typename T>
+struct conjunction<T> : T {};
+
+// https://en.cppreference.com/w/cpp/types/disjunction
+template <typename... Ts>
+struct disjunction : std::false_type {};
+template <typename T, typename... Ts>
+struct disjunction<T, Ts...>
+ : std::conditional<T::value, T, disjunction<Ts...>>::type {};
+template <typename T>
+struct disjunction<T> : T {};
+
+// https://en.cppreference.com/w/cpp/utility/in_place
+struct in_place_t {
+ explicit in_place_t() = default;
+};
+/*inline*/ constexpr in_place_t in_place{};
+
+// https://en.cppreference.com/w/cpp/types/negation
+template <typename T>
+struct negation : std::integral_constant<bool, !T::value> {};
+
+template <typename T, typename U>
+using IsStatusOrConversionAmbiguous =
+ status_impl::disjunction<std::is_constructible<T, StatusOr<U>&>,
+ std::is_constructible<T, const StatusOr<U>&>,
+ std::is_constructible<T, StatusOr<U>&&>,
+ std::is_constructible<T, const StatusOr<U>&&>,
+ std::is_convertible<StatusOr<U>&, T>,
+ std::is_convertible<const StatusOr<U>&, T>,
+ std::is_convertible<StatusOr<U>&&, T>,
+ std::is_convertible<const StatusOr<U>&&, T>>;
+
+template <typename T, typename U>
+using IsStatusOrConversionAssigmentAmbiguous =
+ status_impl::disjunction<IsStatusOrConversionAmbiguous<T, U>,
+ std::is_assignable<T&, StatusOr<U>&>,
+ std::is_assignable<T&, const StatusOr<U>&>,
+ std::is_assignable<T&, StatusOr<U>&&>,
+ std::is_assignable<T&, const StatusOr<U>&&>>;
+
+template <typename T, typename U>
+struct IsAmbiguousStatusOrForInitialization
+ : // Strip const-value refs from type and check again, else false_type.
+ public std::conditional_t<
+ std::is_same<std::remove_cv_t<std::remove_reference_t<U>>, U>::value,
+ std::false_type,
+ IsAmbiguousStatusOrForInitialization<
+ T, std::remove_cv_t<std::remove_reference_t<U>>>> {};
+
+template <typename T, typename U>
+struct IsAmbiguousStatusOrForInitialization<T, StatusOr<U>>
+ : public IsStatusOrConversionAmbiguous<T, U> {};
+
+template <typename T, typename U>
+using IsStatusOrDirectInitializationAmbiguous = status_impl::disjunction<
+ std::is_same<StatusOr<T>, std::remove_cv_t<std::remove_reference_t<U>>>,
+ std::is_same<Status, std::remove_cv_t<std::remove_reference_t<U>>>,
+ std::is_same<status_impl::in_place_t,
+ std::remove_cv_t<std::remove_reference_t<U>>>,
+ IsAmbiguousStatusOrForInitialization<T, U>>;
+
+template <typename T, typename U>
+using IsStatusOrDirectInitializationValid = status_impl::disjunction<
+ // The is_same allows nested status ors to ignore this check iff same type.
+ std::is_same<T, std::remove_cv_t<std::remove_reference_t<U>>>,
+ status_impl::negation<IsStatusOrDirectInitializationAmbiguous<T, U>>>;
+
+class Helper {
+ public:
+ IREE_ATTRIBUTE_NORETURN static void HandleInvalidStatusCtorArg(Status*);
+ IREE_ATTRIBUTE_NORETURN static void Crash(const Status& status);
+};
+
+// Construct an instance of T in `p` through placement new, passing Args... to
+// the constructor.
+// This abstraction is here mostly for the gcc performance fix.
+template <typename T, typename... Args>
+void PlacementNew(void* p, Args&&... args) {
+#if defined(__GNUC__) && !defined(__clang__)
+ // Teach gcc that 'p' cannot be null, fixing code size issues.
+ if (p == nullptr) __builtin_unreachable();
+#endif
+ new (p) T(std::forward<Args>(args)...);
+}
+
+// Helper base class to hold the data and all operations.
+// We move all this to a base class to allow mixing with the appropriate
+// TraitsBase specialization.
+template <typename T>
+class StatusOrData {
+ template <typename U>
+ friend class StatusOrData;
+
+ public:
+ StatusOrData() = delete;
+
+ StatusOrData(const StatusOrData& other) {
+ if (other.ok()) {
+ MakeValue(other.data_);
+ MakeStatus();
+ } else {
+ MakeStatus(other.status_);
+ }
+ }
+
+ StatusOrData(StatusOrData&& other) noexcept {
+ if (other.ok()) {
+ MakeValue(std::move(other.data_));
+ MakeStatus();
+ } else {
+ MakeStatus(status_impl::exchange(other.status_, other.status_.code()));
+ }
+ }
+
+ template <typename U>
+ explicit StatusOrData(const StatusOrData<U>& other) {
+ if (other.ok()) {
+ MakeValue(other.data_);
+ MakeStatus();
+ } else {
+ MakeStatus(other.status_);
+ }
+ }
+
+ template <typename U>
+ explicit StatusOrData(StatusOrData<U>&& other) {
+ if (other.ok()) {
+ MakeValue(std::move(other.data_));
+ MakeStatus();
+ } else {
+ MakeStatus(status_impl::exchange(other.status_, other.status_.code()));
+ }
+ }
+
+ template <typename... Args>
+ explicit StatusOrData(status_impl::in_place_t, Args&&... args)
+ : data_(std::forward<Args>(args)...) {
+ MakeStatus();
+ }
+
+ explicit StatusOrData(const T& value) : data_(value) { MakeStatus(); }
+ explicit StatusOrData(T&& value) : data_(std::move(value)) { MakeStatus(); }
+
+ explicit StatusOrData(Status&& status)
+ : status_(status_impl::exchange(status, status.code())) {
+ EnsureNotOk();
+ }
+
+ StatusOrData& operator=(const StatusOrData& other) {
+ if (this == &other) return *this;
+ if (other.ok()) {
+ Assign(other.data_);
+ } else {
+ Assign(other.status_);
+ }
+ return *this;
+ }
+
+ StatusOrData& operator=(StatusOrData&& other) {
+ if (this == &other) return *this;
+ if (other.ok()) {
+ Assign(std::move(other.data_));
+ } else {
+ Assign(status_impl::exchange(other.status_, other.status_.code()));
+ }
+ return *this;
+ }
+
+ ~StatusOrData() {
+ if (ok()) {
+ status_.~Status();
+ data_.~T();
+ } else {
+ status_.~Status();
+ }
+ }
+
+ void Assign(const T& value) {
+ if (ok()) {
+ data_.~T();
+ MakeValue(value);
+ } else {
+ MakeValue(value);
+ status_ = StatusCode::kOk;
+ }
+ }
+
+ void Assign(T&& value) {
+ if (ok()) {
+ data_.~T();
+ MakeValue(std::move(value));
+ } else {
+ MakeValue(std::move(value));
+ status_ = StatusCode::kOk;
+ }
+ }
+
+ void Assign(Status&& status) {
+ Clear();
+ status_ = status_impl::exchange(status, status.code());
+ EnsureNotOk();
+ }
+
+ bool ok() const { return status_.ok(); }
+
+ protected:
+ // status_ will always be active after the constructor.
+ // Union to be able to initialize exactly how we need without waste.
+ // Eg. in the copy constructor we use the default constructor of Status in
+ // the ok() path to avoid an extra Ref call.
+ union {
+ Status status_;
+ };
+
+ // data_ is active iff status_.ok()==true
+ struct Dummy {};
+ union {
+ // When T is const, we need some non-const object we can cast to void* for
+ // the placement new. dummy_ is that object.
+ Dummy dummy_;
+ T data_;
+ };
+
+ void Clear() {
+ if (ok()) data_.~T();
+ }
+
+ void EnsureOk() const {
+ if (IREE_UNLIKELY(!ok())) Helper::Crash(status_);
+ }
+
+ void EnsureNotOk() {
+ if (IREE_UNLIKELY(ok())) Helper::HandleInvalidStatusCtorArg(&status_);
+ }
+
+ // Construct the value (data_) through placement new with the passed arg.
+ template <typename Arg>
+ void MakeValue(Arg&& arg) {
+ status_impl::PlacementNew<T>(&dummy_, std::forward<Arg>(arg));
+ }
+
+ // Construct the status (status_) through placement new with the passed arg.
+ template <typename... Args>
+ void MakeStatus(Args&&... args) {
+ status_impl::PlacementNew<Status>(&status_, std::forward<Args>(args)...);
+ }
+};
+
+// Helper base class to allow implicitly deleted constructors and assignment
+// operations in StatusOr.
+// TraitsBase will explicitly delete what it can't support and StatusOr will
+// inherit that behavior implicitly.
+template <bool Copy, bool Move>
+struct TraitsBase {
+ TraitsBase() = default;
+ TraitsBase(const TraitsBase&) = default;
+ TraitsBase(TraitsBase&&) = default;
+ TraitsBase& operator=(const TraitsBase&) = default;
+ TraitsBase& operator=(TraitsBase&&) = default;
+};
+
+template <>
+struct TraitsBase<false, true> {
+ TraitsBase() = default;
+ TraitsBase(const TraitsBase&) = delete;
+ TraitsBase(TraitsBase&&) = default;
+ TraitsBase& operator=(const TraitsBase&) = delete;
+ TraitsBase& operator=(TraitsBase&&) = default;
+};
+
+template <>
+struct TraitsBase<false, false> {
+ TraitsBase() = default;
+ TraitsBase(const TraitsBase&) = delete;
+ TraitsBase(TraitsBase&&) = delete;
+ TraitsBase& operator=(const TraitsBase&) = delete;
+ TraitsBase& operator=(TraitsBase&&) = delete;
+};
+
+} // namespace status_impl
+
+// StatusOr<T> is the union of a Status object and a T object.
+//
+// A StatusOr object either holds a usable value, or an error Status explaining
+// why such a value is not present.
+template <typename T>
+class StatusOr
+ : private status_impl::StatusOrData<T>,
+ private status_impl::TraitsBase<std::is_copy_constructible<T>::value,
+ std::is_move_constructible<T>::value> {
+ template <typename U>
+ friend class StatusOr;
+
+ typedef status_impl::StatusOrData<T> Base;
+
+ public:
+ typedef T element_type;
+
+ // Constructs a new StatusOr with StatusCode::kUnknown status.
+ explicit StatusOr();
+
+ // StatusOr<T> is copy constructible/assignable if T is copy constructible.
+ StatusOr(const StatusOr&) = default;
+ StatusOr& operator=(const StatusOr&) = default;
+
+ // StatusOr<T> is move constructible/assignable if T is move constructible.
+ StatusOr(StatusOr&&) = default;
+ StatusOr& operator=(StatusOr&&) = default;
+
+ // Converting constructors from StatusOr<U>, when T is constructible from U.
+ // To avoid ambiguity, they are disabled if T is also constructible from
+ // StatusOr<U>. Explicit iff the corresponding construction of T from U is
+ // explicit.
+ template <
+ typename U,
+ std::enable_if_t<
+ status_impl::conjunction<
+ status_impl::negation<std::is_same<T, U>>,
+ std::is_constructible<T, const U&>,
+ std::is_convertible<const U&, T>,
+ status_impl::negation<
+ status_impl::IsStatusOrConversionAmbiguous<T, U>>>::value,
+ int> = 0>
+ StatusOr(const StatusOr<U>& other) // NOLINT
+ : Base(static_cast<const typename StatusOr<U>::Base&>(other)) {}
+ template <
+ typename U,
+ std::enable_if_t<
+ status_impl::conjunction<
+ status_impl::negation<std::is_same<T, U>>,
+ std::is_constructible<T, const U&>,
+ status_impl::negation<std::is_convertible<const U&, T>>,
+ status_impl::negation<
+ status_impl::IsStatusOrConversionAmbiguous<T, U>>>::value,
+ int> = 0>
+ explicit StatusOr(const StatusOr<U>& other)
+ : Base(static_cast<const typename StatusOr<U>::Base&>(other)) {}
+
+ template <
+ typename U,
+ std::enable_if_t<
+ status_impl::conjunction<
+ status_impl::negation<std::is_same<T, U>>,
+ std::is_constructible<T, U&&>, std::is_convertible<U&&, T>,
+ status_impl::negation<
+ status_impl::IsStatusOrConversionAmbiguous<T, U>>>::value,
+ int> = 0>
+ StatusOr(StatusOr<U>&& other) // NOLINT
+ : Base(static_cast<typename StatusOr<U>::Base&&>(other)) {}
+ template <
+ typename U,
+ std::enable_if_t<
+ status_impl::conjunction<
+ status_impl::negation<std::is_same<T, U>>,
+ std::is_constructible<T, U&&>,
+ status_impl::negation<std::is_convertible<U&&, T>>,
+ status_impl::negation<
+ status_impl::IsStatusOrConversionAmbiguous<T, U>>>::value,
+ int> = 0>
+ explicit StatusOr(StatusOr<U>&& other)
+ : Base(static_cast<typename StatusOr<U>::Base&&>(other)) {}
+
+ // Conversion copy/move assignment operator, T must be constructible and
+ // assignable from U. Only enable if T cannot be directly assigned from
+ // StatusOr<U>.
+ template <typename U,
+ std::enable_if_t<
+ status_impl::conjunction<
+ status_impl::negation<std::is_same<T, U>>,
+ std::is_constructible<T, const U&>,
+ std::is_assignable<T, const U&>,
+ status_impl::negation<
+ status_impl::IsStatusOrConversionAssigmentAmbiguous<
+ T, U>>>::value,
+ int> = 0>
+ StatusOr& operator=(const StatusOr<U>& other) {
+ this->Assign(other);
+ return *this;
+ }
+ template <typename U,
+ std::enable_if_t<
+ status_impl::conjunction<
+ status_impl::negation<std::is_same<T, U>>,
+ std::is_constructible<T, U&&>, std::is_assignable<T, U&&>,
+ status_impl::negation<
+ status_impl::IsStatusOrConversionAssigmentAmbiguous<
+ T, U>>>::value,
+ int> = 0>
+ StatusOr& operator=(StatusOr<U>&& other) {
+ this->Assign(std::move(other));
+ return *this;
+ }
+
+ // Constructs a new StatusOr with the given value. After calling this
+ // constructor, this->ok() will be true and the contained value may be
+ // retrieved with value(), operator*(), or operator->().
+ StatusOr(const T& value);
+
+ // Takes ownership of a C API status instance.
+ StatusOr(iree_status_t&& status) noexcept
+ : Base(status_impl::exchange(
+ status, iree_status_from_code(iree_status_code(status)))) {}
+
+ // Constructs a new StatusOr with the given non-ok status. After calling this
+ // constructor, this->ok() will be false and calls to value() will
+ // IREE_CHECK-fail.
+ StatusOr(const Status& status);
+ StatusOr& operator=(const Status& status);
+
+ // Similar to the `const T&` overload.
+ //
+ // REQUIRES: T is move constructible.
+ StatusOr(T&& value);
+
+ // RValue versions of the operations declared above.
+ StatusOr(Status&& status);
+ StatusOr& operator=(Status&& status);
+
+ // Constructs the inner value T in-place using the provided args, using the
+ // T(args...) constructor.
+ template <typename... Args>
+ explicit StatusOr(status_impl::in_place_t, Args&&... args);
+ template <typename U, typename... Args>
+ explicit StatusOr(status_impl::in_place_t, std::initializer_list<U> ilist,
+ Args&&... args);
+
+ // Constructs the inner value T in-place using the provided args, using the
+ // T(U) (direct-initialization) constructor. Only valid if T can be
+  // constructed from a U. Can accept move or copy constructors. Explicit if
+  // U is not convertible to T. To avoid ambiguity, this is disabled if U is
+ // a StatusOr<J>, where J is convertible to T.
+ template <typename U = T,
+ std::enable_if_t<
+ status_impl::conjunction<
+ status_impl::IsStatusOrDirectInitializationValid<T, U&&>,
+ std::is_constructible<T, U&&>,
+ std::is_convertible<U&&, T>>::value,
+ int> = 0>
+ StatusOr(U&& u) // NOLINT
+ : StatusOr(status_impl::in_place, std::forward<U>(u)) {}
+
+ template <typename U = T,
+ std::enable_if_t<
+ status_impl::conjunction<
+ status_impl::IsStatusOrDirectInitializationValid<T, U&&>,
+ std::is_constructible<T, U&&>,
+ status_impl::negation<std::is_convertible<U&&, T>>>::value,
+ int> = 0>
+ explicit StatusOr(U&& u) // NOLINT
+ : StatusOr(status_impl::in_place, std::forward<U>(u)) {}
+
+ // Returns this->ok()
+ explicit operator bool() const { return ok(); }
+
+ // Returns this->status().ok()
+ IREE_MUST_USE_RESULT bool ok() const { return this->status_.ok(); }
+
+ // Returns a reference to our status. If this contains a T, then
+ // returns OkStatus().
+ const Status& status() const&;
+ Status status() &&;
+
+ // Returns a reference to the held value if `this->ok()`, or IREE_CHECK-fails.
+ // If you have already checked the status using `this->ok()` or
+ // `operator bool()`, you probably want to use `operator*()` or `operator->()`
+ // to access the value instead of `value`.
+ const T& value() const&;
+ T& value() &;
+ const T&& value() const&&;
+ T&& value() &&;
+
+ // Returns a reference to the current value.
+ //
+ // REQUIRES: this->ok() == true, otherwise the behavior is undefined.
+ const T& operator*() const&;
+ T& operator*() &;
+ const T&& operator*() const&&;
+ T&& operator*() &&;
+
+ // Returns a pointer to the current value.
+ //
+ // REQUIRES: this->ok() == true, otherwise the behavior is undefined.
+ const T* operator->() const;
+ T* operator->();
+
+ // Returns a copy of the current value if this->ok() == true. Otherwise
+ // returns a default value.
+ template <typename U>
+ T value_or(U&& default_value) const&;
+ template <typename U>
+ T value_or(U&& default_value) &&;
+
+ // Ignores any errors. This method does nothing except potentially suppress
+ // complaints from any tools that are checking that errors are not dropped on
+ // the floor.
+ void IgnoreError() const;
+
+ private:
+ using status_impl::StatusOrData<T>::Assign;
+ template <typename U>
+ void Assign(const StatusOr<U>& other);
+ template <typename U>
+ void Assign(StatusOr<U>&& other);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementation details for StatusOr<T>
+
+template <typename T>
+StatusOr<T>::StatusOr() : Base(Status(StatusCode::kUnknown, "")) {}
+
+template <typename T>
+StatusOr<T>::StatusOr(const T& value) : Base(value) {}
+
+template <typename T>
+StatusOr<T>::StatusOr(T&& value) : Base(std::move(value)) {}
+
+template <typename T>
+StatusOr<T>::StatusOr(const Status& status) : Base(status) {}
+
+template <typename T>
+StatusOr<T>::StatusOr(Status&& status) : Base(std::move(status)) {}
+
+template <typename T>
+StatusOr<T>& StatusOr<T>::operator=(const Status& status) {
+ this->Assign(status);
+ return *this;
+}
+
+template <typename T>
+StatusOr<T>& StatusOr<T>::operator=(Status&& status) {
+ this->Assign(std::move(status));
+ return *this;
+}
+
+template <typename T>
+template <typename U>
+inline void StatusOr<T>::Assign(const StatusOr<U>& other) {
+ if (other.ok()) {
+ this->Assign(other.value());
+ } else {
+ this->Assign(other.status());
+ }
+}
+
+template <typename T>
+template <typename U>
+inline void StatusOr<T>::Assign(StatusOr<U>&& other) {
+ if (other.ok()) {
+ this->Assign(std::move(other).value());
+ } else {
+ this->Assign(std::move(other).status());
+ }
+}
+template <typename T>
+template <typename... Args>
+StatusOr<T>::StatusOr(status_impl::in_place_t, Args&&... args)
+ : Base(status_impl::in_place, std::forward<Args>(args)...) {}
+
+template <typename T>
+template <typename U, typename... Args>
+StatusOr<T>::StatusOr(status_impl::in_place_t, std::initializer_list<U> ilist,
+ Args&&... args)
+ : Base(status_impl::in_place, ilist, std::forward<Args>(args)...) {}
+
+template <typename T>
+const Status& StatusOr<T>::status() const& {
+ return this->status_;
+}
+
+template <typename T>
+Status StatusOr<T>::status() && {
+ if (ok()) {
+ return OkStatus();
+ } else {
+ return status_impl::exchange(this->status_, this->status_.code());
+ }
+}
+
+template <typename T>
+const T& StatusOr<T>::value() const& {
+ this->EnsureOk();
+ return this->data_;
+}
+
+template <typename T>
+T& StatusOr<T>::value() & {
+ this->EnsureOk();
+ return this->data_;
+}
+
+template <typename T>
+const T&& StatusOr<T>::value() const&& {
+ this->EnsureOk();
+ return std::move(this->data_);
+}
+
+template <typename T>
+T&& StatusOr<T>::value() && {
+ this->EnsureOk();
+ return std::move(this->data_);
+}
+
+template <typename T>
+const T& StatusOr<T>::operator*() const& {
+ this->EnsureOk();
+ return this->data_;
+}
+
+template <typename T>
+T& StatusOr<T>::operator*() & {
+ this->EnsureOk();
+ return this->data_;
+}
+
+template <typename T>
+const T&& StatusOr<T>::operator*() const&& {
+ this->EnsureOk();
+ return std::move(this->data_);
+}
+
+template <typename T>
+T&& StatusOr<T>::operator*() && {
+ this->EnsureOk();
+ return std::move(this->data_);
+}
+
+template <typename T>
+const T* StatusOr<T>::operator->() const {
+ this->EnsureOk();
+ return &this->data_;
+}
+
+template <typename T>
+T* StatusOr<T>::operator->() {
+ this->EnsureOk();
+ return &this->data_;
+}
+
+template <typename T>
+template <typename U>
+T StatusOr<T>::value_or(U&& default_value) const& {
+ if (ok()) {
+ return this->data_;
+ }
+ return std::forward<U>(default_value);
+}
+
+template <typename T>
+template <typename U>
+T StatusOr<T>::value_or(U&& default_value) && {
+ if (ok()) {
+ return std::move(this->data_);
+ }
+ return std::forward<U>(default_value);
+}
+
+template <typename T>
+void StatusOr<T>::IgnoreError() const {
+ this->status_.IgnoreError();
+}
+
+template <typename T>
+IREE_MUST_USE_RESULT static inline bool IsOk(const StatusOr<T>& status_or) {
+ return status_or.ok();
+}
+
+} // namespace iree
+
+// Executes an expression `rexpr` that returns an `iree::StatusOr<T>`. On OK,
+// moves its value into the variable defined by `lhs`, otherwise returns
+// from the current function.
+#define IREE_ASSIGN_OR_RETURN(lhs, rexpr) \
+ IREE_STATUS_MACROS_IMPL_ASSIGN_OR_RETURN_2_( \
+ IREE_STATUS_IMPL_CONCAT_(_status_or_value, __LINE__), lhs, (rexpr))
+
+#define IREE_STATUS_MACROS_IMPL_ASSIGN_OR_RETURN_2_(statusor, lhs, rexpr) \
+ auto statusor = rexpr; \
+ if (IREE_UNLIKELY(!::iree::IsOk(statusor))) { \
+ return std::move(statusor).status(); \
+ } \
+ lhs = std::move(statusor).value()
+
+#endif // IREE_BASE_STATUS_CC_H_
diff --git a/runtime/src/iree/base/status_test.cc b/runtime/src/iree/base/status_test.cc
new file mode 100644
index 0000000..c035e1e
--- /dev/null
+++ b/runtime/src/iree/base/status_test.cc
@@ -0,0 +1,102 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <ostream>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "iree/base/api.h"
+#include "iree/base/status_cc.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace {
+
+using ::iree::testing::status::StatusIs;
+using ::testing::HasSubstr;
+
+#if (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS) != 0
+#define CHECK_STATUS_MESSAGE(status, message_substr) \
+ EXPECT_THAT(status.ToString(), \
+ HasSubstr(StatusCodeToString(status.code()))); \
+ EXPECT_THAT(status.ToString(), HasSubstr(message_substr))
+#define CHECK_STREAM_MESSAGE(status, os, message_substr) \
+ EXPECT_THAT(os.str(), HasSubstr(StatusCodeToString(status.code()))); \
+ EXPECT_THAT(os.str(), HasSubstr(message_substr))
+#else
+#define CHECK_STATUS_MESSAGE(status, message_substr) \
+ EXPECT_THAT(status.ToString(), HasSubstr(StatusCodeToString(status.code())));
+#define CHECK_STREAM_MESSAGE(status, os, message_substr) \
+ EXPECT_THAT(os.str(), HasSubstr(StatusCodeToString(status.code())));
+#endif // has IREE_STATUS_FEATURE_ANNOTATIONS
+
+TEST(Status, ConstructedWithMessage) {
+ Status status = Status(StatusCode::kInvalidArgument, "message");
+ CHECK_STATUS_MESSAGE(status, "message");
+}
+
+TEST(Status, StreamInsertion) {
+ Status status = Status(StatusCode::kInvalidArgument, "message");
+ std::ostringstream os;
+ os << status;
+ CHECK_STREAM_MESSAGE(status, os, "message");
+}
+
+TEST(Status, StreamInsertionContinued) {
+ Status status = Status(StatusCode::kInvalidArgument, "message");
+ std::ostringstream os;
+ os << status << " annotation";
+ CHECK_STREAM_MESSAGE(status, os, "message");
+ CHECK_STREAM_MESSAGE(status, os, "annotation");
+}
+
+TEST(StatusMacro, ReturnIfError) {
+ auto returnIfError = [](iree_status_t status) -> iree_status_t {
+ IREE_RETURN_IF_ERROR(status, "annotation");
+ return iree_ok_status();
+ };
+ Status status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "message");
+ status = returnIfError(std::move(status));
+ EXPECT_THAT(status, StatusIs(StatusCode::kInvalidArgument));
+ CHECK_STATUS_MESSAGE(status, "message");
+ CHECK_STATUS_MESSAGE(status, "annotation");
+
+ IREE_EXPECT_OK(returnIfError(OkStatus()));
+}
+
+TEST(StatusMacro, ReturnIfErrorFormat) {
+ auto returnIfError = [](iree_status_t status) -> iree_status_t {
+ IREE_RETURN_IF_ERROR(status, "annotation %d %d %d", 1, 2, 3);
+ return iree_ok_status();
+ };
+ Status status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "message");
+ status = returnIfError(std::move(status));
+ EXPECT_THAT(status, StatusIs(StatusCode::kInvalidArgument));
+ CHECK_STATUS_MESSAGE(status, "message");
+ CHECK_STATUS_MESSAGE(status, "annotation 1 2 3");
+
+ IREE_EXPECT_OK(returnIfError(OkStatus()));
+}
+
+TEST(StatusMacro, AssignOrReturn) {
+ auto assignOrReturn = [](StatusOr<std::string> statusOr) -> iree_status_t {
+ IREE_ASSIGN_OR_RETURN(auto ret, std::move(statusOr));
+ (void)ret;
+ return iree_ok_status();
+ };
+ StatusOr<std::string> statusOr =
+ iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "message");
+ Status status = assignOrReturn(std::move(statusOr));
+ EXPECT_THAT(status, StatusIs(StatusCode::kInvalidArgument));
+ CHECK_STATUS_MESSAGE(status, "message");
+
+ IREE_EXPECT_OK(assignOrReturn("foo"));
+}
+
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/base/string_builder.c b/runtime/src/iree/base/string_builder.c
new file mode 100644
index 0000000..590e1d5
--- /dev/null
+++ b/runtime/src/iree/base/string_builder.c
@@ -0,0 +1,151 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/string_builder.h"
+
+#include "iree/base/alignment.h"
+
+// Granularity (in bytes) to which storage capacities are rounded up when the
+// builder grows; this is a growth granule, not an allocation alignment.
+#define IREE_STRING_BUILDER_ALIGNMENT 128
+
+// Zeroes all builder state and records |allocator| for later growth.
+IREE_API_EXPORT void iree_string_builder_initialize(
+    iree_allocator_t allocator, iree_string_builder_t* out_builder) {
+  memset(out_builder, 0, sizeof(*out_builder));
+  out_builder->allocator = allocator;
+}
+
+// Wraps caller-provided fixed storage; the null allocator marks the builder
+// as non-growable.
+IREE_API_EXPORT void iree_string_builder_initialize_with_storage(
+    char* buffer, iree_host_size_t buffer_capacity,
+    iree_string_builder_t* out_builder) {
+  iree_string_builder_initialize(iree_allocator_null(), out_builder);
+  out_builder->buffer = buffer;
+  out_builder->capacity = buffer_capacity;
+}
+
+// Frees any retained buffer and zeroes the builder. For fixed-storage
+// builders the allocator is the null allocator, so the free call is
+// presumably a no-op on the caller's buffer — confirm allocator semantics.
+IREE_API_EXPORT void iree_string_builder_deinitialize(
+    iree_string_builder_t* builder) {
+  if (builder->buffer != NULL) {
+    iree_allocator_free(builder->allocator, builder->buffer);
+  }
+  memset(builder, 0, sizeof(*builder));
+}
+
+// Accessor: raw storage pointer (NULL when nothing has been allocated).
+IREE_API_EXPORT const char* iree_string_builder_buffer(
+    const iree_string_builder_t* builder) {
+  return builder->buffer;
+}
+
+// Accessor: character count, excluding the trailing NUL.
+IREE_API_EXPORT iree_host_size_t
+iree_string_builder_size(const iree_string_builder_t* builder) {
+  return builder->size;
+}
+
+// Accessor: allocated capacity in bytes.
+IREE_API_EXPORT iree_host_size_t
+iree_string_builder_capacity(const iree_string_builder_t* builder) {
+  return builder->capacity;
+}
+
+// Accessor: view over the current contents (valid until next mutation).
+IREE_API_EXPORT iree_string_view_t
+iree_string_builder_view(const iree_string_builder_t* builder) {
+  return iree_make_string_view(iree_string_builder_buffer(builder),
+                               iree_string_builder_size(builder));
+}
+
+// Transfers buffer ownership to the caller; returns NULL when nothing was
+// written. The allocator field is deliberately left intact so the caller can
+// still free the returned buffer via builder->allocator before deinit.
+IREE_API_EXPORT char* iree_string_builder_take_storage(
+    iree_string_builder_t* builder) {
+  char* buffer = builder->buffer;
+  if (builder->size == 0) {
+    // In empty cases we return NULL and need to clean up inline as the user is
+    // expecting to be able to discard the builder after this returns.
+    if (builder->buffer != NULL) {
+      iree_allocator_free(builder->allocator, builder->buffer);
+      builder->buffer = NULL;
+    }
+    buffer = NULL;
+  }
+  builder->size = 0;
+  builder->capacity = 0;
+  builder->buffer = NULL;
+  return buffer;
+}
+
+// Ensures |builder| can hold at least |minimum_capacity| bytes.
+// Growth happens in IREE_STRING_BUILDER_ALIGNMENT granules to amortize
+// reallocations. In size-calculation mode (null allocator, no storage) all
+// reservations succeed without allocating. Fixed-storage builders
+// (initialize_with_storage) cannot grow: the original returned OK here and
+// let append_string memcpy past the caller's buffer — a buffer overflow that
+// also contradicted the header's "further appending will fail" contract.
+// Now exhaustion is reported explicitly.
+IREE_API_EXPORT iree_status_t iree_string_builder_reserve(
+    iree_string_builder_t* builder, iree_host_size_t minimum_capacity) {
+  if (builder->capacity >= minimum_capacity) {
+    // Already large enough.
+    return iree_ok_status();
+  }
+  if (iree_allocator_is_null(builder->allocator)) {
+    if (builder->buffer != NULL) {
+      // Fixed-storage builder: growth is impossible and appending past the
+      // provided capacity must fail rather than overflow the buffer.
+      return iree_make_status(
+          IREE_STATUS_RESOURCE_EXHAUSTED,
+          "string builder has fixed-size storage and cannot grow");
+    }
+    // Size-calculation mode: no storage is kept so any size is fine.
+    return iree_ok_status();
+  }
+  iree_host_size_t new_capacity =
+      iree_host_align(minimum_capacity, IREE_STRING_BUILDER_ALIGNMENT);
+  IREE_RETURN_IF_ERROR(iree_allocator_realloc(builder->allocator, new_capacity,
+                                              (void**)&builder->buffer));
+  // Keep the contents NUL-terminated so the buffer is usable as a C string.
+  builder->buffer[builder->size] = 0;
+  builder->capacity = new_capacity;
+  return iree_ok_status();
+}
+
+// Appends |value|, keeping the buffer NUL-terminated (the NUL is not counted
+// in the reported size). In size-calculation mode only the size is advanced.
+IREE_API_EXPORT iree_status_t iree_string_builder_append_string(
+    iree_string_builder_t* builder, iree_string_view_t value) {
+  // Ensure capacity for the value + NUL terminator.
+  IREE_RETURN_IF_ERROR(
+      iree_string_builder_reserve(builder, builder->size + value.size + 1));
+  if (builder->buffer != NULL) {
+    // Only copy the bytes if we are not doing a size calculation.
+    memcpy(builder->buffer + builder->size, value.data, value.size);
+    builder->buffer[builder->size + value.size] = 0;  // NUL
+  }
+  builder->size += value.size;
+  return iree_ok_status();
+}
+
+// Appends a NUL-terminated C string (length taken via strlen).
+IREE_API_EXPORT iree_status_t iree_string_builder_append_cstring(
+    iree_string_builder_t* builder, const char* value) {
+  return iree_string_builder_append_string(builder,
+                                           iree_make_cstring_view(value));
+}
+
+// Two-pass printf-style append. Takes two va_list copies because vsnprintf
+// consumes the list it is handed and a second formatting pass may be needed
+// after growing the buffer.
+static iree_status_t iree_string_builder_append_format_impl(
+    iree_string_builder_t* builder, const char* format, va_list varargs_0,
+    va_list varargs_1) {
+  // Try to directly print into the buffer we have. This may work if we have
+  // capacity but otherwise will yield us the size we need to grow our buffer.
+  int n = vsnprintf(builder->buffer ? builder->buffer + builder->size : NULL,
+                    builder->buffer ? builder->capacity - builder->size : 0,
+                    format, varargs_0);
+  if (IREE_UNLIKELY(n < 0)) {
+    return iree_make_status(IREE_STATUS_INTERNAL, "printf try failed");
+  }
+  if (n < builder->capacity - builder->size) {
+    // Printed into the buffer.
+    builder->size += n;
+    return iree_ok_status();
+  }
+
+  // Reserve new minimum capacity.
+  IREE_RETURN_IF_ERROR(iree_string_builder_reserve(
+      builder, iree_string_builder_size(builder) + n + /*NUL*/ 1));
+
+  // Try printing again.
+  // NOTE(review): the second vsnprintf result is unchecked; it is assumed to
+  // produce exactly |n| characters now that the reservation made room —
+  // confirm this holds for storage-backed (non-growable) builders.
+  vsnprintf(builder->buffer ? builder->buffer + builder->size : NULL,
+            builder->buffer ? builder->capacity - builder->size : 0, format,
+            varargs_1);
+  builder->size += n;
+  return iree_ok_status();
+}
+
+// Public varargs entry point: materializes the two va_list copies required
+// by the two-pass implementation above.
+IREE_API_EXPORT iree_status_t IREE_PRINTF_ATTRIBUTE(2, 3)
+    iree_string_builder_append_format(iree_string_builder_t* builder,
+                                      const char* format, ...) {
+  va_list varargs_0, varargs_1;
+  va_start(varargs_0, format);
+  va_start(varargs_1, format);
+  iree_status_t status = iree_string_builder_append_format_impl(
+      builder, format, varargs_0, varargs_1);
+  va_end(varargs_1);
+  va_end(varargs_0);
+  return status;
+}
diff --git a/runtime/src/iree/base/string_builder.h b/runtime/src/iree/base/string_builder.h
new file mode 100644
index 0000000..ff6eeba
--- /dev/null
+++ b/runtime/src/iree/base/string_builder.h
@@ -0,0 +1,126 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_STRING_BUILDER_H_
+#define IREE_BASE_STRING_BUILDER_H_
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "iree/base/allocator.h"
+#include "iree/base/attributes.h"
+#include "iree/base/status.h"
+#include "iree/base/string_view.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Lightweight string builder.
+// Used to dynamically produce strings in a growable buffer.
+//
+// Usage:
+//  iree_string_builder_t builder;
+//  iree_string_builder_initialize(iree_allocator_system(), &builder);
+//  IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(&builder, "hel"));
+//  IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(&builder, "lo"));
+//  fprintf(stream, "%.*s", (int)iree_string_builder_size(&builder),
+//          iree_string_builder_buffer(&builder));
+//  iree_string_builder_deinitialize(&builder);
+//
+// Usage for preallocation:
+//  iree_string_builder_t builder;
+//  iree_string_builder_initialize(iree_allocator_null(), &builder);
+//  IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(&builder, "123"));
+//  // str_length is total number of characters (excluding NUL).
+//  iree_host_size_t str_length = iree_string_builder_size(&builder);
+//  iree_string_builder_deinitialize(&builder);
+typedef struct iree_string_builder_t {
+  // Allocator used for buffer storage.
+  // May be iree_allocator_null() to have the builder total up the required
+  // size.
+  iree_allocator_t allocator;
+  // Allocated storage buffer, if any.
+  char* buffer;
+  // Total length of the string in the buffer in characters (excluding NUL).
+  iree_host_size_t size;
+  // Total allocated buffer capacity in bytes.
+  iree_host_size_t capacity;
+} iree_string_builder_t;
+
+// Initializes a string builder in |out_builder| with the given |allocator|.
+IREE_API_EXPORT void iree_string_builder_initialize(
+ iree_allocator_t allocator, iree_string_builder_t* out_builder);
+
+// Initializes a string builder in |out_builder| using the given storage.
+// Once the capacity is reached further appending will fail.
+IREE_API_EXPORT void iree_string_builder_initialize_with_storage(
+ char* buffer, iree_host_size_t buffer_capacity,
+ iree_string_builder_t* out_builder);
+
+// Deinitializes |builder| and releases allocated storage.
+IREE_API_EXPORT void iree_string_builder_deinitialize(
+ iree_string_builder_t* builder);
+
+// Returns a pointer into the builder storage.
+// The pointer is only valid so long as the string builder is initialized and
+// unmodified.
+IREE_API_EXPORT const char* iree_string_builder_buffer(
+ const iree_string_builder_t* builder);
+
+// Returns the total length of the string in the buffer in characters (excluding
+// NUL).
+IREE_API_EXPORT iree_host_size_t
+iree_string_builder_size(const iree_string_builder_t* builder);
+
+// Returns the total allocated buffer capacity in bytes.
+IREE_API_EXPORT iree_host_size_t
+iree_string_builder_capacity(const iree_string_builder_t* builder);
+
+// Returns a string view into the builder storage.
+// The pointer is only valid so long as the string builder is initialized and
+// unmodified.
+IREE_API_EXPORT iree_string_view_t
+iree_string_builder_view(const iree_string_builder_t* builder);
+
+// Releases the storage from the builder and returns ownership to the caller.
+// The caller must free the string using the same allocator used by the builder.
+// Returns NULL if the string builder is empty.
+//
+// Usage:
+// iree_string_builder_t builder;
+// iree_string_builder_initialize(iree_allocator_system(), &builder);
+// ...
+// char* buffer = iree_string_builder_take_storage(&builder);
+// iree_host_size_t buffer_size = iree_string_builder_size(&builder);
+// iree_string_builder_deinitialize(&builder);
+// ...
+// iree_allocator_free(iree_allocator_system(), buffer);
+IREE_API_EXPORT IREE_MUST_USE_RESULT char* iree_string_builder_take_storage(
+ iree_string_builder_t* builder);
+
+// Reserves storage for at least |minimum_capacity|.
+IREE_API_EXPORT iree_status_t iree_string_builder_reserve(
+ iree_string_builder_t* builder, iree_host_size_t minimum_capacity);
+
+// Appends a string to the builder.
+IREE_API_EXPORT iree_status_t iree_string_builder_append_string(
+ iree_string_builder_t* builder, iree_string_view_t value);
+
+// Appends a NUL-terminated C string to the builder.
+IREE_API_EXPORT iree_status_t iree_string_builder_append_cstring(
+ iree_string_builder_t* builder, const char* value);
+
+// Appends a printf-style formatted string to the builder.
+IREE_API_EXPORT IREE_PRINTF_ATTRIBUTE(2, 3) iree_status_t
+ iree_string_builder_append_format(iree_string_builder_t* builder,
+ const char* format, ...);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_STRING_BUILDER_H_
diff --git a/runtime/src/iree/base/string_builder_test.cc b/runtime/src/iree/base/string_builder_test.cc
new file mode 100644
index 0000000..fad7034
--- /dev/null
+++ b/runtime/src/iree/base/string_builder_test.cc
@@ -0,0 +1,164 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <string>
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+// RAII wrapper over iree_string_builder_t used by the tests below.
+struct StringBuilder {
+  // Heap-backed builder using the system allocator.
+  static StringBuilder MakeSystem() {
+    iree_string_builder_t builder;
+    iree_string_builder_initialize(iree_allocator_system(), &builder);
+    return StringBuilder(builder);
+  }
+
+  // Storage-less builder (null allocator): size-calculation mode only.
+  static StringBuilder MakeEmpty() {
+    iree_string_builder_t builder;
+    iree_string_builder_initialize(iree_allocator_null(), &builder);
+    return StringBuilder(builder);
+  }
+
+  // NOTE(review): iree_string_builder_t is a plain C struct so std::move is
+  // effectively a copy; the factories above rely on copy elision so exactly
+  // one wrapper deinitializes the storage — confirm on pre-C++17 toolchains
+  // where elision of the returned prvalue is not guaranteed.
+  explicit StringBuilder(iree_string_builder_t builder)
+      : builder(std::move(builder)) {}
+
+  ~StringBuilder() { iree_string_builder_deinitialize(&builder); }
+
+  // Implicit conversion so the wrapper can be passed to the C API directly.
+  operator iree_string_builder_t*() { return &builder; }
+
+  // Copies the current contents into a std::string for easy comparison.
+  std::string ToString() const {
+    return std::string(builder.buffer, builder.size);
+  }
+
+  iree_string_builder_t builder;
+};
+
+// Size-calculation mode: a fresh builder reports NULL storage, zero
+// size/capacity, an empty view, and yields NULL from take_storage.
+TEST(StringBuilderTest, QueryEmpty) {
+  auto builder = StringBuilder::MakeEmpty();
+  EXPECT_EQ(iree_string_builder_buffer(builder),
+            static_cast<const char*>(NULL));
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  EXPECT_EQ(iree_string_builder_capacity(builder), 0);
+  EXPECT_TRUE(iree_string_view_is_empty(iree_string_builder_view(builder)));
+  EXPECT_EQ(iree_string_builder_take_storage(builder),
+            static_cast<char*>(NULL));
+}
+
+// Size-calculation mode: appends accumulate the total required length
+// without ever allocating storage.
+TEST(StringBuilderTest, QueryAppendString) {
+  auto builder = StringBuilder::MakeEmpty();
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, ""));
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, "a"));
+  EXPECT_EQ(iree_string_builder_size(builder), 1);
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, "abc"));
+  EXPECT_EQ(iree_string_builder_size(builder), 1 + 3);
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, ""));
+  EXPECT_EQ(iree_string_builder_size(builder), 1 + 3);
+
+  char kLongString[1024];
+  memset(kLongString, 'x', IREE_ARRAYSIZE(kLongString));
+  IREE_EXPECT_OK(iree_string_builder_append_string(
+      builder,
+      iree_make_string_view(kLongString, IREE_ARRAYSIZE(kLongString))));
+  EXPECT_EQ(iree_string_builder_size(builder),
+            1 + 3 + IREE_ARRAYSIZE(kLongString));
+}
+
+// Size-calculation mode with printf-style appends: formatted lengths are
+// totaled without storage (the %*c case expands to 1024 characters).
+TEST(StringBuilderTest, QueryFormat) {
+  auto builder = StringBuilder::MakeEmpty();
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  IREE_EXPECT_OK(iree_string_builder_append_format(builder, ""));
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  IREE_EXPECT_OK(iree_string_builder_append_format(builder, "abc"));
+  EXPECT_EQ(iree_string_builder_size(builder), 3);
+  IREE_EXPECT_OK(iree_string_builder_append_format(builder, "a%cc", 'b'));
+  EXPECT_EQ(iree_string_builder_size(builder), 6);
+  IREE_EXPECT_OK(iree_string_builder_append_format(builder, "%*c", 1024, 'x'));
+  EXPECT_EQ(iree_string_builder_size(builder), 6 + 1024);
+}
+
+// Heap-backed builder: an untouched builder is empty and has no storage to
+// take.
+TEST(StringBuilderTest, Empty) {
+  auto builder = StringBuilder::MakeSystem();
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  EXPECT_GE(iree_string_builder_capacity(builder), 0);
+  EXPECT_TRUE(iree_string_view_is_empty(iree_string_builder_view(builder)));
+  EXPECT_EQ(iree_string_builder_take_storage(builder),
+            static_cast<char*>(NULL));
+}
+
+// Heap-backed appends: contents accumulate in order and the buffer remains
+// NUL-terminated after every append (checked via strlen).
+TEST(StringBuilderTest, AppendString) {
+  auto builder = StringBuilder::MakeSystem();
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, ""));
+  EXPECT_EQ(builder.ToString(), "");
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, "a"));
+  EXPECT_EQ(builder.ToString(), "a");
+  EXPECT_EQ(strlen(builder.builder.buffer), 1);  // NUL check
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, "abc"));
+  EXPECT_EQ(builder.ToString(), "aabc");
+  EXPECT_EQ(strlen(builder.builder.buffer), 1 + 3);  // NUL check
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, ""));
+  EXPECT_EQ(builder.ToString(), "aabc");
+  EXPECT_EQ(iree_string_builder_size(builder), 1 + 3);
+  EXPECT_EQ(strlen(builder.builder.buffer), 1 + 3);  // NUL check
+
+  // A 1KiB append forces the builder through at least one growth cycle.
+  char kLongString[1024];
+  memset(kLongString, 'x', IREE_ARRAYSIZE(kLongString));
+  IREE_EXPECT_OK(iree_string_builder_append_string(
+      builder,
+      iree_make_string_view(kLongString, IREE_ARRAYSIZE(kLongString))));
+  EXPECT_EQ(iree_string_builder_size(builder),
+            1 + 3 + IREE_ARRAYSIZE(kLongString));
+  EXPECT_EQ(strlen(builder.builder.buffer),
+            1 + 3 + IREE_ARRAYSIZE(kLongString));  // NUL check
+  EXPECT_EQ(builder.ToString(),
+            std::string("aabc") +
+                std::string(kLongString, IREE_ARRAYSIZE(kLongString)));
+}
+
+// take_storage transfers ownership: the builder is reset to empty and the
+// returned buffer must be freed by the caller with the builder's allocator.
+TEST(StringBuilderTest, TakeStorage) {
+  auto builder = StringBuilder::MakeSystem();
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, "a"));
+  EXPECT_EQ(builder.ToString(), "a");
+  IREE_EXPECT_OK(iree_string_builder_append_cstring(builder, "abc"));
+  EXPECT_EQ(builder.ToString(), "aabc");
+  EXPECT_EQ(iree_string_builder_size(builder), 1 + 3);
+  EXPECT_EQ(strlen(builder.builder.buffer),
+            1 + 3);  // NUL check
+
+  char* storage = iree_string_builder_take_storage(builder);
+  EXPECT_EQ(iree_string_builder_buffer(builder),
+            static_cast<const char*>(NULL));
+  EXPECT_EQ(iree_string_builder_size(builder), 0);
+  EXPECT_EQ(iree_string_builder_capacity(builder), 0);
+  EXPECT_NE(storage, static_cast<char*>(NULL));
+  EXPECT_STREQ(storage, "aabc");
+  EXPECT_EQ(builder.builder.buffer, static_cast<char*>(NULL));
+  iree_allocator_free(builder.builder.allocator, storage);
+}
+
+// Heap-backed printf-style appends; %*c with width 1024 yields 1023 spaces
+// followed by 'x', exercising the grow-and-reformat path.
+TEST(StringBuilderTest, Format) {
+  auto builder = StringBuilder::MakeSystem();
+  EXPECT_EQ(builder.ToString(), "");
+  IREE_EXPECT_OK(iree_string_builder_append_format(builder, ""));
+  EXPECT_EQ(builder.ToString(), "");
+  IREE_EXPECT_OK(iree_string_builder_append_format(builder, "abc"));
+  EXPECT_EQ(builder.ToString(), "abc");
+  IREE_EXPECT_OK(iree_string_builder_append_format(builder, "a%cc", 'b'));
+  EXPECT_EQ(builder.ToString(), "abcabc");
+  IREE_EXPECT_OK(iree_string_builder_append_format(builder, "%*c", 1024, 'x'));
+  EXPECT_EQ(iree_string_builder_size(builder), 6 + 1024);
+  EXPECT_EQ(strlen(builder.builder.buffer), 6 + 1024);  // NUL check
+  EXPECT_EQ(builder.ToString(),
+            std::string("abcabc") + std::string(1023, ' ') + std::string("x"));
+}
+
+} // namespace
diff --git a/runtime/src/iree/base/string_view.c b/runtime/src/iree/base/string_view.c
new file mode 100644
index 0000000..f117939
--- /dev/null
+++ b/runtime/src/iree/base/string_view.c
@@ -0,0 +1,387 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/string_view.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+
+// Local min helper over size_t (kept file-local to avoid extra includes).
+static inline size_t iree_min_host_size(size_t a, size_t b) {
+  return a < b ? a : b;
+}
+
+// Byte-wise equality: sizes must match exactly, then every byte compares
+// equal.
+IREE_API_EXPORT bool iree_string_view_equal(iree_string_view_t lhs,
+                                            iree_string_view_t rhs) {
+  if (lhs.size != rhs.size) return false;
+  for (iree_host_size_t i = 0; i < lhs.size; ++i) {
+    if (lhs.data[i] != rhs.data[i]) return false;
+  }
+  return true;
+}
+
+// Three-way compare: common prefix decides first, then the shorter string
+// orders before the longer one (mirrors std::string_view::compare).
+IREE_API_EXPORT int iree_string_view_compare(iree_string_view_t lhs,
+                                             iree_string_view_t rhs) {
+  iree_host_size_t min_size = iree_min_host_size(lhs.size, rhs.size);
+  int cmp = strncmp(lhs.data, rhs.data, min_size);
+  if (cmp != 0) {
+    return cmp;
+  } else if (lhs.size == rhs.size) {
+    return 0;
+  }
+  return lhs.size < rhs.size ? -1 : 1;
+}
+
+// Returns the index of the first |c| at or after |pos|, or NPOS if absent.
+IREE_API_EXPORT iree_host_size_t iree_string_view_find_char(
+    iree_string_view_t value, char c, iree_host_size_t pos) {
+  if (iree_string_view_is_empty(value) || pos >= value.size) {
+    return IREE_STRING_VIEW_NPOS;
+  }
+  const char* result =
+      (const char*)(memchr(value.data + pos, c, value.size - pos));
+  return result != NULL ? result - value.data : IREE_STRING_VIEW_NPOS;
+}
+
+// Returns the index of the first character of |value| (at or after |pos|)
+// that appears in |s|, or NPOS. Builds a 256-entry membership table so the
+// scan is O(|s| + |value|) instead of O(|s| * |value|).
+IREE_API_EXPORT iree_host_size_t iree_string_view_find_first_of(
+    iree_string_view_t value, iree_string_view_t s, iree_host_size_t pos) {
+  if (iree_string_view_is_empty(value) || iree_string_view_is_empty(s)) {
+    return IREE_STRING_VIEW_NPOS;
+  }
+  if (s.size == 1) {
+    // Avoid the cost of the lookup table for a single-character search.
+    return iree_string_view_find_char(value, s.data[0], pos);
+  }
+  bool lookup_table[UCHAR_MAX + 1] = {0};
+  for (iree_host_size_t i = 0; i < s.size; ++i) {
+    lookup_table[(uint8_t)s.data[i]] = true;
+  }
+  for (iree_host_size_t i = pos; i < value.size; ++i) {
+    if (lookup_table[(uint8_t)value.data[i]]) {
+      return i;
+    }
+  }
+  return IREE_STRING_VIEW_NPOS;
+}
+
+// Returns the index of the last character of |value| (at or before |pos|)
+// that appears in |s|, or NPOS.
+IREE_API_EXPORT iree_host_size_t iree_string_view_find_last_of(
+    iree_string_view_t value, iree_string_view_t s, iree_host_size_t pos) {
+  if (iree_string_view_is_empty(value) || iree_string_view_is_empty(s)) {
+    return IREE_STRING_VIEW_NPOS;
+  }
+  bool lookup_table[UCHAR_MAX + 1] = {0};
+  for (iree_host_size_t i = 0; i < s.size; ++i) {
+    lookup_table[(uint8_t)s.data[i]] = true;
+  }
+  // Clamp to the last valid index (value.size >= 1 per the check above).
+  // The previous `iree_min(pos, value.size) + 1` made the first probed index
+  // value.data[value.size] — one past the end — whenever pos >= value.size,
+  // including the common NPOS default. Clamping to size - 1 fixes the
+  // out-of-bounds read while leaving in-range |pos| behavior unchanged.
+  iree_host_size_t i = iree_min(pos, value.size - 1) + 1;
+  while (i != 0) {
+    --i;
+    if (lookup_table[(uint8_t)value.data[i]]) {
+      return i;
+    }
+  }
+  return IREE_STRING_VIEW_NPOS;
+}
+
+// True if |value| begins with |prefix|.
+// NOTE(review): an empty (or NULL) prefix yields false here, whereas
+// std::string_view::starts_with("") yields true — presumably intentional;
+// confirm callers expect this divergence.
+IREE_API_EXPORT bool iree_string_view_starts_with(iree_string_view_t value,
+                                                  iree_string_view_t prefix) {
+  if (!value.data || !prefix.data || !prefix.size || prefix.size > value.size) {
+    return false;
+  }
+  return strncmp(value.data, prefix.data, prefix.size) == 0;
+}
+
+// True if |value| ends with |suffix| (empty suffix yields false, as above).
+IREE_API_EXPORT bool iree_string_view_ends_with(iree_string_view_t value,
+                                                iree_string_view_t suffix) {
+  if (!value.data || !suffix.data || !suffix.size || suffix.size > value.size) {
+    return false;
+  }
+  return strncmp(value.data + value.size - suffix.size, suffix.data,
+                 suffix.size) == 0;
+}
+
+// Drops the first |n| characters; removing everything (n >= size) returns
+// the canonical empty view rather than a zero-length tail pointer.
+IREE_API_EXPORT iree_string_view_t
+iree_string_view_remove_prefix(iree_string_view_t value, iree_host_size_t n) {
+  if (n >= value.size) {
+    return iree_string_view_empty();
+  }
+  return iree_make_string_view(value.data + n, value.size - n);
+}
+
+// Drops the last |n| characters (same empty-view convention as above).
+IREE_API_EXPORT iree_string_view_t
+iree_string_view_remove_suffix(iree_string_view_t value, iree_host_size_t n) {
+  if (n >= value.size) {
+    return iree_string_view_empty();
+  }
+  return iree_make_string_view(value.data, value.size - n);
+}
+
+// Removes |prefix| if present; otherwise returns |value| unchanged.
+IREE_API_EXPORT iree_string_view_t iree_string_view_strip_prefix(
+    iree_string_view_t value, iree_string_view_t prefix) {
+  if (iree_string_view_starts_with(value, prefix)) {
+    return iree_string_view_remove_prefix(value, prefix.size);
+  }
+  return value;
+}
+
+// Removes |suffix| if present; otherwise returns |value| unchanged.
+IREE_API_EXPORT iree_string_view_t iree_string_view_strip_suffix(
+    iree_string_view_t value, iree_string_view_t suffix) {
+  if (iree_string_view_ends_with(value, suffix)) {
+    return iree_string_view_remove_suffix(value, suffix.size);
+  }
+  return value;
+}
+
+// In-place strip_prefix: mutates |*value| and reports whether it matched.
+IREE_API_EXPORT bool iree_string_view_consume_prefix(
+    iree_string_view_t* value, iree_string_view_t prefix) {
+  if (iree_string_view_starts_with(*value, prefix)) {
+    *value = iree_string_view_remove_prefix(*value, prefix.size);
+    return true;
+  }
+  return false;
+}
+
+// In-place strip_suffix: mutates |*value| and reports whether it matched.
+IREE_API_EXPORT bool iree_string_view_consume_suffix(
+    iree_string_view_t* value, iree_string_view_t suffix) {
+  if (iree_string_view_ends_with(*value, suffix)) {
+    *value = iree_string_view_remove_suffix(*value, suffix.size);
+    return true;
+  }
+  return false;
+}
+}
+
+// Strips whitespace (per isspace) from both ends of |value| without copying.
+IREE_API_EXPORT iree_string_view_t
+iree_string_view_trim(iree_string_view_t value) {
+  if (iree_string_view_is_empty(value)) return value;
+  iree_host_size_t start = 0;
+  iree_host_size_t end = value.size - 1;
+  // Advance past leading whitespace. Cast to unsigned char: passing a plain
+  // (possibly negative) char to isspace() is undefined behavior in C.
+  while (start <= end && isspace((unsigned char)value.data[start])) {
+    ++start;
+  }
+  // Retreat past trailing whitespace; stops at |start| so a fully-whitespace
+  // string collapses to an empty view via the size arithmetic below.
+  while (end > start && isspace((unsigned char)value.data[end])) {
+    --end;
+  }
+  return iree_make_string_view(value.data + start, end - start + 1);
+}
+
+// Returns the substring [pos, pos+n); both |pos| and |n| are clamped to the
+// bounds of |value| so out-of-range requests (e.g. n == NPOS) are safe.
+IREE_API_EXPORT iree_string_view_t iree_string_view_substr(
+    iree_string_view_t value, iree_host_size_t pos, iree_host_size_t n) {
+  pos = iree_min_host_size(pos, value.size);
+  n = iree_min_host_size(n, value.size - pos);
+  return iree_make_string_view(value.data + pos, n);
+}
+
+// Splits |value| at the first |split_char| into |out_lhs|/|out_rhs| (the
+// separator itself is dropped). Returns the separator offset, or -1 when the
+// input is empty or the separator is absent (then everything is the lhs).
+// Either out param may be NULL when the caller only wants one side; the
+// previous version dereferenced both unconditionally at the top while
+// NULL-guarding them below — now every store is guarded consistently.
+IREE_API_EXPORT intptr_t iree_string_view_split(iree_string_view_t value,
+                                                char split_char,
+                                                iree_string_view_t* out_lhs,
+                                                iree_string_view_t* out_rhs) {
+  if (out_lhs) *out_lhs = iree_string_view_empty();
+  if (out_rhs) *out_rhs = iree_string_view_empty();
+  if (!value.data || !value.size) {
+    return -1;
+  }
+  const void* first_ptr = memchr(value.data, split_char, value.size);
+  if (!first_ptr) {
+    // No separator: the whole input is the lhs.
+    if (out_lhs) *out_lhs = value;
+    return -1;
+  }
+  intptr_t offset = (intptr_t)((const char*)(first_ptr)-value.data);
+  if (out_lhs) {
+    out_lhs->data = value.data;
+    out_lhs->size = offset;
+  }
+  if (out_rhs) {
+    out_rhs->data = value.data + offset + 1;
+    out_rhs->size = value.size - offset - 1;
+  }
+  return offset;
+}
+
+// Replaces every |old_char| with |new_char| in place.
+// NOTE: casts away the view's const — the caller must be passing a view over
+// mutable storage.
+IREE_API_EXPORT void iree_string_view_replace_char(iree_string_view_t value,
+                                                   char old_char,
+                                                   char new_char) {
+  char* p = (char*)value.data;
+  for (iree_host_size_t i = 0; i < value.size; ++i) {
+    if (p[i] == old_char) p[i] = new_char;
+  }
+}
+
+// Recursive glob-style matcher supporting '*' (any run, possibly empty) and
+// '?' (any single character). Fast-forwards over the literal prefix before
+// the first wildcard, then recurses one character (or one '*' binding) at a
+// time. Worst case is exponential backtracking on patterns with many '*'s —
+// fine for the short patterns this is used with.
+static bool iree_string_view_match_pattern_impl(iree_string_view_t value,
+                                                iree_string_view_t pattern) {
+  iree_host_size_t next_char_index = iree_string_view_find_first_of(
+      pattern, iree_make_cstring_view("*?"), /*pos=*/0);
+  if (next_char_index == IREE_STRING_VIEW_NPOS) {
+    // No wildcards at all: plain equality.
+    return iree_string_view_equal(value, pattern);
+  } else if (next_char_index > 0) {
+    // Match the literal prefix before the first wildcard, then continue with
+    // the remainders.
+    iree_string_view_t value_prefix =
+        iree_string_view_substr(value, 0, next_char_index);
+    iree_string_view_t pattern_prefix =
+        iree_string_view_substr(pattern, 0, next_char_index);
+    if (!iree_string_view_equal(value_prefix, pattern_prefix)) {
+      return false;
+    }
+    value =
+        iree_string_view_substr(value, next_char_index, IREE_STRING_VIEW_NPOS);
+    pattern = iree_string_view_substr(pattern, next_char_index,
+                                      IREE_STRING_VIEW_NPOS);
+  }
+  if (iree_string_view_is_empty(value) && iree_string_view_is_empty(pattern)) {
+    return true;
+  }
+  char pattern_char = pattern.data[0];
+  if (pattern_char == '*' && pattern.size > 1 &&
+      iree_string_view_is_empty(value)) {
+    // '*' followed by more pattern cannot match an exhausted value.
+    return false;
+  } else if (pattern_char == '*' && pattern.size == 1) {
+    // Trailing '*' matches any remainder.
+    return true;
+  } else if (pattern_char == '?' || value.data[0] == pattern_char) {
+    // Consume one matched character from both sides.
+    return iree_string_view_match_pattern_impl(
+        iree_string_view_substr(value, 1, IREE_STRING_VIEW_NPOS),
+        iree_string_view_substr(pattern, 1, IREE_STRING_VIEW_NPOS));
+  } else if (pattern_char == '*') {
+    // '*' either matches nothing (advance pattern) or one more character
+    // (advance value, keep the '*').
+    return iree_string_view_match_pattern_impl(
+               value,
+               iree_string_view_substr(pattern, 1, IREE_STRING_VIEW_NPOS)) ||
+           iree_string_view_match_pattern_impl(
+               iree_string_view_substr(value, 1, IREE_STRING_VIEW_NPOS),
+               pattern);
+  }
+  return false;
+}
+
+// Public wrapper over the recursive matcher above.
+IREE_API_EXPORT bool iree_string_view_match_pattern(
+    iree_string_view_t value, iree_string_view_t pattern) {
+  return iree_string_view_match_pattern_impl(value, pattern);
+}
+
+// Copies |source_value| into |buffer| and points |target_value| at the copy;
+// returns the number of bytes consumed. The caller guarantees |buffer| has
+// at least source_value.size bytes available.
+IREE_API_EXPORT iree_host_size_t iree_string_view_append_to_buffer(
+    iree_string_view_t source_value, iree_string_view_t* target_value,
+    char* buffer) {
+  memcpy(buffer, source_value.data, source_value.size);
+  target_value->data = buffer;
+  target_value->size = source_value.size;
+  return source_value.size;
+}
+
+// NOTE: these implementations aren't great due to the enforced memcpy we
+// perform. These _should_ never be on a hot path, though, so this keeps our
+// code size small.
+
+// Parses |value| as a signed 32-bit integer (base auto-detected: 0x/0
+// prefixes honored). Returns false on parse failure or out-of-range input.
+IREE_API_EXPORT bool iree_string_view_atoi_int32(iree_string_view_t value,
+                                                 int32_t* out_value) {
+  // Copy to scratch memory with a NUL terminator so strtol can be used.
+  char temp[16] = {0};
+  if (value.size >= IREE_ARRAYSIZE(temp)) return false;
+  memcpy(temp, value.data, value.size);
+
+  // Attempt to parse.
+  errno = 0;
+  char* end = NULL;
+  long parsed_value = strtol(temp, &end, 0);
+  if (temp == end) return false;  // no digits consumed
+  if ((parsed_value == LONG_MIN || parsed_value == LONG_MAX) &&
+      errno == ERANGE) {
+    return false;
+  }
+  // On LP64 platforms long is 64 bits wide, so values outside the int32
+  // range parse without ERANGE; reject them instead of silently truncating
+  // in the cast below (no-op where long is already 32 bits).
+  if (parsed_value < INT32_MIN || parsed_value > INT32_MAX) return false;
+  *out_value = (int32_t)parsed_value;
+  return parsed_value != 0 || errno == 0;
+}
+
+// Parses |value| as an unsigned 32-bit integer (base auto-detected).
+// Returns false on parse failure or out-of-range input.
+// NOTE(review): strtoul accepts a leading '-' and wraps — preserved from the
+// original behavior; confirm whether negatives should be rejected.
+IREE_API_EXPORT bool iree_string_view_atoi_uint32(iree_string_view_t value,
+                                                  uint32_t* out_value) {
+  // Copy to scratch memory with a NUL terminator so strtoul can be used.
+  char temp[16] = {0};
+  if (value.size >= IREE_ARRAYSIZE(temp)) return false;
+  memcpy(temp, value.data, value.size);
+
+  // Attempt to parse.
+  errno = 0;
+  char* end = NULL;
+  unsigned long parsed_value = strtoul(temp, &end, 0);
+  if (temp == end) return false;  // no digits consumed
+  if (parsed_value == ULONG_MAX && errno == ERANGE) return false;
+  // Where unsigned long is 64 bits, values above UINT32_MAX parse without
+  // ERANGE; reject them instead of silently truncating in the cast below.
+  if (parsed_value > UINT32_MAX) return false;
+  *out_value = (uint32_t)parsed_value;
+  return parsed_value != 0 || errno == 0;
+}
+
+// Parses |value| as a signed 64-bit integer (base auto-detected).
+// Returns false on parse failure or ERANGE overflow/underflow.
+IREE_API_EXPORT bool iree_string_view_atoi_int64(iree_string_view_t value,
+                                                 int64_t* out_value) {
+  // Copy to scratch memory with a NUL terminator so strtoll can be used.
+  char temp[32] = {0};
+  if (value.size >= IREE_ARRAYSIZE(temp)) return false;
+  memcpy(temp, value.data, value.size);
+
+  // Attempt to parse.
+  errno = 0;
+  char* end = NULL;
+  long long parsed_value = strtoll(temp, &end, 0);
+  if (temp == end) return false;  // no digits consumed
+  if ((parsed_value == LLONG_MIN || parsed_value == LLONG_MAX) &&
+      errno == ERANGE) {
+    return false;
+  }
+  *out_value = (int64_t)parsed_value;
+  // NOTE(review): the errno check on a zero result appears to guard against
+  // failed parses that report 0 — confirm the intended edge case.
+  return parsed_value != 0 || errno == 0;
+}
+
+// Parses |value| as an unsigned 64-bit integer (base auto-detected).
+IREE_API_EXPORT bool iree_string_view_atoi_uint64(iree_string_view_t value,
+                                                  uint64_t* out_value) {
+  // Copy to scratch memory with a NUL terminator so strtoull can be used.
+  char temp[32] = {0};
+  if (value.size >= IREE_ARRAYSIZE(temp)) return false;
+  memcpy(temp, value.data, value.size);
+
+  // Attempt to parse.
+  errno = 0;
+  char* end = NULL;
+  unsigned long long parsed_value = strtoull(temp, &end, 0);
+  if (temp == end) return false;  // no digits consumed
+  if (parsed_value == ULLONG_MAX && errno == ERANGE) return false;
+  *out_value = (uint64_t)parsed_value;
+  return parsed_value != 0 || errno == 0;
+}
+
+// Parses |value| as a float via strtof.
+// NOTE(review): an ERANGE underflow yields a 0 result with errno set, which
+// this returns as failure — confirm that subnormal/underflow inputs should
+// be rejected.
+IREE_API_EXPORT bool iree_string_view_atof(iree_string_view_t value,
+                                           float* out_value) {
+  // Copy to scratch memory with a NUL terminator so strtof can be used.
+  char temp[32] = {0};
+  if (value.size >= IREE_ARRAYSIZE(temp)) return false;
+  memcpy(temp, value.data, value.size);
+
+  // Attempt to parse.
+  errno = 0;
+  char* end = NULL;
+  *out_value = strtof(temp, &end);
+  if (temp == end) return false;  // no digits consumed
+  return *out_value != 0 || errno == 0;
+}
+
+// Parses |value| as a double via strtod (same caveats as atof above).
+IREE_API_EXPORT bool iree_string_view_atod(iree_string_view_t value,
+                                           double* out_value) {
+  // Copy to scratch memory with a NUL terminator so strtod can be used.
+  char temp[32] = {0};
+  if (value.size >= IREE_ARRAYSIZE(temp)) return false;
+  memcpy(temp, value.data, value.size);
+
+  // Attempt to parse.
+  errno = 0;
+  char* end = NULL;
+  *out_value = strtod(temp, &end);
+  if (temp == end) return false;  // no digits consumed
+  return *out_value != 0 || errno == 0;
+}
diff --git a/runtime/src/iree/base/string_view.h b/runtime/src/iree/base/string_view.h
new file mode 100644
index 0000000..5d191a4
--- /dev/null
+++ b/runtime/src/iree/base/string_view.h
@@ -0,0 +1,176 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_STRING_VIEW_H_
+#define IREE_BASE_STRING_VIEW_H_
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/attributes.h"
+#include "iree/base/config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define IREE_STRING_VIEW_NPOS SIZE_MAX
+
+// A string view (ala std::string_view) into a non-NUL-terminated string.
+typedef struct iree_string_view_t {
+ const char* data;
+ iree_host_size_t size;
+} iree_string_view_t;
+
+// Returns an empty string view ("").
+// The returned view has data == NULL and size == 0; both conditions are
+// treated as empty by iree_string_view_is_empty.
+static inline iree_string_view_t iree_string_view_empty(void) {
+  iree_string_view_t v = {0, 0};
+  return v;
+}
+
+// Returns true if the given string view is the empty string.
+#define iree_string_view_is_empty(sv) (((sv).data == NULL) || ((sv).size == 0))
+
+// Returns a string view referencing |str_length| bytes starting at |str|.
+// Does not copy: the caller must keep |str| live for the lifetime of the
+// returned view. |str| need not be NUL terminated.
+static inline iree_string_view_t iree_make_string_view(
+    const char* str, iree_host_size_t str_length) {
+  iree_string_view_t v = {str, str_length};
+  return v;
+}
+
+// Returns a string view initialized with a reference to the given
+// NUL-terminated string literal.
+// Performs an O(length) strlen scan at runtime; for compile-time literals
+// prefer iree_string_view_literal/IREE_SVL which sizes at compile time.
+static inline iree_string_view_t iree_make_cstring_view(const char* str) {
+  iree_string_view_t v = {str, strlen(str)};
+  return v;
+}
+
+#define iree_string_view_literal(str) \
+ { .data = (str), .size = IREE_ARRAYSIZE(str) - 1 }
+
+// Returns a string view initialized with the given cstring.
+#define IREE_SV(cstr) iree_make_cstring_view(cstr)
+
+// Returns a string view initialized with the given string literal.
+#define IREE_SVL(cstr) iree_string_view_literal(cstr)
+
+// Returns true if the two strings are equal (compare == 0).
+IREE_API_EXPORT bool iree_string_view_equal(iree_string_view_t lhs,
+ iree_string_view_t rhs);
+
+// Like std::string::compare but with iree_string_view_t values.
+IREE_API_EXPORT int iree_string_view_compare(iree_string_view_t lhs,
+ iree_string_view_t rhs);
+
+// Finds the first occurrence of |c| in |value| starting at |pos|.
+// Returns the found character position or IREE_STRING_VIEW_NPOS if not found.
+IREE_API_EXPORT iree_host_size_t iree_string_view_find_char(
+ iree_string_view_t value, char c, iree_host_size_t pos);
+
+// Returns the index of the first occurrence of one of the characters in |s| or
+// IREE_STRING_VIEW_NPOS if none of the characters were found.
+IREE_API_EXPORT iree_host_size_t iree_string_view_find_first_of(
+ iree_string_view_t value, iree_string_view_t s, iree_host_size_t pos);
+
+// Returns the index of the last occurrence of one of the characters in |s| or
+// IREE_STRING_VIEW_NPOS if none of the characters were found.
+IREE_API_EXPORT iree_host_size_t iree_string_view_find_last_of(
+ iree_string_view_t value, iree_string_view_t s, iree_host_size_t pos);
+
+// Returns true if the string starts with the given prefix.
+IREE_API_EXPORT bool iree_string_view_starts_with(iree_string_view_t value,
+ iree_string_view_t prefix);
+
+// Returns true if the string ends with the given suffix.
+IREE_API_EXPORT bool iree_string_view_ends_with(iree_string_view_t value,
+ iree_string_view_t suffix);
+
+// Removes the first |n| characters from the string view (not the data).
+IREE_API_EXPORT iree_string_view_t
+iree_string_view_remove_prefix(iree_string_view_t value, iree_host_size_t n);
+
+// Removes the last |n| characters from the string view (not the data).
+IREE_API_EXPORT iree_string_view_t
+iree_string_view_remove_suffix(iree_string_view_t value, iree_host_size_t n);
+
+// Removes the given substring prefix from the string view if present.
+IREE_API_EXPORT iree_string_view_t iree_string_view_strip_prefix(
+ iree_string_view_t value, iree_string_view_t prefix);
+
+// Removes the given substring suffix from the string view if present.
+IREE_API_EXPORT iree_string_view_t iree_string_view_strip_suffix(
+ iree_string_view_t value, iree_string_view_t suffix);
+
+// Removes the given substring prefix from the string view if present in-place.
+// Returns true if the strip succeeded.
+IREE_API_EXPORT bool iree_string_view_consume_prefix(iree_string_view_t* value,
+ iree_string_view_t prefix);
+
+// Removes the given substring suffix from the string view if present in-place.
+// Returns true if the strip succeeded.
+IREE_API_EXPORT bool iree_string_view_consume_suffix(iree_string_view_t* value,
+ iree_string_view_t suffix);
+
+// Removes leading and trailing whitespace.
+IREE_API_EXPORT iree_string_view_t
+iree_string_view_trim(iree_string_view_t value);
+
+// Returns a substring of the string view at offset |pos| and length |n|.
+// Use |n| == INTPTR_MAX to take the remainder of the string after |pos|.
+// Returns empty string on failure.
+IREE_API_EXPORT iree_string_view_t iree_string_view_substr(
+ iree_string_view_t value, iree_host_size_t pos, iree_host_size_t n);
+
+// Splits |value| into two parts based on the first occurrence of |split_char|.
+// Returns the index of the |split_char| in the original |value| or -1 if not
+// found.
+IREE_API_EXPORT intptr_t iree_string_view_split(iree_string_view_t value,
+ char split_char,
+ iree_string_view_t* out_lhs,
+ iree_string_view_t* out_rhs);
+
+// Replaces all occurrences of |old_char| with |new_char|.
+IREE_API_EXPORT void iree_string_view_replace_char(iree_string_view_t value,
+ char old_char,
+ char new_char);
+
+// Returns true if the given |value| matches |pattern| (normal * and ? rules).
+// This accepts wildcards in the form of '*' and '?' for any delimited value.
+// '*' will match zero or more of any character and '?' will match exactly one
+// of any character.
+//
+// For example,
+// 'foo-*-bar' matches: 'foo-123-bar', 'foo-456-789-bar'
+// 'foo-10?' matches: 'foo-101', 'foo-102'
+IREE_API_EXPORT bool iree_string_view_match_pattern(iree_string_view_t value,
+ iree_string_view_t pattern);
+
+// Copies the string bytes into the target buffer and returns the number of
+// characters copied. Does not include a NUL terminator.
+IREE_API_EXPORT iree_host_size_t iree_string_view_append_to_buffer(
+ iree_string_view_t source_value, iree_string_view_t* target_value,
+ char* buffer);
+
+IREE_API_EXPORT bool iree_string_view_atoi_int32(iree_string_view_t value,
+ int32_t* out_value);
+IREE_API_EXPORT bool iree_string_view_atoi_uint32(iree_string_view_t value,
+ uint32_t* out_value);
+IREE_API_EXPORT bool iree_string_view_atoi_int64(iree_string_view_t value,
+ int64_t* out_value);
+IREE_API_EXPORT bool iree_string_view_atoi_uint64(iree_string_view_t value,
+ uint64_t* out_value);
+IREE_API_EXPORT bool iree_string_view_atof(iree_string_view_t value,
+ float* out_value);
+IREE_API_EXPORT bool iree_string_view_atod(iree_string_view_t value,
+ double* out_value);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_STRING_VIEW_H_
diff --git a/runtime/src/iree/base/string_view_test.cc b/runtime/src/iree/base/string_view_test.cc
new file mode 100644
index 0000000..ac5a713
--- /dev/null
+++ b/runtime/src/iree/base/string_view_test.cc
@@ -0,0 +1,365 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <string>
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+
+namespace {
+
+// Tests for the C iree_string_view_t API declared in iree/base/string_view.h.
+// Each TEST wraps the C function in a small lambda converting between
+// const char*/std::string and iree_string_view_t for readability.
+
+// Converts a view into an owned std::string so EXPECT_EQ can compare results
+// against string literals.
+std::string ToString(iree_string_view_t value) {
+  return std::string(value.data, value.size);
+}
+
+TEST(StringViewTest, Equal) {
+  auto equal = [](const char* lhs, const char* rhs) -> bool {
+    return iree_string_view_equal(iree_make_cstring_view(lhs),
+                                  iree_make_cstring_view(rhs));
+  };
+  EXPECT_TRUE(equal("", ""));
+  EXPECT_FALSE(equal("a", ""));
+  EXPECT_FALSE(equal("", "a"));
+  EXPECT_TRUE(equal("a", "a"));
+  EXPECT_FALSE(equal("a", "ab"));
+  EXPECT_FALSE(equal("b", "ab"));
+  EXPECT_TRUE(equal("abc", "abc"));
+  EXPECT_FALSE(equal("abc", "aBc"));  // comparison is case-sensitive
+}
+
+TEST(StringViewTest, FindChar) {
+  auto find_char = [](const char* value, char c, iree_host_size_t pos) {
+    return iree_string_view_find_char(iree_make_cstring_view(value), c, pos);
+  };
+  // A starting |pos| at or beyond the end (including NPOS) finds nothing.
+  EXPECT_EQ(find_char("", 'x', 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("", 'x', 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("", 'x', IREE_STRING_VIEW_NPOS), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("x", 'x', 0), 0);
+  EXPECT_EQ(find_char("x", 'x', 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("x", 'x', IREE_STRING_VIEW_NPOS), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("abc", 'x', 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("abc", 'x', 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("abc", 'x', IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("axbxc", 'x', 0), 1);
+  EXPECT_EQ(find_char("axbxc", 'x', 1), 1);
+  EXPECT_EQ(find_char("axbxc", 'x', 2), 3);
+  EXPECT_EQ(find_char("axbxc", 'x', 3), 3);
+  EXPECT_EQ(find_char("axbxc", 'x', 4), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_char("axbxc", 'x', IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+}
+
+TEST(StringViewTest, FindFirstOf) {
+  auto find_first_of = [](const char* value, const char* s,
+                          iree_host_size_t pos) {
+    return iree_string_view_find_first_of(iree_make_cstring_view(value),
+                                          iree_make_cstring_view(s), pos);
+  };
+  // An empty needle set matches nothing.
+  EXPECT_EQ(find_first_of("", "", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("", "", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("", "", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("", "x", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("", "x", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("", "x", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("x", "x", 0), 0);
+  EXPECT_EQ(find_first_of("x", "x", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("x", "x", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("x", "", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("x", "", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("x", "", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("abc", "x", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("abc", "x", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("abc", "x", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("axbxc", "xy", 0), 1);
+  EXPECT_EQ(find_first_of("axbxc", "xy", 1), 1);
+  EXPECT_EQ(find_first_of("axbxc", "xy", 2), 3);
+  EXPECT_EQ(find_first_of("axbxc", "xy", 3), 3);
+  EXPECT_EQ(find_first_of("axbxc", "xy", 4), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("axbxc", "xy", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("aybxc", "xy", 0), 1);
+  EXPECT_EQ(find_first_of("aybxc", "xy", 1), 1);
+  EXPECT_EQ(find_first_of("aybxc", "xy", 2), 3);
+  EXPECT_EQ(find_first_of("aybxc", "xy", 3), 3);
+  EXPECT_EQ(find_first_of("aybxc", "xy", 4), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_first_of("aybxc", "xy", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+}
+
+TEST(StringViewTest, FindLastOf) {
+  auto find_last_of = [](const char* value, const char* s,
+                         iree_host_size_t pos) {
+    return iree_string_view_find_last_of(iree_make_cstring_view(value),
+                                         iree_make_cstring_view(s), pos);
+  };
+  // |pos| is the highest index considered; NPOS searches the whole string
+  // (contrast with find_char/find_first_of where NPOS finds nothing).
+  EXPECT_EQ(find_last_of("", "", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("", "", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("", "", IREE_STRING_VIEW_NPOS), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("", "x", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("", "x", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("", "x", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("x", "x", 0), 0);
+  EXPECT_EQ(find_last_of("x", "x", 1), 0);
+  EXPECT_EQ(find_last_of("x", "x", IREE_STRING_VIEW_NPOS), 0);
+  EXPECT_EQ(find_last_of("x", "", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("x", "", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("x", "", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("abc", "x", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("abc", "x", 1), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("abc", "x", IREE_STRING_VIEW_NPOS),
+            IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("axbxc", "xy", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("axbxc", "xy", 1), 1);
+  EXPECT_EQ(find_last_of("axbxc", "xy", 2), 1);
+  EXPECT_EQ(find_last_of("axbxc", "xy", 3), 3);
+  EXPECT_EQ(find_last_of("axbxc", "xy", 4), 3);
+  EXPECT_EQ(find_last_of("axbxc", "xy", IREE_STRING_VIEW_NPOS), 3);
+  EXPECT_EQ(find_last_of("aybxc", "xy", 0), IREE_STRING_VIEW_NPOS);
+  EXPECT_EQ(find_last_of("aybxc", "xy", 1), 1);
+  EXPECT_EQ(find_last_of("aybxc", "xy", 2), 1);
+  EXPECT_EQ(find_last_of("aybxc", "xy", 3), 3);
+  EXPECT_EQ(find_last_of("aybxc", "xy", 4), 3);
+  EXPECT_EQ(find_last_of("aybxc", "xy", IREE_STRING_VIEW_NPOS), 3);
+}
+
+TEST(StringViewTest, StartsWith) {
+  auto starts_with = [](const char* value, const char* prefix) -> bool {
+    return iree_string_view_starts_with(iree_make_cstring_view(value),
+                                        iree_make_cstring_view(prefix));
+  };
+  EXPECT_TRUE(starts_with("a", "a"));
+  EXPECT_TRUE(starts_with("ab", "a"));
+  EXPECT_TRUE(starts_with("ab", "ab"));
+  EXPECT_TRUE(starts_with("abc", "ab"));
+  EXPECT_TRUE(starts_with("abc", "abc"));
+  // An empty prefix is defined to never match.
+  EXPECT_FALSE(starts_with("abc", ""));
+  EXPECT_FALSE(starts_with("", ""));
+  EXPECT_FALSE(starts_with("", "a"));
+  EXPECT_FALSE(starts_with("", "abc"));
+  EXPECT_FALSE(starts_with("abc", "b"));
+  EXPECT_FALSE(starts_with("abc", "bc"));
+  EXPECT_FALSE(starts_with("a", "abc"));
+}
+
+TEST(StringViewTest, EndsWith) {
+  auto ends_with = [](const char* value, const char* suffix) -> bool {
+    return iree_string_view_ends_with(iree_make_cstring_view(value),
+                                      iree_make_cstring_view(suffix));
+  };
+  EXPECT_TRUE(ends_with("a", "a"));
+  EXPECT_TRUE(ends_with("ab", "b"));
+  EXPECT_TRUE(ends_with("ab", "ab"));
+  EXPECT_TRUE(ends_with("abc", "bc"));
+  EXPECT_TRUE(ends_with("abc", "c"));
+  // An empty suffix is defined to never match.
+  EXPECT_FALSE(ends_with("abc", ""));
+  EXPECT_FALSE(ends_with("", ""));
+  EXPECT_FALSE(ends_with("", "a"));
+  EXPECT_FALSE(ends_with("", "abc"));
+  EXPECT_FALSE(ends_with("abc", "b"));
+  EXPECT_FALSE(ends_with("abc", "ab"));
+  EXPECT_FALSE(ends_with("a", "abc"));
+}
+
+TEST(StringViewTest, RemovePrefix) {
+  auto remove_prefix = [](const char* value,
+                          iree_host_size_t n) -> std::string {
+    return ToString(
+        iree_string_view_remove_prefix(iree_make_cstring_view(value), n));
+  };
+  EXPECT_EQ(remove_prefix("", 0), "");
+  EXPECT_EQ(remove_prefix("", 1), "");
+  // Removing more characters than present clamps to empty.
+  EXPECT_EQ(remove_prefix("a", 10), "");
+  EXPECT_EQ(remove_prefix("ab", 1), "b");
+  EXPECT_EQ(remove_prefix("ab", 2), "");
+  EXPECT_EQ(remove_prefix("abcdef", 2), "cdef");
+}
+
+TEST(StringViewTest, RemoveSuffix) {
+  auto remove_suffix = [](const char* value,
+                          iree_host_size_t n) -> std::string {
+    return ToString(
+        iree_string_view_remove_suffix(iree_make_cstring_view(value), n));
+  };
+  EXPECT_EQ(remove_suffix("", 0), "");
+  EXPECT_EQ(remove_suffix("", 1), "");
+  // Removing more characters than present clamps to empty.
+  EXPECT_EQ(remove_suffix("a", 10), "");
+  EXPECT_EQ(remove_suffix("ab", 1), "a");
+  EXPECT_EQ(remove_suffix("ab", 2), "");
+  EXPECT_EQ(remove_suffix("abcdef", 2), "abcd");
+}
+
+TEST(StringViewTest, StripPrefix) {
+  auto strip_prefix = [](const char* value, const char* prefix) -> std::string {
+    return ToString(iree_string_view_strip_prefix(
+        iree_make_cstring_view(value), iree_make_cstring_view(prefix)));
+  };
+  // Unlike consume_prefix, a non-matching prefix returns the input unchanged.
+  EXPECT_EQ(strip_prefix("", ""), "");
+  EXPECT_EQ(strip_prefix("", "a"), "");
+  EXPECT_EQ(strip_prefix("a", ""), "a");
+  EXPECT_EQ(strip_prefix("a", "a"), "");
+  EXPECT_EQ(strip_prefix("ab", "a"), "b");
+  EXPECT_EQ(strip_prefix("ab", "b"), "ab");
+  EXPECT_EQ(strip_prefix("ab", "ab"), "");
+  EXPECT_EQ(strip_prefix("ab", "abc"), "ab");
+  EXPECT_EQ(strip_prefix("abcdef", "ab"), "cdef");
+  EXPECT_EQ(strip_prefix("abcdef", "bc"), "abcdef");
+}
+
+TEST(StringViewTest, StripSuffix) {
+  auto strip_suffix = [](const char* value, const char* suffix) -> std::string {
+    return ToString(iree_string_view_strip_suffix(
+        iree_make_cstring_view(value), iree_make_cstring_view(suffix)));
+  };
+  // Unlike consume_suffix, a non-matching suffix returns the input unchanged.
+  EXPECT_EQ(strip_suffix("", ""), "");
+  EXPECT_EQ(strip_suffix("", "a"), "");
+  EXPECT_EQ(strip_suffix("a", ""), "a");
+  EXPECT_EQ(strip_suffix("a", "a"), "");
+  EXPECT_EQ(strip_suffix("ab", "a"), "ab");
+  EXPECT_EQ(strip_suffix("ab", "b"), "a");
+  EXPECT_EQ(strip_suffix("ab", "ab"), "");
+  EXPECT_EQ(strip_suffix("ab", "abc"), "ab");
+  EXPECT_EQ(strip_suffix("abcdef", "ef"), "abcd");
+  EXPECT_EQ(strip_suffix("abcdef", "de"), "abcdef");
+}
+
+TEST(StringViewTest, ConsumePrefix) {
+  auto consume_prefix = [](const char* value,
+                           const char* prefix) -> std::string {
+    iree_string_view_t value_sv = iree_make_cstring_view(value);
+    if (iree_string_view_consume_prefix(&value_sv,
+                                        iree_make_cstring_view(prefix))) {
+      return ToString(value_sv);
+    } else {
+      return "FAILED";
+    }
+  };
+  // An empty prefix never matches, mirroring starts_with above.
+  EXPECT_EQ(consume_prefix("", ""), "FAILED");
+  EXPECT_EQ(consume_prefix("", "a"), "FAILED");
+  EXPECT_EQ(consume_prefix("a", ""), "FAILED");
+  EXPECT_EQ(consume_prefix("a", "a"), "");
+  EXPECT_EQ(consume_prefix("ab", "a"), "b");
+  EXPECT_EQ(consume_prefix("ab", "b"), "FAILED");
+  EXPECT_EQ(consume_prefix("ab", "ab"), "");
+  EXPECT_EQ(consume_prefix("ab", "abc"), "FAILED");
+  EXPECT_EQ(consume_prefix("abcdef", "ab"), "cdef");
+  EXPECT_EQ(consume_prefix("abcdef", "bc"), "FAILED");
+}
+
+TEST(StringViewTest, ConsumeSuffix) {
+  auto consume_suffix = [](const char* value,
+                           const char* suffix) -> std::string {
+    iree_string_view_t value_sv = iree_make_cstring_view(value);
+    if (iree_string_view_consume_suffix(&value_sv,
+                                        iree_make_cstring_view(suffix))) {
+      return ToString(value_sv);
+    } else {
+      return "FAILED";
+    }
+  };
+  // An empty suffix never matches, mirroring ends_with above.
+  EXPECT_EQ(consume_suffix("", ""), "FAILED");
+  EXPECT_EQ(consume_suffix("", "a"), "FAILED");
+  EXPECT_EQ(consume_suffix("a", ""), "FAILED");
+  EXPECT_EQ(consume_suffix("a", "a"), "");
+  EXPECT_EQ(consume_suffix("ab", "a"), "FAILED");
+  EXPECT_EQ(consume_suffix("ab", "b"), "a");
+  EXPECT_EQ(consume_suffix("ab", "ab"), "");
+  EXPECT_EQ(consume_suffix("ab", "abc"), "FAILED");
+  EXPECT_EQ(consume_suffix("abcdef", "ef"), "abcd");
+  EXPECT_EQ(consume_suffix("abcdef", "de"), "FAILED");
+}
+
+TEST(StringViewTest, Trim) {
+  auto trim = [](const char* value) -> std::string {
+    return ToString(iree_string_view_trim(iree_make_cstring_view(value)));
+  };
+  EXPECT_EQ(trim(""), "");
+  EXPECT_EQ(trim("a"), "a");
+  EXPECT_EQ(trim(" a"), "a");
+  EXPECT_EQ(trim("a "), "a");
+  // Interior whitespace is preserved; only the ends are trimmed.
+  EXPECT_EQ(trim("a b"), "a b");
+  EXPECT_EQ(trim(" a b "), "a b");
+  EXPECT_EQ(trim("\t\t\na b\n \t "), "a b");
+  EXPECT_EQ(trim("\n"), "");
+  EXPECT_EQ(trim("\r\n"), "");
+}
+
+TEST(StringViewTest, Substr) {
+  auto substr = [](const char* value, iree_host_size_t pos,
+                   iree_host_size_t n) {
+    return ToString(
+        iree_string_view_substr(iree_make_cstring_view(value), pos, n));
+  };
+  // Out-of-range |pos| and over-long |n| clamp rather than fail.
+  EXPECT_EQ(substr("", 0, 0), "");
+  EXPECT_EQ(substr("", 0, 1), "");
+  EXPECT_EQ(substr("", 0, INTPTR_MAX), "");
+  EXPECT_EQ(substr("", 1, 0), "");
+  EXPECT_EQ(substr("", 1, 1), "");
+  EXPECT_EQ(substr("", 1, INTPTR_MAX), "");
+
+  EXPECT_EQ(substr("a", 0, 0), "");
+  EXPECT_EQ(substr("a", 0, 1), "a");
+  EXPECT_EQ(substr("a", 0, 2), "a");
+  EXPECT_EQ(substr("a", 0, INTPTR_MAX), "a");
+  EXPECT_EQ(substr("a", 1, 0), "");
+  EXPECT_EQ(substr("a", 1, 1), "");
+  EXPECT_EQ(substr("a", 1, INTPTR_MAX), "");
+
+  EXPECT_EQ(substr("abc", 0, 1), "a");
+  EXPECT_EQ(substr("abc", 1, 1), "b");
+  EXPECT_EQ(substr("abc", 2, 1), "c");
+  EXPECT_EQ(substr("abc", 0, 2), "ab");
+  EXPECT_EQ(substr("abc", 1, 2), "bc");
+  EXPECT_EQ(substr("abc", 1, INTPTR_MAX), "bc");
+  EXPECT_EQ(substr("abc", 0, 3), "abc");
+  EXPECT_EQ(substr("abc", 0, INTPTR_MAX), "abc");
+}
+
+TEST(StringViewTest, Split) {
+  auto split =
+      [](const char* value,
+         char split_char) -> std::tuple<intptr_t, std::string, std::string> {
+    iree_string_view_t lhs;
+    iree_string_view_t rhs;
+    intptr_t index = iree_string_view_split(iree_make_cstring_view(value),
+                                            split_char, &lhs, &rhs);
+    return std::make_tuple(index, ToString(lhs), ToString(rhs));
+  };
+  // When the char is absent the whole input lands in lhs and index is -1.
+  EXPECT_EQ(split("", 'x'), std::make_tuple(-1, "", ""));
+  EXPECT_EQ(split(" ", 'x'), std::make_tuple(-1, " ", ""));
+  EXPECT_EQ(split("x", 'x'), std::make_tuple(0, "", ""));
+  EXPECT_EQ(split(" x ", 'x'), std::make_tuple(1, " ", " "));
+  EXPECT_EQ(split("axb", 'x'), std::make_tuple(1, "a", "b"));
+  // Only the first occurrence splits; later ones remain in rhs.
+  EXPECT_EQ(split("axxxb", 'x'), std::make_tuple(1, "a", "xxb"));
+  EXPECT_EQ(split("ax", 'x'), std::make_tuple(1, "a", ""));
+  EXPECT_EQ(split("xb", 'x'), std::make_tuple(0, "", "b"));
+  EXPECT_EQ(split("axbxc", 'x'), std::make_tuple(1, "a", "bxc"));
+}
+
+TEST(StringViewTest, ReplaceChar) {
+  auto replace_char = [](const char* value, char old_char, char new_char) {
+    // Mutates a local copy in place since the API writes through the view.
+    std::string value_clone(value);
+    iree_string_view_replace_char(
+        iree_make_string_view(value_clone.data(), value_clone.size()), old_char,
+        new_char);
+    return value_clone;
+  };
+  EXPECT_EQ(replace_char("", 'x', 'y'), "");
+  EXPECT_EQ(replace_char(" ", 'x', 'y'), " ");
+  EXPECT_EQ(replace_char("a", 'x', 'y'), "a");
+  EXPECT_EQ(replace_char("x", 'x', 'y'), "y");
+  EXPECT_EQ(replace_char("xx", 'x', 'y'), "yy");
+  EXPECT_EQ(replace_char("axbxc", 'x', 'y'), "aybyc");
+}
+
+}  // namespace
diff --git a/runtime/src/iree/base/target_platform.h b/runtime/src/iree/base/target_platform.h
new file mode 100644
index 0000000..a15f80c
--- /dev/null
+++ b/runtime/src/iree/base/target_platform.h
@@ -0,0 +1,293 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_TARGET_PLATFORM_H_
+#define IREE_BASE_TARGET_PLATFORM_H_
+
+#include <assert.h>
+#include <stdint.h>
+
+// The build system defines one of the following top-level platforms and then
+// one platform+architecture pair for that platform.
+//
+// IREE_ARCH ("arm_32", "arm_64", etc)
+// IREE_ARCH_ARM_32
+// IREE_ARCH_ARM_64
+// IREE_ARCH_RISCV_32
+// IREE_ARCH_RISCV_64
+// IREE_ARCH_WASM_32
+// IREE_ARCH_WASM_64
+// IREE_ARCH_X86_32
+// IREE_ARCH_X86_64
+//
+// IREE_PTR_SIZE
+// IREE_PTR_SIZE_32
+// IREE_PTR_SIZE_64
+//
+// IREE_ENDIANNESS_LITTLE
+// IREE_ENDIANNESS_BIG
+//
+// IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED (0/1)
+//
+// IREE_COMPILER_CLANG
+// IREE_COMPILER_GCC
+// IREE_COMPILER_GCC_COMPAT
+// IREE_COMPILER_MSVC
+//
+// IREE_SANITIZER_ADDRESS
+// IREE_SANITIZER_MEMORY
+// IREE_SANITIZER_THREAD
+//
+// IREE_PLATFORM_ANDROID
+// IREE_PLATFORM_ANDROID_EMULATOR
+// IREE_PLATFORM_APPLE (IOS | MACOS)
+// IREE_PLATFORM_EMSCRIPTEN
+// IREE_PLATFORM_GENERIC
+// IREE_PLATFORM_IOS
+// IREE_PLATFORM_IOS_SIMULATOR
+// IREE_PLATFORM_LINUX
+// IREE_PLATFORM_MACOS
+// IREE_PLATFORM_WINDOWS
+
+//==============================================================================
+// IREE_ARCH_*
+//==============================================================================
+
+#if defined(__arm__) || defined(__arm64) || defined(__aarch64__) || \
+ defined(__thumb__) || defined(__TARGET_ARCH_ARM) || \
+ defined(__TARGET_ARCH_THUMB) || defined(_M_ARM)
+#if defined(__arm64) || defined(__aarch64__)
+#define IREE_ARCH "arm_64"
+#define IREE_ARCH_ARM_64 1
+#else
+#define IREE_ARCH "arm_32"
+#define IREE_ARCH_ARM_32 1
+#endif // __arm64
+#endif // ARM
+
+#if defined(__wasm32__)
+#define IREE_ARCH "wasm_32"
+#define IREE_ARCH_WASM_32 1
+#elif defined(__wasm64__)
+#define IREE_ARCH "wasm_64"
+#define IREE_ARCH_WASM_64 1
+#endif // WASM
+
+#if defined(__i386__) || defined(__i486__) || defined(__i586__) || \
+ defined(__i686__) || defined(__i386) || defined(_M_IX86) || defined(_X86_)
+#define IREE_ARCH "x86_32"
+#define IREE_ARCH_X86_32 1
+#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || \
+ defined(__amd64) || defined(_M_X64)
+#define IREE_ARCH "x86_64"
+#define IREE_ARCH_X86_64 1
+#endif // X86
+
+#if defined(__riscv) && (__riscv_xlen == 32)
+#define IREE_ARCH "riscv_32"
+#define IREE_ARCH_RISCV_32 1
+#elif defined(__riscv) && (__riscv_xlen == 64)
+#define IREE_ARCH "riscv_64"
+#define IREE_ARCH_RISCV_64 1
+#endif
+
+#if !defined(IREE_ARCH_ARM_32) && !defined(IREE_ARCH_ARM_64) && \
+ !defined(IREE_ARCH_RISCV_32) && !defined(IREE_ARCH_RISCV_64) && \
+ !defined(IREE_ARCH_WASM_32) && !defined(IREE_ARCH_WASM_64) && \
+ !defined(IREE_ARCH_X86_32) && !defined(IREE_ARCH_X86_64)
+#error Unknown architecture.
+#endif // all archs
+
+//==============================================================================
+// IREE_PTR_SIZE_*
+//==============================================================================
+
+// See https://stackoverflow.com/q/51616057
+static_assert(sizeof(void*) == sizeof(uintptr_t),
+ "can't determine pointer size");
+
+#if UINTPTR_MAX == 0xFFFFFFFF
+#define IREE_PTR_SIZE_32
+#define IREE_PTR_SIZE 4
+#elif UINTPTR_MAX == 0xFFFFFFFFFFFFFFFFu
+#define IREE_PTR_SIZE_64
+#define IREE_PTR_SIZE 8
+#else
+#error "can't determine pointer size"
+#endif
+
+//==============================================================================
+// IREE_ENDIANNESS_*
+//==============================================================================
+// https://en.wikipedia.org/wiki/Endianness
+
+#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define IREE_ENDIANNESS_LITTLE 1
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define IREE_ENDIANNESS_BIG 1
+#elif defined(_WIN32)
+#define IREE_ENDIANNESS_LITTLE 1
+#else
+#error IREE endian detection needs to be set up for your compiler
+#endif // __BYTE_ORDER__
+
+//==============================================================================
+// IREE_MEMORY_ACCESS_*
+//==============================================================================
+// Certain architectures have specific memory access requirements that require
+// user-mode code changes to work at all or work at reasonable performance.
+
+#if !defined(IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED)
+
+#if defined(IREE_ARCH_ARM_32) || defined(IREE_ARCH_ARM_64)
+
+// Armv6-M and Armv8-M (w/o the main extension) do not support unaligned
+// access.
+// The -munaligned-access and -mno-unaligned-access flags control this.
+// https://www.keil.com/support/man/docs/armclang_ref/armclang_ref_sam1444138667173.htm
+#if !defined(__ARM_FEATURE_UNALIGNED)
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 1
+#else
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 0
+#endif // !__ARM_FEATURE_UNALIGNED
+
+#elif defined(IREE_ARCH_RISCV_32) || defined(IREE_ARCH_RISCV_64)
+
+// Though unaligned access is part of the base spec it is allowed to be
+// implemented with trap handlers. Bare-metal systems likely won't have these
+// handlers and even on systems that do (linux) we don't want to be trapping for
+// every load/store.
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 1
+
+#endif // IREE_ARCH_*
+
+#else
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 0
+#endif // !IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+
+//==============================================================================
+// IREE_COMPILER_*
+//==============================================================================
+
+#if defined(__clang__)
+#define IREE_COMPILER_CLANG 1
+#define IREE_COMPILER_GCC_COMPAT 1
+#elif defined(__GNUC__)
+#define IREE_COMPILER_GCC 1
+#define IREE_COMPILER_GCC_COMPAT 1
+#elif defined(_MSC_VER)
+#define IREE_COMPILER_MSVC 1
+#else
+#error Unrecognized compiler.
+#endif // compiler versions
+
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define IREE_SANITIZER_ADDRESS 1
+#endif // __has_feature(address_sanitizer)
+#if __has_feature(memory_sanitizer)
+#define IREE_SANITIZER_MEMORY 1
+#endif // __has_feature(memory_sanitizer)
+#if __has_feature(thread_sanitizer)
+#define IREE_SANITIZER_THREAD 1
+#endif // __has_feature(thread_sanitizer)
+#endif // defined(__has_feature)
+
+//==============================================================================
+// IREE_COMPILER_HAS_BUILTIN_DEBUG_TRAP
+//==============================================================================
+
+#if defined __has_builtin
+#if __has_builtin(__builtin_debugtrap)
+#define IREE_COMPILER_HAS_BUILTIN_DEBUG_TRAP 1
+#endif
+#endif
+
+//==============================================================================
+// IREE_PLATFORM_ANDROID
+//==============================================================================
+
+#if defined(__ANDROID__)
+#define IREE_PLATFORM_ANDROID 1
+#endif // __ANDROID__
+
+//==============================================================================
+// IREE_PLATFORM_EMSCRIPTEN
+//==============================================================================
+
+#if defined(__EMSCRIPTEN__)
+#define IREE_PLATFORM_EMSCRIPTEN 1
+#endif  // __EMSCRIPTEN__
+
+//==============================================================================
+// IREE_PLATFORM_IOS | IREE_PLATFORM_MACOS
+//==============================================================================
+
+#if defined(__APPLE__)
+#include <TargetConditionals.h> // IWYU pragma: export
+#if TARGET_OS_IPHONE
+#define IREE_PLATFORM_IOS 1
+#else
+#define IREE_PLATFORM_MACOS 1
+#endif // TARGET_OS_IPHONE
+#if TARGET_IPHONE_SIMULATOR
+#define IREE_PLATFORM_IOS_SIMULATOR 1
+#endif // TARGET_IPHONE_SIMULATOR
+#endif // __APPLE__
+
+#if defined(IREE_PLATFORM_IOS) || defined(IREE_PLATFORM_MACOS)
+#define IREE_PLATFORM_APPLE 1
+#endif // IREE_PLATFORM_IOS || IREE_PLATFORM_MACOS
+
+//==============================================================================
+// IREE_PLATFORM_LINUX
+//==============================================================================
+
+#if defined(__linux__) || defined(linux) || defined(__linux)
+#define IREE_PLATFORM_LINUX 1
+#endif // __linux__
+
+//==============================================================================
+// IREE_PLATFORM_WINDOWS
+//==============================================================================
+
+#if defined(_WIN32) || defined(_WIN64)
+#define IREE_PLATFORM_WINDOWS 1
+#endif // _WIN32 || _WIN64
+
+#if defined(IREE_PLATFORM_WINDOWS)
+
+#if defined(_MSC_VER)
+// Abseil compatibility: don't include incompatible winsock versions.
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif // WIN32_LEAN_AND_MEAN
+// Abseil compatibility: don't define min and max macros.
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif // NOMINMAX
+#endif // _MSC_VER
+
+#include <windows.h> // IWYU pragma: export
+
+// WinGDI.h defines `ERROR`, undef to avoid conflict naming.
+#undef ERROR
+
+#endif // IREE_PLATFORM_WINDOWS
+
+//==============================================================================
+// Fallthrough for unsupported platforms
+//==============================================================================
+
+#if !defined(IREE_PLATFORM_ANDROID) && !defined(IREE_PLATFORM_EMSCRIPTEN) && \
+ !defined(IREE_PLATFORM_GENERIC) && !defined(IREE_PLATFORM_IOS) && \
+ !defined(IREE_PLATFORM_LINUX) && !defined(IREE_PLATFORM_MACOS) && \
+ !defined(IREE_PLATFORM_WINDOWS)
+#error Unknown platform.
+#endif  // all platforms
+
+#endif // IREE_BASE_TARGET_PLATFORM_H_
diff --git a/runtime/src/iree/base/testing/BUILD b/runtime/src/iree/base/testing/BUILD
new file mode 100644
index 0000000..abc160b
--- /dev/null
+++ b/runtime/src/iree/base/testing/BUILD
@@ -0,0 +1,44 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
# Shared library exercised by dynamic_library_test. Uses the native cc_binary
# with linkshared (rather than a cc_library) so a standalone .so artifact is
# produced that can be embedded below.
cc_binary(
    name = "dynamic_library_test_library.so",
    testonly = True,
    srcs = ["dynamic_library_test_library.cc"],
    linkshared = True,
)

# Embeds the compiled .so bytes into a C module so the test can write it back
# out to a temp file at runtime regardless of platform/runfiles layout.
c_embed_data(
    name = "dynamic_library_test_library",
    testonly = True,
    srcs = [":dynamic_library_test_library.so"],
    c_file_output = "dynamic_library_test_library_embed.c",
    flatten = True,
    h_file_output = "dynamic_library_test_library_embed.h",
)

# NOTE(review): dynamic_library_test.cc includes iree/testing/status_matchers.h
# but no ...testing:status_matchers dep is listed here — confirm it is exported
# by one of the gtest deps, else layering_check will reject the include.
iree_runtime_cc_test(
    name = "dynamic_library_test",
    srcs = ["dynamic_library_test.cc"],
    deps = [
        ":dynamic_library_test_library",
        "//runtime/src/iree/base",
        "//runtime/src/iree/base:logging",
        "//runtime/src/iree/base/internal:dynamic_library",
        "//runtime/src/iree/base/internal:file_io",
        "//runtime/src/iree/testing:gtest",
        "//runtime/src/iree/testing:gtest_main",
    ],
)
diff --git a/runtime/src/iree/base/testing/CMakeLists.txt b/runtime/src/iree/base/testing/CMakeLists.txt
new file mode 100644
index 0000000..2daa820
--- /dev/null
+++ b/runtime/src/iree/base/testing/CMakeLists.txt
@@ -0,0 +1,46 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# TODO(scotttodd): clean up bazel_to_cmake handling here
+# * this is a cc_binary in Bazel, but `linkshared` fits iree_cc_library better
+# * the output file name is platform-specific, get it with $<TARGET_FILE:>
# Shared library exercised by dynamic_library_test; built SHARED so a
# standalone loadable artifact is produced for embedding below.
iree_cc_library(
  NAME
    dynamic_library_test_library.so
  SRCS
    "dynamic_library_test_library.cc"
  TESTONLY
  SHARED
)

# Embeds the built shared library's bytes into a C module so the test can
# write it back out to a temp file at runtime on any platform. The generator
# expression resolves the platform-specific output file of the target above.
iree_c_embed_data(
  NAME
    dynamic_library_test_library
  GENERATED_SRCS
    "$<TARGET_FILE:iree::base::testing::dynamic_library_test_library.so>"
  C_FILE_OUTPUT
    "dynamic_library_test_library_embed.c"
  H_FILE_OUTPUT
    "dynamic_library_test_library_embed.h"
  TESTONLY
  FLATTEN
  PUBLIC
)

iree_cc_test(
  NAME
    dynamic_library_test
  SRCS
    "dynamic_library_test.cc"
  DEPS
    ::dynamic_library_test_library
    iree::base
    iree::base::internal::dynamic_library
    iree::base::internal::file_io
    iree::base::logging
    iree::testing::gtest
    iree::testing::gtest_main
)
diff --git a/runtime/src/iree/base/testing/dynamic_library_test.cc b/runtime/src/iree/base/testing/dynamic_library_test.cc
new file mode 100644
index 0000000..a63338b
--- /dev/null
+++ b/runtime/src/iree/base/testing/dynamic_library_test.cc
@@ -0,0 +1,138 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/dynamic_library.h"
+
+#include <cstdlib>
+#include <iostream>
+#include <ostream>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/file_io.h"
+#include "iree/base/logging.h"
+#include "iree/base/testing/dynamic_library_test_library_embed.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace {
+
+using iree::testing::status::StatusIs;
+
+static const char* kUnknownName = "library_that_does_not_exist.so";
+
+class DynamicLibraryTest : public ::testing::Test {
+ public:
+ static std::string GetTempFilename(const char* suffix) {
+ static int unique_id = 0;
+ char* test_tmpdir = getenv("TEST_TMPDIR");
+ if (!test_tmpdir) {
+ test_tmpdir = getenv("TMPDIR");
+ }
+ if (!test_tmpdir) {
+ test_tmpdir = getenv("TEMP");
+ }
+ IREE_CHECK(test_tmpdir) << "TEST_TMPDIR/TMPDIR/TEMP not defined";
+ return test_tmpdir + std::string("/iree_test_") +
+ std::to_string(unique_id++) + suffix;
+ }
+
+ static void SetUpTestCase() {
+ // Making files available to tests, particularly across operating systems
+ // and build tools (Bazel/CMake) is complicated. Rather than include a test
+ // dynamic library as a "testdata" file, we use c_embed_data to package
+ // the file so it's embedded in a C module, then write that embedded file
+ // to a platform/test-environment specific temp file for loading.
+
+ // System APIs for loading dynamic libraries typically require an extension.
+#if defined(IREE_PLATFORM_WINDOWS)
+ static constexpr const char* ext = ".dll";
+#else
+ static constexpr const char* ext = ".so";
+#endif
+ library_temp_path_ = GetTempFilename(ext);
+
+ const struct iree_file_toc_t* file_toc =
+ dynamic_library_test_library_create();
+ IREE_ASSERT_OK(iree_file_write_contents(
+ library_temp_path_.c_str(),
+ iree_make_const_byte_span(file_toc->data, file_toc->size)));
+
+ std::cout << "Embedded test library written to temp path: "
+ << library_temp_path_;
+ }
+
+ static std::string library_temp_path_;
+};
+
+std::string DynamicLibraryTest::library_temp_path_;
+
// Loading the embedded library from its temp path succeeds and the handle can
// be released cleanly.
TEST_F(DynamicLibraryTest, LoadLibrarySuccess) {
  iree_dynamic_library_t* library = NULL;
  IREE_ASSERT_OK(iree_dynamic_library_load_from_file(
      library_temp_path_.c_str(), IREE_DYNAMIC_LIBRARY_FLAG_NONE,
      iree_allocator_system(), &library));
  iree_dynamic_library_release(library);
}
+
// Loading a path that does not exist fails with NOT_FOUND instead of crashing.
TEST_F(DynamicLibraryTest, LoadLibraryFailure) {
  iree_dynamic_library_t* library = NULL;
  iree_status_t status = iree_dynamic_library_load_from_file(
      kUnknownName, IREE_DYNAMIC_LIBRARY_FLAG_NONE, iree_allocator_system(),
      &library);
  IREE_EXPECT_STATUS_IS(IREE_STATUS_NOT_FOUND, status);
  // Non-OK statuses may carry heap-allocated payloads; free after inspecting.
  iree_status_free(status);
}
+
// The same library file can be loaded into two independent handles at once and
// each handle released separately.
TEST_F(DynamicLibraryTest, LoadLibraryTwice) {
  iree_dynamic_library_t* library1 = NULL;
  iree_dynamic_library_t* library2 = NULL;
  IREE_ASSERT_OK(iree_dynamic_library_load_from_file(
      library_temp_path_.c_str(), IREE_DYNAMIC_LIBRARY_FLAG_NONE,
      iree_allocator_system(), &library1));
  IREE_ASSERT_OK(iree_dynamic_library_load_from_file(
      library_temp_path_.c_str(), IREE_DYNAMIC_LIBRARY_FLAG_NONE,
      iree_allocator_system(), &library2));
  iree_dynamic_library_release(library1);
  iree_dynamic_library_release(library2);
}
+
+TEST_F(DynamicLibraryTest, GetSymbolSuccess) {
+ iree_dynamic_library_t* library = NULL;
+ IREE_ASSERT_OK(iree_dynamic_library_load_from_file(
+ library_temp_path_.c_str(), IREE_DYNAMIC_LIBRARY_FLAG_NONE,
+ iree_allocator_system(), &library));
+
+ int (*fn_ptr)(int);
+ IREE_ASSERT_OK(iree_dynamic_library_lookup_symbol(library, "times_two",
+ (void**)&fn_ptr));
+ ASSERT_NE(nullptr, fn_ptr);
+ EXPECT_EQ(246, fn_ptr(123));
+
+ iree_dynamic_library_release(library);
+}
+
+TEST_F(DynamicLibraryTest, GetSymbolFailure) {
+ iree_dynamic_library_t* library = NULL;
+ IREE_ASSERT_OK(iree_dynamic_library_load_from_file(
+ library_temp_path_.c_str(), IREE_DYNAMIC_LIBRARY_FLAG_NONE,
+ iree_allocator_system(), &library));
+
+ int (*fn_ptr)(int);
+ iree_status_t status =
+ iree_dynamic_library_lookup_symbol(library, "unknown", (void**)&fn_ptr);
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_NOT_FOUND, status);
+ iree_status_free(status);
+ EXPECT_EQ(nullptr, fn_ptr);
+
+ iree_dynamic_library_release(library);
+}
+
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/base/testing/dynamic_library_test_library.cc b/runtime/src/iree/base/testing/dynamic_library_test_library.cc
new file mode 100644
index 0000000..d356eeb
--- /dev/null
+++ b/runtime/src/iree/base/testing/dynamic_library_test_library.cc
@@ -0,0 +1,21 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// Mark the symbol visible outside the shared library on every platform so the
// loader test can resolve it by name.
#if defined(_WIN32)
#define IREE_SYM_EXPORT __declspec(dllexport)
#else
#define IREE_SYM_EXPORT __attribute__((visibility("default")))
#endif  // _WIN32

// Trivial exported function the dynamic-library test resolves and invokes.
int IREE_SYM_EXPORT times_two(int value) { return value + value; }

#ifdef __cplusplus
}  // extern "C"
#endif  // __cplusplus
diff --git a/runtime/src/iree/base/time.c b/runtime/src/iree/base/time.c
new file mode 100644
index 0000000..b9ad245
--- /dev/null
+++ b/runtime/src/iree/base/time.c
@@ -0,0 +1,182 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/time.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
// Returns the current wall-clock time in nanoseconds since the unix epoch.
IREE_API_EXPORT iree_time_t iree_time_now(void) {
#if defined(IREE_TIME_NOW_FN)
  // User-provided hook for bare-metal/embedded configurations that supply
  // their own clock source.
  IREE_TIME_NOW_FN
#elif defined(IREE_PLATFORM_WINDOWS)
  // GetSystemTimePreciseAsFileTime requires Windows 8, add a fallback
  // (such as using std::chrono) if older support is needed.
  FILETIME system_time;
  GetSystemTimePreciseAsFileTime(&system_time);
  // FILETIME counts 100ns ticks since 1601-01-01; rebase to the unix epoch
  // and scale ticks (100ns each) to nanoseconds.
  // NOTE(review): the `i64` literal suffix is an MSVC extension — confirm all
  // supported Windows toolchains accept it, else prefer INT64_C(...).
  const int64_t kUnixEpochStartTicks = 116444736000000000i64;
  const int64_t kFtToNanoSec = 100;
  LARGE_INTEGER li;
  li.LowPart = system_time.dwLowDateTime;
  li.HighPart = system_time.dwHighDateTime;
  li.QuadPart -= kUnixEpochStartTicks;
  li.QuadPart *= kFtToNanoSec;
  return li.QuadPart;
#elif defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_APPLE) || \
    defined(IREE_PLATFORM_LINUX) || defined(IREE_PLATFORM_EMSCRIPTEN)
  struct timespec clock_time;
  clock_gettime(CLOCK_REALTIME, &clock_time);
  return clock_time.tv_sec * 1000000000ull + clock_time.tv_nsec;
#else
#error "IREE system clock needs to be set up for your platform"
#endif  // IREE_PLATFORM_*
}
+
+IREE_API_EXPORT iree_time_t
+iree_relative_timeout_to_deadline_ns(iree_duration_t timeout_ns) {
+ if (timeout_ns == IREE_DURATION_ZERO) {
+ return IREE_TIME_INFINITE_PAST;
+ } else if (timeout_ns == IREE_DURATION_INFINITE) {
+ return IREE_TIME_INFINITE_FUTURE;
+ }
+ return iree_time_now() + timeout_ns;
+}
+
+IREE_API_EXPORT iree_duration_t
+iree_absolute_deadline_to_timeout_ns(iree_time_t deadline_ns) {
+ if (deadline_ns == IREE_TIME_INFINITE_PAST) {
+ return IREE_DURATION_ZERO;
+ } else if (deadline_ns == IREE_TIME_INFINITE_FUTURE) {
+ return IREE_DURATION_INFINITE;
+ } else {
+ iree_time_t now_ns = iree_time_now();
+ return deadline_ns < now_ns ? IREE_DURATION_ZERO : deadline_ns - now_ns;
+ }
+}
+
+IREE_API_EXPORT uint32_t
+iree_absolute_deadline_to_timeout_ms(iree_time_t deadline_ns) {
+ if (deadline_ns == IREE_TIME_INFINITE_PAST) {
+ return IREE_DURATION_ZERO;
+ } else if (deadline_ns == IREE_TIME_INFINITE_FUTURE) {
+ return UINT32_MAX;
+ } else {
+ // We have either already passed the deadline (and can turn this into a
+ // poll) or want to do nanos->millis. We round up so that a deadline of 1ns
+ // results in 1ms as it should still wait, vs. if it was actually 0ns
+ // indicating the user intended a poll.
+ iree_time_t now_ns = iree_time_now();
+ return deadline_ns < now_ns
+ ? IREE_DURATION_ZERO
+ : (deadline_ns - now_ns + 1000000 - 1) / 1000000ull;
+ }
+}
+
#if defined(IREE_WAIT_UNTIL_FN)

// Define IREE_WAIT_UNTIL_FN to call out to a user-configured function.
static bool iree_wait_until_impl(iree_time_t deadline_ns) {
  return IREE_WAIT_UNTIL_FN(deadline_ns);
}

#elif defined(IREE_PLATFORM_WINDOWS)

// No good sleep APIs on Windows; we need to accumulate low-precision relative
// waits to reach the absolute time. Lots of slop here, but we primarily use
// nanoseconds as a uniform time API and don't guarantee that precision. Note
// that we try to round up to ensure we wait until at least the requested time.
static bool iree_wait_until_impl(iree_time_t deadline_ns) {
  iree_time_t now_ns = iree_time_now();
  while (now_ns < deadline_ns) {
    iree_time_t delta_ns = deadline_ns - now_ns;
    // Round up to whole milliseconds so we never undershoot the deadline.
    uint32_t delta_ms = (uint32_t)((delta_ns + 1000000 - 1) / 1000000ull);
    if (delta_ms == 0) {
      // Sleep(0) doesn't actually sleep and instead acts as a yield; instead of
      // potentially spilling in a tight loop when we get down near the end of
      // the wait we bail a bit early. We don't guarantee the precision of the
      // waits so this is fine.
      break;
    }
    Sleep(delta_ms);
    now_ns = iree_time_now();
  }
  return true;
}

#elif (_POSIX_C_SOURCE >= 200112L) && defined(TIMER_ABSTIME)

// This is widely available on *nix-like systems (linux/bsd/etc) and in
// most libc implementations (glibc/musl/etc). It's the best as we get to
// tell the system the exact time we want to sleep until.
//
// https://man7.org/linux/man-pages/man2/clock_nanosleep.2.html
//
// NOTE: we could save a syscall in many cases if we returned the time upon wake
// from the API.
static bool iree_wait_until_impl(iree_time_t deadline_ns) {
  struct timespec ts = {
      .tv_sec = (time_t)(deadline_ns / 1000000000ull),
      .tv_nsec = (long)(deadline_ns % 1000000000ull),
  };
  // Nonzero return (e.g. EINTR on signal) is reported as an aborted wait.
  int ret = clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL);
  return ret == 0;
}

#elif (_POSIX_C_SOURCE >= 199309L) || defined(IREE_PLATFORM_APPLE)

// Apple doesn't have clock_nanosleep. We could use the Mach APIs on darwin to
// do this but they require initialization and potential updates during
// execution as clock frequencies change. Instead we use the relative nanosleep
// and accumulate until the deadline, which is a good fallback for some other
// platforms as well.
//
// https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/nanosleep.2.html
static bool iree_wait_until_impl(iree_time_t deadline_ns) {
  iree_time_t now_ns = iree_time_now();
  while (now_ns < deadline_ns) {
    iree_time_t delta_ns = deadline_ns - now_ns;
    // NOTE(review): despite the name, abs_ts holds a *relative* duration here;
    // nanosleep takes a relative interval.
    struct timespec abs_ts = {
        .tv_sec = (time_t)(delta_ns / 1000000000ull),
        .tv_nsec = (long)(delta_ns % 1000000000ull),
    };
    // Nonzero return (e.g. EINTR on signal) aborts the accumulation loop.
    int ret = nanosleep(&abs_ts, NULL);
    if (ret != 0) return false;
    now_ns = iree_time_now();
  }
  return true;
}

#else

// No waiting available; just pretend like we did. This will cause programs
// using timers to run as fast as possible but without having a way to delay
// time there's not much else they could do.
static bool iree_wait_until_impl(iree_time_t deadline_ns) { return true; }

#endif  // (platforms)
+
// Sleeps the calling thread until |deadline_ns| (or longer); returns false if
// the wait was aborted or if asked to wait forever (which is disallowed).
bool iree_wait_until(iree_time_t deadline_ns) {
  // Can't wait forever - or for the past.
  if (deadline_ns == IREE_TIME_INFINITE_FUTURE) return false;
  if (deadline_ns == IREE_TIME_INFINITE_PAST) return true;

  IREE_TRACE_ZONE_BEGIN(z0);
  // Attach the relative wait duration to the trace zone for analysis.
  IREE_TRACE_ZONE_APPEND_VALUE(
      z0, (uint64_t)iree_absolute_deadline_to_timeout_ns(deadline_ns));

  // NOTE: we want to use sleep APIs with absolute times as that makes retrying
  // on spurious wakes easier; if we were using relative timeouts we would need
  // to ensure we don't drift.
  bool did_wait = iree_wait_until_impl(deadline_ns);

  IREE_TRACE_ZONE_END(z0);
  return did_wait;
}
diff --git a/runtime/src/iree/base/time.h b/runtime/src/iree/base/time.h
new file mode 100644
index 0000000..89cad70
--- /dev/null
+++ b/runtime/src/iree/base/time.h
@@ -0,0 +1,194 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_TIME_H_
+#define IREE_BASE_TIME_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/attributes.h"
+#include "iree/base/config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A point in time represented as nanoseconds since unix epoch.
+// TODO(benvanik): pick something easy to get into/out-of time_t/etc.
+typedef int64_t iree_time_t;
+
+// A time in the infinite past used to indicate "already happened".
+// This forces APIs that wait for a point in time to act as a poll and always
+// return IREE_STATUS_DEADLINE_EXCEEDED instead of blocking the caller.
+#define IREE_TIME_INFINITE_PAST INT64_MIN
+
+// A time in the infinite future used to indicate "never".
+// This causes APIs that wait for a point in time to wait however long is needed
+// to satisfy the wait condition.
+#define IREE_TIME_INFINITE_FUTURE INT64_MAX
+
+// A duration represented as relative nanoseconds.
+typedef int64_t iree_duration_t;
+
+// A zero-length duration.
+// Like IREE_TIME_INFINITE_PAST this forces APIs that would wait to instead
+// return IREE_STATUS_DEADLINE_EXCEEDED immediately.
+#define IREE_DURATION_ZERO 0
+
+// An infinite-length duration.
+// Like IREE_TIME_INFINITE_FUTURE this causes APIs that wait to do so until
+// their wait condition is satisfied without returning early.
+#define IREE_DURATION_INFINITE INT64_MAX
+
+// Returns the current system time in unix nanoseconds.
+// Depending on the system architecture and power mode this time may have a
+// very coarse granularity (on the order of microseconds to milliseconds).
+//
+// The system timer may not be monotonic; users should ensure when comparing
+// times they check for negative values in case the time moves backwards.
+IREE_API_EXPORT iree_time_t iree_time_now(void);
+
+// Converts a relative timeout duration to an absolute deadline time.
+// This handles the special cases of IREE_DURATION_ZERO and
+// IREE_DURATION_INFINITE to avoid extraneous time queries.
+IREE_API_EXPORT iree_time_t
+iree_relative_timeout_to_deadline_ns(iree_duration_t timeout_ns);
+
+// Converts an absolute deadline time to a relative timeout duration in nanos.
+// This handles the special cases of IREE_TIME_INFINITE_PAST and
+// IREE_TIME_INFINITE_FUTURE to avoid extraneous time queries.
+IREE_API_EXPORT iree_duration_t
+iree_absolute_deadline_to_timeout_ns(iree_time_t deadline_ns);
+
+// Converts an absolute deadline time to a relative timeout duration in millis.
+// This handles the special cases of IREE_TIME_INFINITE_PAST and
+// IREE_TIME_INFINITE_FUTURE to avoid extraneous time queries.
+IREE_API_EXPORT uint32_t
+iree_absolute_deadline_to_timeout_ms(iree_time_t deadline_ns);
+
+typedef enum iree_timeout_type_e {
+ // Timeout is defined by an absolute value `deadline_ns`.
+ IREE_TIMEOUT_ABSOLUTE = 0,
+ // Timeout is defined by a relative value `timeout_ns`.
+ IREE_TIMEOUT_RELATIVE = 1,
+} iree_timeout_type_t;
+
+// A timeout defined either by an absolute or relative value.
+typedef struct iree_timeout_t {
+ iree_timeout_type_t type;
+ iree_time_t nanos;
+} iree_timeout_t;
+
+// Returns a timeout that will be exceeded immediately.
+// This can be used with APIs that would otherwise wait to cause them to poll.
+//
+// Example:
+// status = iree_wait_for_signal_or_timeout(&obj, iree_immediate_timeout());
+// if (iree_status_is_deadline_exceeded(status)) {
+// // Would have waited indicating the signal has not occurred. If the
+// // timeout was not immediate the call would have blocked the caller.
+// }
+static inline iree_timeout_t iree_immediate_timeout(void) {
+ iree_timeout_t timeout = {IREE_TIMEOUT_ABSOLUTE, IREE_TIME_INFINITE_PAST};
+ return timeout;
+}
+
+// Returns true if the |timeout| indicates an immediate/polling/nonblocking
+// timeout.
+static inline bool iree_timeout_is_immediate(iree_timeout_t timeout) {
+ return timeout.type == IREE_TIMEOUT_ABSOLUTE
+ ? timeout.nanos == IREE_TIME_INFINITE_PAST
+ : timeout.nanos == IREE_DURATION_ZERO;
+}
+
+// Returns a timeout that will never be reached.
+// This can be used with APIs that can wait to disable the early
+// deadline-exceeded returns when a condition is not met. It should be used with
+// care as it can complicate program state and make termination more prone to
+// hangs. On the other hand, it's really useful to not bother with actual
+// deadlines. YMMV.
+static inline iree_timeout_t iree_infinite_timeout(void) {
+ iree_timeout_t timeout = {IREE_TIMEOUT_ABSOLUTE, IREE_TIME_INFINITE_FUTURE};
+ return timeout;
+}
+
+// Returns true if the |timeout| indicates an infinite/forever blocking timeout.
+static inline bool iree_timeout_is_infinite(iree_timeout_t timeout) {
+ return timeout.type == IREE_TIMEOUT_ABSOLUTE
+ ? timeout.nanos == IREE_TIME_INFINITE_FUTURE
+ : timeout.nanos == IREE_DURATION_INFINITE;
+}
+
+// Defines an absolute timeout with the given time in nanoseconds.
+static inline iree_timeout_t iree_make_deadline(iree_time_t deadline_ns) {
+ iree_timeout_t timeout = {IREE_TIMEOUT_ABSOLUTE, deadline_ns};
+ return timeout;
+}
+
+// Defines a relative timeout with the given time in nanoseconds.
+static inline iree_timeout_t iree_make_timeout_ns(iree_duration_t timeout_ns) {
+ iree_timeout_t timeout = {IREE_TIMEOUT_RELATIVE, timeout_ns};
+ return timeout;
+}
+
+// Defines a relative timeout with the given time in milliseconds.
+static inline iree_timeout_t iree_make_timeout_ms(iree_duration_t timeout_ms) {
+ iree_timeout_t timeout = {
+ IREE_TIMEOUT_RELATIVE,
+ timeout_ms == IREE_DURATION_INFINITE ? IREE_DURATION_INFINITE
+ : timeout_ms * 1000000,
+ };
+ return timeout;
+}
+
+// Converts a timeout from relative to absolute (if it is).
+//
+// Absolute timeouts (deadlines) are better for long-running tasks or when
+// making calls that may complete in stages as relative ones will tend to skew;
+// if a wait is performed with a relative timeout of 10ms but it takes 5ms to
+// get from the origin of the call to the actual wait using the timeout then
+// the total latency of the call may be 15ms (5ms to prepare + 10ms on the
+// wait). Instead if an absolute deadline is used the caller can ensure that
+// the total time spent in the operation happens regardless of the intervening
+// work that happens.
+//
+// For this reason IREE internal APIs try to convert to absolute times and users
+// may be able to reduce overhead by populating the times as absolute to start
+// with via iree_make_deadline.
+static inline void iree_convert_timeout_to_absolute(iree_timeout_t* timeout) {
+ if (timeout->type == IREE_TIMEOUT_RELATIVE) {
+ timeout->type = IREE_TIMEOUT_ABSOLUTE;
+ timeout->nanos = iree_relative_timeout_to_deadline_ns(timeout->nanos);
+ }
+}
+
+// Returns an absolute deadline in nanoseconds from the given timeout.
+static inline iree_time_t iree_timeout_as_deadline_ns(iree_timeout_t timeout) {
+ return timeout.type == IREE_TIMEOUT_ABSOLUTE
+ ? timeout.nanos
+ : iree_relative_timeout_to_deadline_ns(timeout.nanos);
+}
+
+// Returns the earliest timeout between |lhs| and |rhs|.
+static inline iree_timeout_t iree_timeout_min(iree_timeout_t lhs,
+ iree_timeout_t rhs) {
+ iree_convert_timeout_to_absolute(&lhs);
+ iree_convert_timeout_to_absolute(&rhs);
+ return iree_make_deadline(lhs.nanos < rhs.nanos ? lhs.nanos : rhs.nanos);
+}
+
+// Waits until |deadline_ns| (or longer), putting the calling thread to sleep.
+// The precision of this varies across platforms and may have a minimum
+// granularity anywhere between microsecond to milliseconds.
+// Returns true if the sleep completed successfully and false if it was aborted.
+bool iree_wait_until(iree_time_t deadline_ns);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_TIME_H_
diff --git a/runtime/src/iree/base/tracing.cc b/runtime/src/iree/base/tracing.cc
new file mode 100644
index 0000000..32e2826
--- /dev/null
+++ b/runtime/src/iree/base/tracing.cc
@@ -0,0 +1,205 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/tracing.h"
+
+#include "iree/base/target_platform.h"
+
+// Textually include the Tracy implementation.
+// We do this here instead of relying on an external build target so that we can
+// ensure our configuration specified in tracing.h is picked up.
+#if IREE_TRACING_FEATURES != 0
+#include "third_party/tracy/TracyClient.cpp"
+#endif // IREE_TRACING_FEATURES
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
#if defined(TRACY_ENABLE) && defined(IREE_PLATFORM_WINDOWS)
// Process-local mutex guarding symbol resolution on Windows.
// NOTE(review): presumably these hooks are invoked by Tracy's DbgHelp
// integration (DbgHelp.dll is not thread-safe) — confirm against the Tracy
// client sources for the vendored version.
static HANDLE iree_dbghelp_mutex;
void IREEDbgHelpInit(void) {
  iree_dbghelp_mutex = CreateMutex(NULL, FALSE, NULL);
}
void IREEDbgHelpLock(void) {
  WaitForSingleObject(iree_dbghelp_mutex, INFINITE);
}
void IREEDbgHelpUnlock(void) { ReleaseMutex(iree_dbghelp_mutex); }
#endif  // TRACY_ENABLE && IREE_PLATFORM_WINDOWS
+
+#if IREE_TRACING_FEATURES != 0
+
// Registers |name| as the calling thread's display name in the profiler.
void iree_tracing_set_thread_name_impl(const char* name) {
  tracy::SetThreadName(name);
}
+
// Begins a tracing zone for the static source location |src_loc|, optionally
// attaching a dynamic name, and returns the zone id for the matching zone-end.
iree_zone_id_t iree_tracing_zone_begin_impl(
    const iree_tracing_location_t* src_loc, const char* name,
    size_t name_length) {
  const iree_zone_id_t zone_id = tracy::GetProfiler().GetNextZoneId();

#ifndef TRACY_NO_VERIFY
  {
    // Emit the zone id into the event stream so Tracy can validate nesting.
    TracyLfqPrepareC(tracy::QueueType::ZoneValidation);
    tracy::MemWrite(&item->zoneValidation.id, zone_id);
    TracyLfqCommitC;
  }
#endif  // TRACY_NO_VERIFY

  {
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
    TracyLfqPrepareC(tracy::QueueType::ZoneBeginCallstack);
#else
    TracyLfqPrepareC(tracy::QueueType::ZoneBegin);
#endif  // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
    tracy::MemWrite(&item->zoneBegin.time, tracy::Profiler::GetTime());
    tracy::MemWrite(&item->zoneBegin.srcloc,
                    reinterpret_cast<uint64_t>(src_loc));
    TracyLfqCommitC;
  }

#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
  tracy::GetProfiler().SendCallstack(IREE_TRACING_MAX_CALLSTACK_DEPTH);
#endif  // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS

  if (name_length) {
    // Dynamic zone name: copy into tracy-owned memory and attach to the zone.
#ifndef TRACY_NO_VERIFY
    {
      TracyLfqPrepareC(tracy::QueueType::ZoneValidation);
      tracy::MemWrite(&item->zoneValidation.id, zone_id);
      TracyLfqCommitC;
    }
#endif  // TRACY_NO_VERIFY
    auto name_ptr = reinterpret_cast<char*>(tracy::tracy_malloc(name_length));
    memcpy(name_ptr, name, name_length);
    TracyLfqPrepareC(tracy::QueueType::ZoneName);
    tracy::MemWrite(&item->zoneTextFat.text,
                    reinterpret_cast<uint64_t>(name_ptr));
    tracy::MemWrite(&item->zoneTextFat.size,
                    static_cast<uint64_t>(name_length));
    TracyLfqCommitC;
  }

  return zone_id;
}
+
// Begins a tracing zone for a source location that is not statically known
// (e.g. JITed or externally-provided code): the file/function/name strings are
// copied into a tracy-allocated source location record.
iree_zone_id_t iree_tracing_zone_begin_external_impl(
    const char* file_name, size_t file_name_length, uint32_t line,
    const char* function_name, size_t function_name_length, const char* name,
    size_t name_length) {
  uint64_t src_loc = tracy::Profiler::AllocSourceLocation(
      line, file_name, file_name_length, function_name, function_name_length,
      name, name_length);

  const iree_zone_id_t zone_id = tracy::GetProfiler().GetNextZoneId();

#ifndef TRACY_NO_VERIFY
  {
    // Emit the zone id into the event stream so Tracy can validate nesting.
    TracyLfqPrepareC(tracy::QueueType::ZoneValidation);
    tracy::MemWrite(&item->zoneValidation.id, zone_id);
    TracyLfqCommitC;
  }
#endif  // TRACY_NO_VERIFY

  {
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
    TracyLfqPrepareC(tracy::QueueType::ZoneBeginAllocSrcLocCallstack);
#else
    TracyLfqPrepareC(tracy::QueueType::ZoneBeginAllocSrcLoc);
#endif  // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
    tracy::MemWrite(&item->zoneBegin.time, tracy::Profiler::GetTime());
    tracy::MemWrite(&item->zoneBegin.srcloc, src_loc);
    TracyLfqCommitC;
  }

#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
  tracy::GetProfiler().SendCallstack(IREE_TRACING_MAX_CALLSTACK_DEPTH);
#endif  // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS

  return zone_id;
}
+
// Configures how Tracy renders the plot with the given (literal) name.
void iree_tracing_set_plot_type_impl(const char* name_literal,
                                     uint8_t plot_type) {
  tracy::Profiler::ConfigurePlot(name_literal,
                                 static_cast<tracy::PlotFormatType>(plot_type));
}

// Appends an int64 sample to the named plot.
void iree_tracing_plot_value_i64_impl(const char* name_literal, int64_t value) {
  tracy::Profiler::PlotData(name_literal, value);
}

// Appends a float sample to the named plot.
void iree_tracing_plot_value_f32_impl(const char* name_literal, float value) {
  tracy::Profiler::PlotData(name_literal, value);
}

// Appends a double sample to the named plot.
void iree_tracing_plot_value_f64_impl(const char* name_literal, double value) {
  tracy::Profiler::PlotData(name_literal, value);
}
+
// Registers a new lockable with Tracy, assigning it a process-unique id via
// an atomic counter; |out_lock_id| receives the id used by the calls below.
void iree_tracing_mutex_announce(const iree_tracing_location_t* src_loc,
                                 uint32_t* out_lock_id) {
  uint32_t lock_id =
      tracy::GetLockCounter().fetch_add(1, std::memory_order_relaxed);
  // Guard against id-space exhaustion (wrap would alias an existing lock).
  assert(lock_id != std::numeric_limits<uint32_t>::max());
  *out_lock_id = lock_id;

  auto item = tracy::Profiler::QueueSerial();
  tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockAnnounce);
  tracy::MemWrite(&item->lockAnnounce.id, lock_id);
  tracy::MemWrite(&item->lockAnnounce.time, tracy::Profiler::GetTime());
  tracy::MemWrite(&item->lockAnnounce.lckloc,
                  reinterpret_cast<uint64_t>(src_loc));
  tracy::MemWrite(&item->lockAnnounce.type, tracy::LockType::Lockable);
  tracy::Profiler::QueueSerialFinish();
}

// Records destruction of the lock identified by |lock_id|.
void iree_tracing_mutex_terminate(uint32_t lock_id) {
  auto item = tracy::Profiler::QueueSerial();
  tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockTerminate);
  tracy::MemWrite(&item->lockTerminate.id, lock_id);
  tracy::MemWrite(&item->lockTerminate.time, tracy::Profiler::GetTime());
  tracy::Profiler::QueueSerialFinish();
}

// Records that the calling thread is about to block acquiring the lock.
void iree_tracing_mutex_before_lock(uint32_t lock_id) {
  auto item = tracy::Profiler::QueueSerial();
  tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockWait);
  tracy::MemWrite(&item->lockWait.thread, tracy::GetThreadHandle());
  tracy::MemWrite(&item->lockWait.id, lock_id);
  tracy::MemWrite(&item->lockWait.time, tracy::Profiler::GetTime());
  tracy::Profiler::QueueSerialFinish();
}

// Records that the calling thread acquired the lock.
void iree_tracing_mutex_after_lock(uint32_t lock_id) {
  auto item = tracy::Profiler::QueueSerial();
  tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockObtain);
  tracy::MemWrite(&item->lockObtain.thread, tracy::GetThreadHandle());
  tracy::MemWrite(&item->lockObtain.id, lock_id);
  tracy::MemWrite(&item->lockObtain.time, tracy::Profiler::GetTime());
  tracy::Profiler::QueueSerialFinish();
}

// try-lock variant: only a successful acquisition produces an event.
void iree_tracing_mutex_after_try_lock(uint32_t lock_id, bool was_acquired) {
  if (was_acquired) {
    iree_tracing_mutex_after_lock(lock_id);
  }
}

// Records that the calling thread released the lock.
void iree_tracing_mutex_after_unlock(uint32_t lock_id) {
  auto item = tracy::Profiler::QueueSerial();
  tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockRelease);
  tracy::MemWrite(&item->lockRelease.thread, tracy::GetThreadHandle());
  tracy::MemWrite(&item->lockRelease.id, lock_id);
  tracy::MemWrite(&item->lockRelease.time, tracy::Profiler::GetTime());
  tracy::Profiler::QueueSerialFinish();
}
+
+#endif // IREE_TRACING_FEATURES
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
diff --git a/runtime/src/iree/base/tracing.h b/runtime/src/iree/base/tracing.h
new file mode 100644
index 0000000..9a879ae
--- /dev/null
+++ b/runtime/src/iree/base/tracing.h
@@ -0,0 +1,502 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Utilities for runtime tracing support.
+// These allow the various runtime subsystems to insert trace events, attach
+// metadata to events or allocations, and control tracing verbosity.
+//
+// Tracing features can be enabled with either an IREE_TRACING_MODE define that
+// allows predefined tracing modes or individual IREE_TRACING_FEATURE_* flags
+// set on IREE_TRACING_FEATURES when a more custom set of features is
+// required. Exact feature support may vary on platform and toolchain.
+//
+// The tracing infrastructure is currently designed to target the Tracy
+// profiler: https://github.com/wolfpld/tracy
+// Tracy's profiler UI allowing for streaming captures and analysis can be
+// downloaded from: https://github.com/wolfpld/tracy/releases
+// The manual provided on the releases page contains more information about how
+// Tracy works, its limitations, and how to operate the UI.
+//
+// NOTE: this header is used both from C and C++ code and only conditionally
+// enables the C++ when in a valid context. Do not use C++ features or include
+// other files that are not C-compatible.
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "iree/base/attributes.h"
+#include "iree/base/config.h"
+
+#ifndef IREE_BASE_TRACING_H_
+#define IREE_BASE_TRACING_H_
+
+//===----------------------------------------------------------------------===//
+// IREE_TRACING_FEATURE_* flags and options
+//===----------------------------------------------------------------------===//
+
+// Enables IREE_TRACE_* macros for instrumented tracing.
+#define IREE_TRACING_FEATURE_INSTRUMENTATION (1 << 0)
+
+// Captures callstacks up to IREE_TRACING_MAX_CALLSTACK_DEPTH at all
+// IREE_TRACE_* events. This has a significant performance impact and should
+// only be enabled when tracking down missing instrumentation.
+#define IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS (1 << 1)
+
+// Tracks all allocations (we know about) via new/delete/malloc/free.
+// This allows fine-grained allocation and usage tracking down to the code that
+// performed the allocations. Allocations or frees that are performed outside of
+// the IREE API or runtime library will not be tracked and unbalanced usage
+// (allocating with IREE's API then freeing with stdlib free, for example) will
+// cause Tracy to become very unhappy.
+#define IREE_TRACING_FEATURE_ALLOCATION_TRACKING (1 << 2)
+
+// Captures callstacks up to IREE_TRACING_MAX_CALLSTACK_DEPTH at all allocation
+// events when allocation tracking is enabled.
+#define IREE_TRACING_FEATURE_ALLOCATION_CALLSTACKS (1 << 3)
+
+// Tracks fast locks in all cases (both contended and uncontended).
+// This may introduce contention where there would otherwise be none as what
+// would be a handful of instructions and little memory access may become
+// hundreds. To see only locks under contention use
+// IREE_TRACING_FEATURE_SLOW_LOCKS.
+#define IREE_TRACING_FEATURE_FAST_LOCKS (1 << 4)
+
+// Tracks slow locks that end up going to the OS for waits/wakes in futexes.
+// Uncontended locks will not be displayed and only waits will be visible in the
+// Tracy UI.
+#define IREE_TRACING_FEATURE_SLOW_LOCKS (1 << 5)
+
+// Forwards log messages to traces, which will be visible under "Messages" in
+// the Tracy UI.
+#define IREE_TRACING_FEATURE_LOG_MESSAGES (1 << 6)
+
+#if !defined(IREE_TRACING_MAX_CALLSTACK_DEPTH)
+// Tracing functions that capture stack traces will only capture up to N frames.
+// The overhead for stack walking scales linearly with the number of frames
+// captured and can increase the cost of an event capture by orders of
+// magnitude.
+// Minimum: 0 (disable)
+// Maximum: 62
+#define IREE_TRACING_MAX_CALLSTACK_DEPTH 16
+#endif // IREE_TRACING_MAX_CALLSTACK_DEPTH
+
+//===----------------------------------------------------------------------===//
+// IREE_TRACING_MODE simple setting
+//===----------------------------------------------------------------------===//
+
+// Set IREE_TRACING_FEATURES based on IREE_TRACING_MODE if the user hasn't
+// overridden it with more specific settings.
+//
+// IREE_TRACING_MODE = 0: tracing disabled
+// IREE_TRACING_MODE = 1: instrumentation, log messages, and basic statistics
+// IREE_TRACING_MODE = 2: same as 1 with added allocation tracking
+// IREE_TRACING_MODE = 3: same as 2 with callstacks for allocations
+// IREE_TRACING_MODE = 4: same as 3 with callstacks for all instrumentation
+#if !defined(IREE_TRACING_FEATURES)
+#if defined(IREE_TRACING_MODE) && IREE_TRACING_MODE == 1
+#define IREE_TRACING_FEATURES \
+ (IREE_TRACING_FEATURE_INSTRUMENTATION | IREE_TRACING_FEATURE_LOG_MESSAGES)
+#undef IREE_TRACING_MAX_CALLSTACK_DEPTH
+#define IREE_TRACING_MAX_CALLSTACK_DEPTH 0
+#elif defined(IREE_TRACING_MODE) && IREE_TRACING_MODE == 2
+#define IREE_TRACING_FEATURES \
+ (IREE_TRACING_FEATURE_INSTRUMENTATION | \
+ IREE_TRACING_FEATURE_ALLOCATION_TRACKING | \
+ IREE_TRACING_FEATURE_LOG_MESSAGES)
+#elif defined(IREE_TRACING_MODE) && IREE_TRACING_MODE == 3
+#define IREE_TRACING_FEATURES \
+ (IREE_TRACING_FEATURE_INSTRUMENTATION | \
+ IREE_TRACING_FEATURE_ALLOCATION_TRACKING | \
+ IREE_TRACING_FEATURE_ALLOCATION_CALLSTACKS | \
+ IREE_TRACING_FEATURE_LOG_MESSAGES)
+#elif defined(IREE_TRACING_MODE) && IREE_TRACING_MODE >= 4
+#define IREE_TRACING_FEATURES \
+ (IREE_TRACING_FEATURE_INSTRUMENTATION | \
+ IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS | \
+ IREE_TRACING_FEATURE_ALLOCATION_TRACKING | \
+ IREE_TRACING_FEATURE_ALLOCATION_CALLSTACKS | \
+ IREE_TRACING_FEATURE_LOG_MESSAGES)
+#else
+#define IREE_TRACING_FEATURES 0
+#endif // IREE_TRACING_MODE
+#endif // !IREE_TRACING_FEATURES
+
+//===----------------------------------------------------------------------===//
+// Tracy configuration
+//===----------------------------------------------------------------------===//
+// NOTE: order matters here as we are including files that require/define.
+
+// Enable Tracy only when we are using tracing features.
+#if IREE_TRACING_FEATURES != 0
+#define TRACY_ENABLE 1
+#endif // IREE_TRACING_FEATURES
+
+// Disable zone nesting verification in release builds.
+// The verification makes it easy to find unbalanced zones but doubles the cost
+// (at least) of each zone recorded. Run in debug builds to verify new
+// instrumentation is correct before capturing traces in release builds.
+#if defined(NDEBUG)
+#define TRACY_NO_VERIFY 1
+#endif // NDEBUG
+
+// Force callstack capture on all zones (even those without the C suffix).
+#if (IREE_TRACING_FEATURES & \
+ IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS) || \
+ (IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_CALLSTACKS)
+#define TRACY_CALLSTACK 1
+#endif // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
+
+// Guard tracy use of DbgHelp on Windows via IREEDbgHelp* functions.
+// All our own usage of DbgHelp must be guarded with the same lock.
+#define TRACY_DBGHELP_LOCK IREEDbgHelp
+
+// Disable frame image capture to avoid the DXT compression code and the frame
+// capture worker thread.
+#define TRACY_NO_FRAME_IMAGE 1
+
+// We don't care about vsync events as they can pollute traces and don't have
+// much meaning in our workloads. If integrators still want them we can expose
+// this as a tracing feature flag.
+#define TRACY_NO_VSYNC_CAPTURE 1
+
+// Flush the settings we have so far; settings after this point will be
+// overriding values set by Tracy itself.
+#if defined(TRACY_ENABLE)
+#include "third_party/tracy/TracyC.h" // IWYU pragma: export
+#endif
+
+// Disable callstack capture if our depth is 0; this allows us to avoid any
+// expensive capture (and all the associated dependencies) if we aren't going to
+// use it. Note that this means that unless code is instrumented we won't be
+// able to tell what's happening in the Tracy UI.
+#if IREE_TRACING_MAX_CALLSTACK_DEPTH == 0
+#undef TRACY_CALLSTACK
+#endif // IREE_TRACING_MAX_CALLSTACK_DEPTH
+
+//===----------------------------------------------------------------------===//
+// C API used for Tracy control
+//===----------------------------------------------------------------------===//
+// These functions are implementation details and should not be called directly.
+// Always use the macros (or C++ RAII types).
+
+// Local zone ID used for the C IREE_TRACE_ZONE_* macros.
+typedef uint32_t iree_zone_id_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#if IREE_TRACING_FEATURES
+
+void iree_tracing_set_thread_name_impl(const char* name);
+
+typedef struct ___tracy_source_location_data iree_tracing_location_t;
+
+#ifdef __cplusplus
+#define iree_tracing_make_zone_ctx(zone_id) \
+ TracyCZoneCtx { zone_id, 1 }
+#else
+#define iree_tracing_make_zone_ctx(zone_id) \
+ (TracyCZoneCtx) { zone_id, 1 }
+#endif // __cplusplus
+
+IREE_MUST_USE_RESULT iree_zone_id_t
+iree_tracing_zone_begin_impl(const iree_tracing_location_t* src_loc,
+ const char* name, size_t name_length);
+IREE_MUST_USE_RESULT iree_zone_id_t iree_tracing_zone_begin_external_impl(
+ const char* file_name, size_t file_name_length, uint32_t line,
+ const char* function_name, size_t function_name_length, const char* name,
+ size_t name_length);
+
+void iree_tracing_set_plot_type_impl(const char* name_literal,
+ uint8_t plot_type);
+void iree_tracing_plot_value_i64_impl(const char* name_literal, int64_t value);
+void iree_tracing_plot_value_f32_impl(const char* name_literal, float value);
+void iree_tracing_plot_value_f64_impl(const char* name_literal, double value);
+
+void iree_tracing_mutex_announce(const iree_tracing_location_t* src_loc,
+ uint32_t* out_lock_id);
+void iree_tracing_mutex_terminate(uint32_t lock_id);
+void iree_tracing_mutex_before_lock(uint32_t lock_id);
+void iree_tracing_mutex_after_lock(uint32_t lock_id);
+void iree_tracing_mutex_after_try_lock(uint32_t lock_id, bool was_acquired);
+void iree_tracing_mutex_after_unlock(uint32_t lock_id);
+
+#endif // IREE_TRACING_FEATURES
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Instrumentation macros (C)
+//===----------------------------------------------------------------------===//
+
+// Matches Tracy's PlotFormatType enum.
+enum {
+ // Values will be displayed as plain numbers.
+ IREE_TRACING_PLOT_TYPE_NUMBER = 0,
+ // Treats the values as memory sizes. Will display kilobytes, megabytes, etc.
+ IREE_TRACING_PLOT_TYPE_MEMORY = 1,
+ // Values will be displayed as percentage with value 100 being equal to 100%.
+ IREE_TRACING_PLOT_TYPE_PERCENTAGE = 2,
+};
+
+// Colors used for messages based on the level provided to the macro.
+enum {
+ IREE_TRACING_MESSAGE_LEVEL_ERROR = 0xFF0000u,
+ IREE_TRACING_MESSAGE_LEVEL_WARNING = 0xFFFF00u,
+ IREE_TRACING_MESSAGE_LEVEL_INFO = 0xFFFFFFu,
+ IREE_TRACING_MESSAGE_LEVEL_VERBOSE = 0xC0C0C0u,
+ IREE_TRACING_MESSAGE_LEVEL_DEBUG = 0x00FF00u,
+};
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+// Sets an application-specific payload that will be stored in the trace.
+// This can be used to fingerprint traces to particular versions and denote
+// compilation options or configuration. The given string value will be copied.
+#define IREE_TRACE_SET_APP_INFO(value, value_length) \
+ ___tracy_emit_message_appinfo(value, value_length)
+
+// Sets the current thread name to the given string value.
+// This will only set the thread name as it appears in the tracing backend and
+// not set the OS thread name as it would appear in a debugger.
+// The C-string |name| will be copied and does not need to be a literal.
+#define IREE_TRACE_SET_THREAD_NAME(name) iree_tracing_set_thread_name_impl(name)
+
+// Evaluates the expression code only if tracing is enabled.
+//
+// Example:
+// struct {
+// IREE_TRACE(uint32_t trace_only_value);
+// } my_object;
+// IREE_TRACE(my_object.trace_only_value = 5);
+#define IREE_TRACE(expr) expr
+
+// Begins a new zone with the parent function name.
+#define IREE_TRACE_ZONE_BEGIN(zone_id) \
+ IREE_TRACE_ZONE_BEGIN_NAMED(zone_id, NULL)
+
+// Begins a new zone with the given compile-time literal name.
+#define IREE_TRACE_ZONE_BEGIN_NAMED(zone_id, name_literal) \
+ static const iree_tracing_location_t TracyConcat( \
+ __tracy_source_location, __LINE__) = {name_literal, __FUNCTION__, \
+ __FILE__, (uint32_t)__LINE__, 0}; \
+ iree_zone_id_t zone_id = iree_tracing_zone_begin_impl( \
+ &TracyConcat(__tracy_source_location, __LINE__), NULL, 0);
+
+// Begins a new zone with the given runtime dynamic string name.
+// The |value| string will be copied into the trace buffer.
+#define IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(zone_id, name, name_length) \
+ static const iree_tracing_location_t TracyConcat( \
+ __tracy_source_location, __LINE__) = {0, __FUNCTION__, __FILE__, \
+ (uint32_t)__LINE__, 0}; \
+ iree_zone_id_t zone_id = iree_tracing_zone_begin_impl( \
+ &TracyConcat(__tracy_source_location, __LINE__), (name), (name_length));
+
+// Begins an externally defined zone with a dynamic source location.
+// The |file_name|, |function_name|, and optional |name| strings will be copied
+// into the trace buffer and do not need to persist.
+#define IREE_TRACE_ZONE_BEGIN_EXTERNAL( \
+ zone_id, file_name, file_name_length, line, function_name, \
+ function_name_length, name, name_length) \
+ iree_zone_id_t zone_id = iree_tracing_zone_begin_external_impl( \
+ file_name, file_name_length, line, function_name, function_name_length, \
+ name, name_length)
+
+// Sets the dynamic color of the zone to an XXBBGGRR value.
+#define IREE_TRACE_ZONE_SET_COLOR(zone_id, color_xbgr) \
+ ___tracy_emit_zone_color(iree_tracing_make_zone_ctx(zone_id), color_xbgr);
+
+// Appends an integer value to the parent zone. May be called multiple times.
+#define IREE_TRACE_ZONE_APPEND_VALUE(zone_id, value) \
+ ___tracy_emit_zone_value(iree_tracing_make_zone_ctx(zone_id), value);
+
+// Appends a string value to the parent zone. May be called multiple times.
+// The |value| string will be copied into the trace buffer.
+#define IREE_TRACE_ZONE_APPEND_TEXT(...) \
+ IREE_TRACE_IMPL_GET_VARIADIC_((__VA_ARGS__, \
+ IREE_TRACE_ZONE_APPEND_TEXT_STRING_VIEW, \
+ IREE_TRACE_ZONE_APPEND_TEXT_CSTRING)) \
+ (__VA_ARGS__)
+#define IREE_TRACE_ZONE_APPEND_TEXT_CSTRING(zone_id, value) \
+ IREE_TRACE_ZONE_APPEND_TEXT_STRING_VIEW(zone_id, value, strlen(value))
+#define IREE_TRACE_ZONE_APPEND_TEXT_STRING_VIEW(zone_id, value, value_length) \
+ ___tracy_emit_zone_text(iree_tracing_make_zone_ctx(zone_id), value, \
+ value_length)
+
+// Ends the current zone. Must be passed the |zone_id| from the _BEGIN.
+#define IREE_TRACE_ZONE_END(zone_id) \
+ ___tracy_emit_zone_end(iree_tracing_make_zone_ctx(zone_id))
+
+// Ends the current zone before returning on a failure.
+// Sugar for IREE_TRACE_ZONE_END+IREE_RETURN_IF_ERROR.
+#define IREE_RETURN_AND_END_ZONE_IF_ERROR(zone_id, ...) \
+ IREE_RETURN_AND_EVAL_IF_ERROR(IREE_TRACE_ZONE_END(zone_id), __VA_ARGS__)
+
+// Configures the named plot with an IREE_TRACING_PLOT_TYPE_* representation.
+#define IREE_TRACE_SET_PLOT_TYPE(name_literal, plot_type) \
+ iree_tracing_set_plot_type_impl(name_literal, plot_type)
+// Plots a value in the named plot group as an integer.
+#define IREE_TRACE_PLOT_VALUE_I64(name_literal, value) \
+ iree_tracing_plot_value_i64_impl(name_literal, value)
+// Plots a value in the named plot group as a single-precision float.
+#define IREE_TRACE_PLOT_VALUE_F32(name_literal, value) \
+ iree_tracing_plot_value_f32_impl(name_literal, value)
+// Plots a value in the named plot group as a double-precision float.
+#define IREE_TRACE_PLOT_VALUE_F64(name_literal, value) \
+ iree_tracing_plot_value_f64_impl(name_literal, value)
+
+// Demarcates an advancement of the top-level unnamed frame group.
+#define IREE_TRACE_FRAME_MARK() ___tracy_emit_frame_mark(NULL)
+// Demarcates an advancement of a named frame group.
+#define IREE_TRACE_FRAME_MARK_NAMED(name_literal) \
+ ___tracy_emit_frame_mark(name_literal)
+// Begins a discontinuous frame in a named frame group.
+// Must be properly matched with a IREE_TRACE_FRAME_MARK_NAMED_END.
+#define IREE_TRACE_FRAME_MARK_BEGIN_NAMED(name_literal) \
+ ___tracy_emit_frame_mark_start(name_literal)
+// Ends a discontinuous frame in a named frame group.
+#define IREE_TRACE_FRAME_MARK_END_NAMED(name_literal) \
+ ___tracy_emit_frame_mark_end(name_literal)
+
+// Logs a message at the given logging level to the trace.
+// The message text must be a compile-time string literal.
+#define IREE_TRACE_MESSAGE(level, value_literal) \
+ ___tracy_emit_messageLC(value_literal, IREE_TRACING_MESSAGE_LEVEL_##level, 0)
+// Logs a message with the given color to the trace.
+// Standard colors are defined as IREE_TRACING_MESSAGE_LEVEL_* values.
+// The message text must be a compile-time string literal.
+#define IREE_TRACE_MESSAGE_COLORED(color, value_literal) \
+ ___tracy_emit_messageLC(value_literal, color, 0)
+// Logs a dynamically-allocated message at the given logging level to the trace.
+// The string |value| will be copied into the trace buffer.
+#define IREE_TRACE_MESSAGE_DYNAMIC(level, value, value_length) \
+ ___tracy_emit_messageC(value, value_length, \
+ IREE_TRACING_MESSAGE_LEVEL_##level, 0)
+// Logs a dynamically-allocated message with the given color to the trace.
+// Standard colors are defined as IREE_TRACING_MESSAGE_LEVEL_* values.
+// The string |value| will be copied into the trace buffer.
+#define IREE_TRACE_MESSAGE_DYNAMIC_COLORED(color, value, value_length) \
+ ___tracy_emit_messageC(value, value_length, color, 0)
+
+// Utilities:
+#define IREE_TRACE_IMPL_GET_VARIADIC_HELPER_(_1, _2, _3, NAME, ...) NAME
+#define IREE_TRACE_IMPL_GET_VARIADIC_(args) \
+ IREE_TRACE_IMPL_GET_VARIADIC_HELPER_ args
+
+#else
+#define IREE_TRACE_SET_APP_INFO(value, value_length)
+#define IREE_TRACE_SET_THREAD_NAME(name)
+#define IREE_TRACE(expr)
+#define IREE_TRACE_ZONE_BEGIN(zone_id)
+#define IREE_TRACE_ZONE_BEGIN_NAMED(zone_id, name_literal)
+#define IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(zone_id, name, name_length)
+#define IREE_TRACE_ZONE_BEGIN_EXTERNAL( \
+ zone_id, file_name, file_name_length, line, function_name, \
+ function_name_length, name, name_length)
+#define IREE_TRACE_ZONE_SET_COLOR(zone_id, color_xrgb)
+#define IREE_TRACE_ZONE_APPEND_VALUE(zone_id, value)
+#define IREE_TRACE_ZONE_APPEND_TEXT(zone_id, ...)
+#define IREE_TRACE_ZONE_APPEND_TEXT_CSTRING(zone_id, value)
+#define IREE_TRACE_ZONE_APPEND_TEXT_STRING_VIEW(zone_id, value, value_length)
+#define IREE_TRACE_ZONE_END(zone_id)
+#define IREE_RETURN_AND_END_ZONE_IF_ERROR(zone_id, ...) \
+ IREE_RETURN_IF_ERROR(__VA_ARGS__)
+#define IREE_TRACE_SET_PLOT_TYPE(name_literal, plot_type)
+#define IREE_TRACE_PLOT_VALUE_I64(name_literal, value)
+#define IREE_TRACE_PLOT_VALUE_F32(name_literal, value)
+#define IREE_TRACE_PLOT_VALUE_F64(name_literal, value)
+#define IREE_TRACE_FRAME_MARK()
+#define IREE_TRACE_FRAME_MARK_NAMED(name_literal)
+#define IREE_TRACE_FRAME_MARK_BEGIN_NAMED(name_literal)
+#define IREE_TRACE_FRAME_MARK_END_NAMED(name_literal)
+#define IREE_TRACE_MESSAGE(level, value_literal)
+#define IREE_TRACE_MESSAGE_COLORED(color, value_literal)
+#define IREE_TRACE_MESSAGE_DYNAMIC(level, value, value_length)
+#define IREE_TRACE_MESSAGE_DYNAMIC_COLORED(color, value, value_length)
+#endif // IREE_TRACING_FEATURE_INSTRUMENTATION
+
+//===----------------------------------------------------------------------===//
+// Allocation tracking macros (C/C++)
+//===----------------------------------------------------------------------===//
+//
+// IREE_TRACE_ALLOC: records a malloc.
+// IREE_TRACE_FREE: records a free.
+//
+// NOTE: realloc must be recorded as a FREE/ALLOC pair.
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_CALLSTACKS
+
+#define IREE_TRACE_ALLOC(ptr, size) \
+ ___tracy_emit_memory_alloc_callstack(ptr, size, \
+ IREE_TRACING_MAX_CALLSTACK_DEPTH, 0)
+#define IREE_TRACE_FREE(ptr) \
+ ___tracy_emit_memory_free_callstack(ptr, IREE_TRACING_MAX_CALLSTACK_DEPTH, 0)
+#define IREE_TRACE_ALLOC_NAMED(name, ptr, size) \
+ ___tracy_emit_memory_alloc_callstack_named( \
+ ptr, size, IREE_TRACING_MAX_CALLSTACK_DEPTH, 0, name)
+#define IREE_TRACE_FREE_NAMED(name, ptr) \
+ ___tracy_emit_memory_free_callstack_named( \
+ ptr, IREE_TRACING_MAX_CALLSTACK_DEPTH, 0, name)
+
+#else
+
+#define IREE_TRACE_ALLOC(ptr, size) ___tracy_emit_memory_alloc(ptr, size, 0)
+#define IREE_TRACE_FREE(ptr) ___tracy_emit_memory_free(ptr, 0)
+#define IREE_TRACE_ALLOC_NAMED(name, ptr, size) \
+ ___tracy_emit_memory_alloc_named(ptr, size, 0, name)
+#define IREE_TRACE_FREE_NAMED(name, ptr) \
+ ___tracy_emit_memory_free_named(ptr, 0, name)
+
+#endif // IREE_TRACING_FEATURE_ALLOCATION_CALLSTACKS
+
+#else
+#define IREE_TRACE_ALLOC(ptr, size)
+#define IREE_TRACE_FREE(ptr)
+#define IREE_TRACE_ALLOC_NAMED(name, ptr, size)
+#define IREE_TRACE_FREE_NAMED(name, ptr)
+#endif // IREE_TRACING_FEATURE_ALLOCATION_TRACKING
+
+//===----------------------------------------------------------------------===//
+// Instrumentation C++ RAII types, wrappers, and macros
+//===----------------------------------------------------------------------===//
+
+#ifdef __cplusplus
+
+#if defined(TRACY_ENABLE)
+#include "third_party/tracy/Tracy.hpp" // IWYU pragma: export
+#endif
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+// TODO(#1886): update these to tracy and drop the 0.
+#define IREE_TRACE_SCOPE() ZoneScoped
+#define IREE_TRACE_SCOPE_DYNAMIC(name_cstr) \
+ ZoneTransientN(___tracy_scoped_zone, name_cstr, true)
+#define IREE_TRACE_SCOPE0(name_literal) ZoneScopedN(name_literal)
+#define IREE_TRACE_EVENT
+#define IREE_TRACE_EVENT0
+
+#else
+#define IREE_TRACE_THREAD_ENABLE(name)
+#define IREE_TRACE_SCOPE()
+#define IREE_TRACE_SCOPE_DYNAMIC(name_string_view)
+#define IREE_TRACE_SCOPE0(name_literal)
+#define IREE_TRACE_EVENT(void)
+#define IREE_TRACE_EVENT0
+#endif // IREE_TRACING_FEATURE_INSTRUMENTATION
+
+// TODO(benvanik): macros for LockableCtx / Lockable mutex tracking.
+
+#endif // __cplusplus
+
+#endif // IREE_BASE_TRACING_H_
diff --git a/runtime/src/iree/base/wait_source.c b/runtime/src/iree/base/wait_source.c
new file mode 100644
index 0000000..b626a69
--- /dev/null
+++ b/runtime/src/iree/base/wait_source.c
@@ -0,0 +1,117 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/wait_source.h"
+
+#include "iree/base/assert.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_wait_source_t
+//===----------------------------------------------------------------------===//
+
+// NOTE: iree_wait_source_import lives in iree/base/internal/wait_handle.c
+// for now as that lets us compile out native wait handle support at a coarse
+// level.
+
+// Exports |wait_source| to a native wait primitive of |target_type|.
+// |timeout| is forwarded to the wait source's control function, which may
+// need to block while resolving the export. A wait source with a NULL ctl
+// returns OK without writing |out_wait_primitive|.
+IREE_API_EXPORT iree_status_t iree_wait_source_export(
+ iree_wait_source_t wait_source, iree_wait_primitive_type_t target_type,
+ iree_timeout_t timeout, iree_wait_primitive_t* out_wait_primitive) {
+ IREE_ASSERT_ARGUMENT(out_wait_primitive);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status = iree_ok_status();
+ if (IREE_LIKELY(wait_source.ctl)) {
+ const iree_wait_source_export_params_t params = {
+ .target_type = target_type,
+ .timeout = timeout,
+ };
+ // NOTE: fixed mojibake here - `&params` had been corrupted to `¶ms`.
+ status = wait_source.ctl(wait_source, IREE_WAIT_SOURCE_COMMAND_EXPORT,
+ &params, (void**)out_wait_primitive);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Queries the current resolution state of |wait_source| without blocking.
+// |out_wait_status_code| defaults to IREE_STATUS_OK so that a wait source
+// with no ctl function is treated as already resolved.
+IREE_API_EXPORT iree_status_t iree_wait_source_query(
+ iree_wait_source_t wait_source, iree_status_code_t* out_wait_status_code) {
+ IREE_ASSERT_ARGUMENT(out_wait_status_code);
+ *out_wait_status_code = IREE_STATUS_OK;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status = iree_ok_status();
+ if (IREE_LIKELY(wait_source.ctl)) {
+ status = wait_source.ctl(wait_source, IREE_WAIT_SOURCE_COMMAND_QUERY, NULL,
+ (void**)out_wait_status_code);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Blocks the caller until |wait_source| resolves or |timeout| elapses; the
+// actual wait is delegated to the wait source's control function. A wait
+// source with a NULL ctl resolves immediately with OK.
+IREE_API_EXPORT iree_status_t iree_wait_source_wait_one(
+ iree_wait_source_t wait_source, iree_timeout_t timeout) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Capture time as an absolute value as we don't know when it's going to run.
+ iree_convert_timeout_to_absolute(&timeout);
+
+ iree_status_t status = iree_ok_status();
+ if (IREE_LIKELY(wait_source.ctl)) {
+ const iree_wait_source_wait_params_t params = {
+ .timeout = timeout,
+ };
+ // NOTE: fixed mojibake here - `&params` had been corrupted to `¶ms`.
+ status = wait_source.ctl(wait_source, IREE_WAIT_SOURCE_COMMAND_WAIT_ONE,
+ &params, NULL);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_wait_source_delay
+//===----------------------------------------------------------------------===//
+
+// Control function for delay wait sources: wait_source.data carries an
+// absolute deadline (iree_time_t) after which the source is resolved.
+IREE_API_EXPORT iree_status_t iree_wait_source_delay_ctl(
+ iree_wait_source_t wait_source, iree_wait_source_command_t command,
+ const void* params, void** inout_ptr) {
+ iree_time_t delay_deadline_ns = (iree_time_t)wait_source.data;
+ switch (command) {
+ case IREE_WAIT_SOURCE_COMMAND_QUERY: {
+ // Non-blocking: resolved iff the delay deadline has already passed.
+ iree_status_code_t* out_wait_status_code = (iree_status_code_t*)inout_ptr;
+ *out_wait_status_code = iree_time_now() >= delay_deadline_ns
+ ? IREE_STATUS_OK
+ : IREE_STATUS_DEFERRED;
+ return iree_ok_status();
+ }
+ case IREE_WAIT_SOURCE_COMMAND_WAIT_ONE: {
+ iree_time_t timeout_deadline_ns = iree_timeout_as_deadline_ns(
+ ((const iree_wait_source_wait_params_t*)params)->timeout);
+ if (timeout_deadline_ns > delay_deadline_ns) {
+ // Delay is before timeout and we can perform a simple sleep.
+ return iree_wait_until(delay_deadline_ns)
+ ? iree_ok_status()
+ : iree_status_from_code(IREE_STATUS_DEFERRED);
+ } else {
+ // Timeout expires first: sleep only until the timeout. We _may_ still
+ // wake after the delay deadline but can't be sure.
+ iree_wait_until(timeout_deadline_ns);
+ return iree_time_now() >= delay_deadline_ns
+ ? iree_ok_status()
+ : iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+ }
+ // NOTE: both branches above return; the previously-present trailing
+ // `return iree_status_from_code(IREE_STATUS_DEFERRED);` was unreachable
+ // and has been removed.
+ }
+ case IREE_WAIT_SOURCE_COMMAND_EXPORT:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "delay wait sources cannot be exported");
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unhandled wait source command");
+ }
+}
diff --git a/runtime/src/iree/base/wait_source.h b/runtime/src/iree/base/wait_source.h
new file mode 100644
index 0000000..4aceb49
--- /dev/null
+++ b/runtime/src/iree/base/wait_source.h
@@ -0,0 +1,336 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_WAIT_SOURCE_H_
+#define IREE_BASE_WAIT_SOURCE_H_
+
+#include "iree/base/attributes.h"
+#include "iree/base/status.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/time.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_wait_primitive_t
+//===----------------------------------------------------------------------===//
+
+#if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+// Bare metal/no synchronization available; wait handles are no-oped.
+#define IREE_WAIT_HANDLE_DISABLED 1
+#elif defined(IREE_PLATFORM_WINDOWS)
+// Though Windows can support pipes no one uses them, so for simplicity we only
+// expose HANDLEs.
+#define IREE_HAVE_WAIT_TYPE_WIN32_HANDLE 1
+#elif defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_LINUX)
+// Treat Android and modern linux as (mostly) the same.
+#define IREE_HAVE_WAIT_TYPE_EVENTFD 1
+#define IREE_HAVE_WAIT_TYPE_PIPE 1
+#else
+// BSD/Darwin/etc all have pipe.
+#define IREE_HAVE_WAIT_TYPE_PIPE 1
+#endif // IREE_PLATFORM_*
+
+// TODO(benvanik): see if we can get sync file on linux too:
+#if defined(IREE_PLATFORM_ANDROID)
+#define IREE_HAVE_WAIT_TYPE_SYNC_FILE 1
+#endif // IREE_PLATFORM_ANDROID
+
+#if !IREE_SYNCHRONIZATION_DISABLE_UNSAFE
+#define IREE_HAVE_WAIT_TYPE_LOCAL_FUTEX 1
+#endif // threading enabled
+
+// Specifies the type of a system wait primitive.
+// Enum values that are unavailable on a platform are still defined so that
+// platform-independent code can route wait primitives; actually using such a
+// primitive will fail.
+enum iree_wait_primitive_type_bits_t {
+ // Empty handle; immediately resolved.
+ IREE_WAIT_PRIMITIVE_TYPE_NONE = 0u,
+
+ // Android/Linux eventfd handle.
+ // These are akin to pipe() but require only a single handle and have
+ // significantly lower overhead (equivalent if not slightly better than
+ // pthreads condvars).
+ //
+ // eventfds support acting as both semaphores and auto reset events.
+ //
+ // More information:
+ // http://man7.org/linux/man-pages/man2/eventfd.2.html
+ IREE_WAIT_PRIMITIVE_TYPE_EVENT_FD = 1u,
+
+ // Android/Linux sync_file handle (aka 'sync fence').
+ // The handle is allocated indirectly by the device driver via the
+ // <linux/sync_file.h> API. It may be waited upon with poll(), select(), or
+ // epoll() and must be closed with close() when no longer required. If
+ // waiting on multiple sync_files the caller should first merge them
+ // together.
+ //
+  // A sync_file must only be used as a fence (a one-shot manual-reset event).
+ //
+ // More information:
+ // https://www.kernel.org/doc/Documentation/sync_file.txt
+ // https://lwn.net/Articles/702339/
+ // https://source.android.com/devices/graphics/implement-vsync#explicit_synchronization
+ // https://developer.android.com/ndk/reference/group/sync
+ IREE_WAIT_PRIMITIVE_TYPE_SYNC_FILE = 2u,
+
+ // Android/Linux/iOS-compatible POSIX pipe handle.
+ // Two handles are generated: one for transmitting and one for receiving.
+ //
+ // More information:
+ // http://man7.org/linux/man-pages/man2/pipe.2.html
+ IREE_WAIT_PRIMITIVE_TYPE_PIPE = 3u,
+
+ // Windows HANDLE type.
+ // The HANDLE may represent a thread, event, semaphore, timer, etc.
+ //
+ // More information:
+ // https://docs.microsoft.com/en-us/windows/win32/sysinfo/object-categories
+ // https://docs.microsoft.com/en-us/windows/win32/sync/using-event-objects
+ IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE = 4u,
+
+ // Process-local futex.
+ // These are only valid for multi-wait when used with an in-process wait
+ // handle implementation (IREE_WAIT_API == IREE_WAIT_API_INPROC).
+ IREE_WAIT_PRIMITIVE_TYPE_LOCAL_FUTEX = 5u,
+
+ // Placeholder for wildcard queries of primitive types.
+ // On an export request this indicates that the source may export any type it
+ // can.
+ IREE_WAIT_PRIMITIVE_TYPE_ANY = 0xFFu,
+};
+typedef uint8_t iree_wait_primitive_type_t;
+
+// A handle value whose behavior is defined by the iree_wait_primitive_type_t.
+// Only the primitives available on a platform are compiled in, as the syscalls
+// and other associated operations that act on them aren't available anyway.
+typedef union {
+ int reserved; // to avoid zero-sized unions
+#if defined(IREE_HAVE_WAIT_TYPE_EVENTFD)
+ // IREE_WAIT_PRIMITIVE_TYPE_EVENT_FD
+ struct {
+ int fd;
+ } event;
+#endif // IREE_HAVE_WAIT_TYPE_EVENTFD
+#if defined(IREE_HAVE_WAIT_TYPE_SYNC_FILE)
+ // IREE_WAIT_PRIMITIVE_TYPE_SYNC_FILE
+ struct {
+ int fd;
+ } sync_file;
+#endif // IREE_HAVE_WAIT_TYPE_SYNC_FILE
+#if defined(IREE_HAVE_WAIT_TYPE_PIPE)
+ // IREE_WAIT_PRIMITIVE_TYPE_PIPE
+ union {
+ struct {
+ int read_fd;
+ int write_fd;
+ };
+ int fds[2];
+ } pipe;
+#endif // IREE_HAVE_WAIT_TYPE_PIPE
+#if defined(IREE_HAVE_WAIT_TYPE_WIN32_HANDLE)
+ // IREE_WAIT_PRIMITIVE_TYPE_WIN32_HANDLE
+ struct {
+ uintptr_t handle;
+ } win32;
+#endif // IREE_HAVE_WAIT_TYPE_WIN32_HANDLE
+#if defined(IREE_HAVE_WAIT_TYPE_LOCAL_FUTEX)
+ /*iree_futex_handle_t*/ void* local_futex;
+#endif // IREE_HAVE_WAIT_TYPE_LOCAL_FUTEX
+} iree_wait_primitive_value_t;
+
+// A (type, value) pair describing a system wait primitive handle.
+typedef struct iree_wait_primitive_t {
+ iree_wait_primitive_type_t type;
+ iree_wait_primitive_value_t value;
+} iree_wait_primitive_t;
+
+// Returns a wait primitive with the given (|type|, |value|).
+static inline iree_wait_primitive_t iree_make_wait_primitive(
+ iree_wait_primitive_type_t type, iree_wait_primitive_value_t value) {
+ iree_wait_primitive_t primitive = {type, value};
+ return primitive;
+}
+
+// Returns a wait primitive that will resolve immediately if waited on.
+static inline iree_wait_primitive_t iree_wait_primitive_immediate(void) {
+ iree_wait_primitive_value_t dummy_primitive = {0};
+ return iree_make_wait_primitive(IREE_WAIT_PRIMITIVE_TYPE_NONE,
+ dummy_primitive);
+}
+
+// Returns true if the |wait_primitive| is resolved immediately (empty).
+static inline bool iree_wait_primitive_is_immediate(
+ iree_wait_primitive_t wait_primitive) {
+ return wait_primitive.type == IREE_WAIT_PRIMITIVE_TYPE_NONE;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_wait_source_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_wait_source_t iree_wait_source_t;
+
+// Controls the behavior of an iree_wait_source_ctl_fn_t callback function.
+typedef enum iree_wait_source_command_e {
+ // Queries the state of the wait source.
+ // Returns IREE_STATUS_DEFERRED if the wait source is not yet resolved.
+ //
+ // iree_wait_source_ctl_fn_t:
+ // params: unused
+ // inout_ptr: iree_status_code_t* out_wait_status_code
+ IREE_WAIT_SOURCE_COMMAND_QUERY = 0u,
+
+ // Tries to wait for the wait source to resolve.
+ // Returns IREE_STATUS_DEFERRED if the wait source does not support waiting.
+ //
+ // iree_wait_source_ctl_fn_t:
+ // params: iree_wait_source_wait_params_t
+ // inout_ptr: unused
+ IREE_WAIT_SOURCE_COMMAND_WAIT_ONE,
+
+ // Exports the wait source to a system wait handle.
+ //
+ // iree_wait_source_ctl_fn_t:
+ // params: iree_wait_source_export_params_t
+ // inout_ptr: iree_wait_primitive_t* out_wait_primitive
+ IREE_WAIT_SOURCE_COMMAND_EXPORT,
+} iree_wait_source_command_t;
+
+// Parameters for IREE_WAIT_SOURCE_COMMAND_WAIT_ONE.
+typedef struct iree_wait_source_wait_params_t {
+  // Timeout after which the wait will return IREE_STATUS_DEADLINE_EXCEEDED
+  // even if the wait source is not resolved.
+ iree_timeout_t timeout;
+} iree_wait_source_wait_params_t;
+
+// Parameters for IREE_WAIT_SOURCE_COMMAND_EXPORT.
+typedef struct iree_wait_source_export_params_t {
+ // Indicates the target handle type of the export operation.
+ iree_wait_primitive_type_t target_type;
+  // Timeout after which the export will return IREE_STATUS_DEADLINE_EXCEEDED
+  // even if the wait source is not yet available for export.
+ iree_timeout_t timeout;
+} iree_wait_source_export_params_t;
+
+// Function pointer for an iree_wait_source_t control function.
+// |command| provides the operation to perform. Optionally some commands may use
+// |params| to pass additional operation-specific parameters. |inout_ptr| usage
+// is defined by each operation.
+typedef iree_status_t(IREE_API_PTR* iree_wait_source_ctl_fn_t)(
+ iree_wait_source_t wait_source, iree_wait_source_command_t command,
+ const void* params, void** inout_ptr);
+
+// A wait source instance representing some future point in time.
+// Wait sources are promises for a system native wait handle that allow for
+// cheaper queries and waits when the full system wait path is not required.
+//
+// Wait sources may have user-defined implementations or come from system wait
+// handles via iree_wait_source_import.
+typedef struct iree_wait_source_t {
+ union {
+ struct {
+ // Control function data.
+ void* self;
+ // Implementation-defined data identifying the point in time.
+ uint64_t data;
+ };
+ // Large enough to store an iree_wait_handle_t, used when importing a
+ // system wait handle into a wait source.
+ uint64_t storage[2];
+ };
+ // ioctl-style control function servicing wait source commands.
+ // See iree_wait_source_command_t for more information.
+ iree_wait_source_ctl_fn_t ctl;
+} iree_wait_source_t;
+
+// Returns a wait source that will always immediately return as resolved.
+static inline iree_wait_source_t iree_wait_source_immediate(void) {
+ iree_wait_source_t v = {{{NULL, 0ull}}, NULL};
+ return v;
+}
+
+// Returns true if the |wait_source| is immediately resolved.
+// This can be used to neuter waits in lists/sets.
+static inline bool iree_wait_source_is_immediate(
+ iree_wait_source_t wait_source) {
+ return wait_source.ctl == NULL;
+}
+
+// Wait source control function for iree_wait_source_delay.
+IREE_API_EXPORT iree_status_t iree_wait_source_delay_ctl(
+ iree_wait_source_t wait_source, iree_wait_source_command_t command,
+ const void* params, void** inout_ptr);
+
+// Returns a wait source that indicates a delay until a point in time.
+// The source will remain unresolved until the |deadline_ns| is reached or
+// exceeded and afterward return resolved. Export is unavailable.
+static inline iree_wait_source_t iree_wait_source_delay(
+ iree_time_t deadline_ns) {
+ iree_wait_source_t v = {
+ {{NULL, (uint64_t)deadline_ns}},
+ iree_wait_source_delay_ctl,
+ };
+ return v;
+}
+
+// Returns true if the |wait_source| is a timed delay.
+// These are sleeps that can often be handled more intelligently by platforms.
+static inline bool iree_wait_source_is_delay(iree_wait_source_t wait_source) {
+ return wait_source.ctl == iree_wait_source_delay_ctl;
+}
+
+// Imports a system |wait_primitive| into a wait source in |out_wait_source|.
+// Ownership of the wait handle remains with the caller and it must stay valid
+// for the duration the wait source is in use.
+IREE_API_EXPORT iree_status_t iree_wait_source_import(
+ iree_wait_primitive_t wait_primitive, iree_wait_source_t* out_wait_source);
+
+// Exports a |wait_source| to a system wait primitive in |out_wait_primitive|.
+// If the wait source is already resolved then the wait handle will be set to
+// immediate and callers can check it with iree_wait_primitive_is_immediate.
+// If the wait source resolved with a failure then the error status will be
+// returned. The returned wait handle is owned by the wait source and will
+// remain valid for the lifetime of the wait source.
+//
+// Exporting may require a blocking operation and |timeout| can be used to
+// limit its duration.
+//
+// Returns IREE_STATUS_UNAVAILABLE if the requested primitive |target_type| is
+// unavailable on the current platform or from the given wait source.
+// Passing IREE_WAIT_PRIMITIVE_TYPE_ANY will allow the implementation to return
+// any primitive that it can.
+IREE_API_EXPORT iree_status_t iree_wait_source_export(
+ iree_wait_source_t wait_source, iree_wait_primitive_type_t target_type,
+ iree_timeout_t timeout, iree_wait_primitive_t* out_wait_primitive);
+
+// Queries the state of a |wait_source| without waiting.
+// |out_wait_status_code| will indicate the status of the source while the
+// returned value indicates the status of the query. |out_wait_status_code| will
+// be set to IREE_STATUS_DEFERRED if the wait source has not yet resolved and
+// IREE_STATUS_OK otherwise.
+IREE_API_EXPORT iree_status_t iree_wait_source_query(
+ iree_wait_source_t wait_source, iree_status_code_t* out_wait_status_code);
+
+// Blocks the caller and waits for a |wait_source| to resolve.
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if |timeout| is reached before the
+// wait source resolves. If the wait source resolved with a failure then the
+// error status will be returned.
+IREE_API_EXPORT iree_status_t iree_wait_source_wait_one(
+ iree_wait_source_t wait_source, iree_timeout_t timeout);
+
+// TODO(benvanik): iree_wait_source_wait_any/all: allow multiple wait sources
+// that share the same control function. The implementation can decide if it
+// wants to coalesce them or not.
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_WAIT_SOURCE_H_
diff --git a/runtime/src/iree/builtins/BUILD b/runtime/src/iree/builtins/BUILD
new file mode 100644
index 0000000..f27d209
--- /dev/null
+++ b/runtime/src/iree/builtins/BUILD
@@ -0,0 +1,11 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
diff --git a/runtime/src/iree/builtins/CMakeLists.txt b/runtime/src/iree/builtins/CMakeLists.txt
new file mode 100644
index 0000000..954e388
--- /dev/null
+++ b/runtime/src/iree/builtins/CMakeLists.txt
@@ -0,0 +1,13 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/builtins/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/builtins/device/BUILD b/runtime/src/iree/builtins/device/BUILD
new file mode 100644
index 0000000..f670428
--- /dev/null
+++ b/runtime/src/iree/builtins/device/BUILD
@@ -0,0 +1,23 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_runtime_cc_library(
+ name = "device",
+ srcs = [
+ "device_generic.c",
+ ],
+ hdrs = [
+ "device.h",
+ ],
+)
diff --git a/runtime/src/iree/builtins/device/CMakeLists.txt b/runtime/src/iree/builtins/device/CMakeLists.txt
new file mode 100644
index 0000000..b16043e
--- /dev/null
+++ b/runtime/src/iree/builtins/device/CMakeLists.txt
@@ -0,0 +1,25 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/builtins/device/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ device
+ HDRS
+ "device.h"
+ SRCS
+ "device_generic.c"
+ DEPS
+
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/builtins/device/README.md b/runtime/src/iree/builtins/device/README.md
new file mode 100644
index 0000000..288ec4c
--- /dev/null
+++ b/runtime/src/iree/builtins/device/README.md
@@ -0,0 +1,213 @@
+IREE CPU Device Library: `libdevice`
+====================================
+
+This library provides builtin functions to the IREE generated CPU code. It
+covers the role of a compiler runtime library handling things like soft float
+builtin calls produced during code generation and a support library to ease
+implementation of more complex intrinsic-like functionality. The code in this
+library is compiled into bitcode files and embedded inside the IREE compiler
+which then links it into the generated code before emitting the final user
+output.
+
+```
++------------+ +-------+ +-------------------------------+
+| device_*.c | ---> | clang | ---> |+-------------------------------+
++------------+ +-------+ +| libdevice_[arch]_[variant].bc |
+ +-------------------------------+
+ |||
+ vvv
+ +------------+ +---------+ +================+
+ | input.mlir | ---> | codegen | ---> | iree-compile |
+ +------------+ +---------+ +================+
+ |
+ +----------------------------+
+ v v
+ +------------------------+ +----------------------------+
+ | static library (.o/.a) | | dynamic library (.so/.dll) |
+ +------------------------+ +----------------------------+
+```
+
+Good examples of things this library can provide:
+* float16/half support functions
+* MMA-like intrinsics for architecture-optimized tiled matrix multiplies
+* Atomic intrinsics
+
+Bad examples:
+* A full convolution kernel
+* Anything used in only one particular configuration or target
+* Frequently changing code
+
+### Why Not C++ Passes?
+
+This approach of an external library that is linked in via bitcode is a tradeoff
+that favors a familiar environment for architecture-specific implementations and
+reusable code to custom MLIR passes that directly construct the IR. It will
+always be better from a technical standpoint to directly perform these
+specializations inside compiler passes as all information is available, multiple
+levels of optimization at MLIR `vector` and `llvm` dialect levels can hoist and
+fold aggressively, and specialization is possible using the entire context. It's
+encouraged that work is done there when possible and some of the cases handled
+by this library may end up being done in that environment.
+
+As a reusable library this approach allows for other backends - such as the IREE
+VMVX backend - to share the same optimized implementations. Having standalone
+tests and benchmarks also allows for fast iteration without needing to modify
+the compiler.
+
+The hope is that over time things added here will be moved into the compiler and
+this becomes mostly a lightweight intrinsics library and staging ground for
+experimental features that require quick iteration in C.
+
+## Bitcode Files
+
+The IREE compiler embeds bitcode files and when producing executable libraries
+will select one for linkage based on the specified target machine. As these
+bitcode files can only be produced by a cross-compilation-enabled Clang they are
+built offline and checked into the repository. Future improvements to the
+compiler could also allow for external files to be specified to avoid the need
+to rebuild the compiler however for now this keeps things simple and hermetic.
+
+The naming convention is `libdevice_[arch]_[features].bc`, corresponding to the
+source files of `device_[arch].c` with the features specifying conditional
+target CPU features such as extended instruction sets. When no special features
+are required `generic` is used.
+
+For example, the implementations for all ISA variants of AArch64 would be found
+in a `device_aarch64.c` and an implementation for the baseline ISA
+is compiled into `libdevice_aarch64_generic.bc`. When the dot product
+instructions are available (`-march=armv8.2-a+dotprod`) the more specialized
+`libdevice_aarch64_dotprod.bc` bitcode file would be used.
+
+### Updating Bitcode Files
+
+The bitcode files need to be rebuilt whenever the source is modified, new
+variants are added, or new architectures are targeted. The
+[`bin/build.sh`](bin/build.sh) uses a compatible Clang and LLVM toolchain to
+produce the files in the correct format and location.
+
+Requirements:
+* A modern version of Clang/LLVM (tested with 13)
+* A build of llvm-as with all target architectures linked in
+
+This script could use some usability improvements, but for now a common
+invocation will look like:
+```sh
+LLVM_AS=/usr/bin/llvm-as \
+CLANG=/usr/bin/clang-13 \
+./iree/builtins/device/bin/build.sh
+```
+
+If there are complaints that llvm-as does not support a target architecture then
+the llvm-as included in the IREE CMake distribution should be built and provided
+by way of the `IREE_BUILD_DIR`:
+```sh
+IREE_BUILD_DIR=../iree-build \
+CLANG=/usr/bin/clang-13 \
+./iree/builtins/device/bin/build.sh
+```
+
+After this the newly updated/added bitcode files can be added to git.
+
+### Compiler Bitcode Selection
+
+The logic in the compiler for selecting which bitcode file to use is found in
+[`iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp`](/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp).
+The `lookupDeviceFile` function uses the `llvm::TargetMachine` to query the
+architecture, CPU features, and other properties to choose the corresponding
+bitcode file. If no matching bitcode file is found a fallback of the WebAssembly
+generic implementation is used as its bitcode is generally portable. It's not
+fast, though, and should only be used for correctness testing during bringup.
+
+### Adding an Architecture/ISA Bitcode File
+
+First copy [`device_generic.c`](device_generic.c) and name it consistent with
+the canonical LLVM architecture (the first part of the target triple, e.g. if
+you pass `--target=aarch64-arm-none-eabi` to Clang you'd name it `aarch64`).
+
+From there guard the new file with the architecture-specific preprocessor guards
+and add the inverse to `device_generic.c` to prevent it from being used when the
+source files are globbed.
+
+To build the new bitcode file add a `make_arch_bc` call to [`bin/build.sh`](bin/build.sh).
+The flags provided are passed directly to Clang and can be used to control the
+compilation environment with the requirement being that the corresponding
+selection logic is updated in `Device.cpp`.
+
+Finally update the [`iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp`](/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp)
+file in the compiler to select the new bitcode file based on the
+`llvm::TargetMachine` in the same way that it is produced with `make_arch_bc`.
+
+Ergonomic improvements here would allow for function-level multi-versioning such
+that bitcode files per architecture could be used instead of requiring
+per-feature variants of each bitcode file.
+
+## Engineering Requirements
+
+As this library is directly merged into the compiler-generated code there are
+specific restrictions as to what can be used inherited from the IREE executable
+requirements:
+
+* No mutable globals/static variables or thread-local storage
+* No syscalls
+* No libc calls outside of builtins (like memset/memcpy) - _no mallocs_!
+
+Though the primary usage of the library is through the precompiled bitcode files
+that only need to work with Clang the library may also be built on other
+toolchains such as GCC and MSVC (or older versions of Clang). When standard
+intrinsics are used this will generally not be a problem however inline assembly
+may need compiler-specific variants or at least exclusions that fall back to
+generic paths.
+
+### Compile-time Configuration
+
+Preprocessor statements used to control behavior must only use information known
+when the bitcode files are being compiled. This means that if the bitcode file
+being produced is for AArch64 it is safe to use the `__aarch64__` macro.
+Information that is only available after the bitcode file is produced - such as
+in the IREE compiler pipelines - must use link-time configuration.
+
+### Link-time Configuration
+
+As we are producing bitcode files we cannot rely on the C preprocessor for
+changing behavior based on some information only known during linking. In other
+cases we may want to specialize code paths based on knowledge about the context
+in which the kernels are used. To provide this link-time modification ability
+there is support for flags by way of `extern` globals. These globals are either
+specified by the IREE compiler when linking the bitcode or by the hosting
+application when linked statically.
+
+Each flag is defined in `device.h`; for example:
+```c
+extern int libdevice_platform_example_flag;
+```
+
+Any code may then use this flag to condition/control behavior:
+```c
+if (libdevice_platform_example_flag >= 1) {
+ // Do something special.
+}
+```
+
+When linking libdevice statically the flags can be provided by the hosting
+application via compiler defines: `-DLIBDEVICE_PLATFORM_EXAMPLE_FLAG=123`.
+
+When producing bitcode the flags are left symbolic and the IREE compiler
+provides their values:
+```c++
+overridePlatformGlobal(*bitcodeModule, "libdevice_platform_example_flag", 123u);
+```
+
+What flags are useful and how to handle cases where flags are arch-dependent are
+still TBD.
+
+## Testing and Benchmarking
+
+[`tools/libdevice_test.cc`](tools/libdevice_test.cc) provides a gtest runner
+that compares the results of the optimized implementations for the target
+architecture against a reference implementation for correctness.
+
+[`tools/libdevice_benchmark.c`](tools/libdevice_benchmark.c) provides a
+benchmark suite for the optimized implementations of the target architecture.
+
+Both are compiled for the CMake target and can be used to develop
+implementations without the need to rebuild/run the compiler.
diff --git a/runtime/src/iree/builtins/device/bin/BUILD b/runtime/src/iree/builtins/device/bin/BUILD
new file mode 100644
index 0000000..286e32a
--- /dev/null
+++ b/runtime/src/iree/builtins/device/bin/BUILD
@@ -0,0 +1,28 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+c_embed_data(
+ name = "libdevice",
+ srcs = [
+ "libdevice_wasm32_generic.bc",
+ "libdevice_wasm64_generic.bc",
+ ],
+ c_file_output = "libdevice.c",
+ flatten = True,
+ h_file_output = "libdevice.h",
+ identifier = "iree_builtins_libdevice",
+ deps = [
+ "//runtime/src:runtime_defines",
+ ],
+)
diff --git a/runtime/src/iree/builtins/device/bin/CMakeLists.txt b/runtime/src/iree/builtins/device/bin/CMakeLists.txt
new file mode 100644
index 0000000..105bf87
--- /dev/null
+++ b/runtime/src/iree/builtins/device/bin/CMakeLists.txt
@@ -0,0 +1,31 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/builtins/device/bin/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_c_embed_data(
+ NAME
+ libdevice
+ SRCS
+ "libdevice_wasm32_generic.bc"
+ "libdevice_wasm64_generic.bc"
+ DEPS
+
+ C_FILE_OUTPUT
+ "libdevice.c"
+ H_FILE_OUTPUT
+ "libdevice.h"
+ IDENTIFIER
+ "iree_builtins_libdevice"
+ FLATTEN
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/builtins/device/bin/build.sh b/runtime/src/iree/builtins/device/bin/build.sh
new file mode 100644
index 0000000..11f793a
--- /dev/null
+++ b/runtime/src/iree/builtins/device/bin/build.sh
@@ -0,0 +1,67 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Example command line:
+# LLVM_AS=/usr/bin/llvm-as \
+# CLANG=/usr/bin/clang-13 \
+# ./iree/builtins/device/bin/build.sh
+
+set -x
+set -e
+
+CLANG="${CLANG:-clang}"
+# TODO(benvanik): figure out how to get this path from clang itself.
+CLANG_INCLUDE="${CLANG_INCLUDE:-/usr/lib/llvm-13/lib/clang/13.0.0/include/}"
+IREE_SRC_DIR="$(git rev-parse --show-toplevel)"
+IREE_BUILD_DIR="${IREE_BUILD_DIR:-${IREE_SRC_DIR?}/../build}"
+LLVM_AS="${LLVM_AS:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/llvm-as}"
+
+SCRIPT_DIR="$(realpath `dirname $0`)"
+OUT="${SCRIPT_DIR?}/"
+SRC="${SCRIPT_DIR?}/.."
+
+function make_arch_bc {
+ local ARCH=$1
+ local FEATURES=$2
+ local SOURCE_FILE=$3
+ local FILE_BASENAME="${OUT}/libdevice_${ARCH}_${FEATURES}"
+
+ # Generate an LLVM IR assembly listing so we can easily read the file.
+ # This is not checked in or used by the compiler.
+ ${CLANG?} \
+ "${@:4}" \
+ -isystem "${CLANG_INCLUDE?}" \
+ -std=c17 \
+ -O3 \
+ -fno-ident \
+ -fvisibility=hidden \
+ -nostdinc \
+ -S \
+ -emit-llvm \
+ -fdiscard-value-names \
+ -DIREE_DEVICE_STANDALONE \
+ -o "${FILE_BASENAME}.ll" \
+ -c \
+ "${SRC}/${SOURCE_FILE}"
+
+ # Clang adds a bunch of bad attributes and host-specific information that we
+ # don't want (so we get at least somewhat deterministic builds).
+ sed -i 's/^;.*$//' "${FILE_BASENAME}.ll"
+ sed -i 's/^source_filename.*$//' "${FILE_BASENAME}.ll"
+ sed -i 's/^target datalayout.*$//' "${FILE_BASENAME}.ll"
+ sed -i 's/^target triple.*$//' "${FILE_BASENAME}.ll"
+ sed -i 's/^\(attributes #[0-9]* = {\).*$/\1 inlinehint }/' "${FILE_BASENAME}.ll"
+
+ # Generate a binary bitcode file embedded into the compiler binary.
+ # NOTE: we do this from stdin so that the filename on the user's system is not
+ # embedded in the bitcode file (making it non-deterministic).
+ cat "${FILE_BASENAME}.ll" | ${LLVM_AS} -o="${FILE_BASENAME}.bc"
+}
+
+make_arch_bc "wasm32" "generic" "device_generic.c" \
+ --target=wasm32
+make_arch_bc "wasm64" "generic" "device_generic.c" \
+ --target=wasm64
diff --git a/runtime/src/iree/builtins/device/bin/libdevice_wasm32_generic.bc b/runtime/src/iree/builtins/device/bin/libdevice_wasm32_generic.bc
new file mode 100644
index 0000000..26e2310
--- /dev/null
+++ b/runtime/src/iree/builtins/device/bin/libdevice_wasm32_generic.bc
Binary files differ
diff --git a/runtime/src/iree/builtins/device/bin/libdevice_wasm64_generic.bc b/runtime/src/iree/builtins/device/bin/libdevice_wasm64_generic.bc
new file mode 100644
index 0000000..26e2310
--- /dev/null
+++ b/runtime/src/iree/builtins/device/bin/libdevice_wasm64_generic.bc
Binary files differ
diff --git a/runtime/src/iree/builtins/device/device.h b/runtime/src/iree/builtins/device/device.h
new file mode 100644
index 0000000..4378101
--- /dev/null
+++ b/runtime/src/iree/builtins/device/device.h
@@ -0,0 +1,120 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BUILTINS_DEVICE_DEVICE_H_
+#define IREE_BUILTINS_DEVICE_DEVICE_H_
+
+//===----------------------------------------------------------------------===//
+// A simplified libc/libm-alike that is designed to compile to portable LLVM IR.
+//===----------------------------------------------------------------------===//
+// This library is focused on supporting the subset of LLVM's RuntimeLibcalls
+// that we need in our embedded executable binaries. This means that things like
+// printf, malloc, etc are excluded.
+//
+// See the full list of possible functions here:
+// third_party/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def
+//
+// Code here must not use any system headers - as almost all pull in bits/ and
+// various other target-dependent definitions that make the resulting IR
+// non-portable. This means there is no size_t, etc. Any definitions that may
+// come from an std* file must be redefined here with care.
+//
+// Code must also not use any mutable global or thread-local state ala
+// errno/rounding modes/etc. Each of the functions in the library will be called
+// concurrently from multiple threads and from multiple source modules. There
+// must be no mutable static values anywhere.
+//
+// Avoid #ifdef entirely: they indicate a leakage of host build configuration
+// into what is supposed to be a portable module. Anything that requires
+// target-specific conditional logic must be implemented via an extern that
+// can be substituted by the IREE compiler when producing the final
+// target-specific module.
+
+//===----------------------------------------------------------------------===//
+// Configuration
+//===----------------------------------------------------------------------===//
+
+// IREE_DEVICE_STANDALONE:
+// Define to have libdevice's implementation of builtins alias the standard
+// names. If undefined then the host toolchain implementations will be used.
+
+//===----------------------------------------------------------------------===//
+// Attributes and metadata
+//===----------------------------------------------------------------------===//
+
+// Tagged on functions that are part of the public API.
+// In C++ translation units this also forces C linkage so the symbols match the
+// names LLVM emits for runtime libcalls.
+#ifdef __cplusplus
+#define IREE_DEVICE_EXPORT extern "C"
+#else
+#define IREE_DEVICE_EXPORT
+#endif // __cplusplus
+
+// `restrict` keyword, not supported by some older compilers.
+// We define our own macro in case dependencies use `restrict` differently.
+// MSVC >= 1900 (VS2015) supports __restrict; older MSVC gets a no-op.
+#if defined(_MSC_VER) && _MSC_VER >= 1900
+#define IREE_DEVICE_RESTRICT __restrict
+#elif defined(_MSC_VER)
+#define IREE_DEVICE_RESTRICT
+#elif defined(__cplusplus)
+#define IREE_DEVICE_RESTRICT __restrict__
+#else
+#define IREE_DEVICE_RESTRICT restrict
+#endif // _MSC_VER
+
+//===----------------------------------------------------------------------===//
+// stdint.h
+//===----------------------------------------------------------------------===//
+// https://pubs.opengroup.org/onlinepubs/009604599/basedefs/stdint.h.html
+// NOTE: no size_t/ptrdiff_t/etc (as they are target dependent).
+
+// Only define the fixed-width types if a real stdint.h has not already been
+// pulled in (INT8_MIN doubles as its include guard sentinel).
+#if !defined(INT8_MIN)
+
+typedef signed char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+typedef long long int64_t;
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+
+// NOTE(review): the i8/i16/ui8/... literal suffixes below are an MSVC
+// extension; clang only accepts them with -fms-extensions. This branch is
+// reached only when stdint.h was not included — confirm the -nostdinc bitcode
+// build either defines INT8_MIN or enables MS extensions.
+#define INT8_MIN (-127i8 - 1)
+#define INT16_MIN (-32767i16 - 1)
+#define INT32_MIN (-2147483647i32 - 1)
+#define INT64_MIN (-9223372036854775807i64 - 1)
+#define INT8_MAX 127i8
+#define INT16_MAX 32767i16
+#define INT32_MAX 2147483647i32
+#define INT64_MAX 9223372036854775807i64
+#define UINT8_MAX 0xffui8
+#define UINT16_MAX 0xffffui16
+#define UINT32_MAX 0xffffffffui32
+#define UINT64_MAX 0xffffffffffffffffui64
+
+#endif // !INT8_MIN
+
+//===----------------------------------------------------------------------===//
+// Target-specific queries
+//===----------------------------------------------------------------------===//
+// These are substituted with values from the compiler and must not be specified
+// here in C before we generate the IR.
+
+// Do not use: here as an example. Remove once we have any other flag.
+extern int libdevice_platform_example_flag;
+// The value used when not coming from the compiler.
+#define LIBDEVICE_PLATFORM_EXAMPLE_FLAG 0
+
+//===----------------------------------------------------------------------===//
+// Public API
+//===----------------------------------------------------------------------===//
+
+// Converts a 16-bit floating-point value to a 32-bit C `float`.
+IREE_DEVICE_EXPORT float iree_h2f_ieee(short param);
+
+// Converts a 32-bit C `float` value to a 16-bit floating-point value.
+IREE_DEVICE_EXPORT short iree_f2h_ieee(float param);
+
+#endif  // IREE_BUILTINS_DEVICE_DEVICE_H_
diff --git a/runtime/src/iree/builtins/device/device_generic.c b/runtime/src/iree/builtins/device/device_generic.c
new file mode 100644
index 0000000..3d55f71
--- /dev/null
+++ b/runtime/src/iree/builtins/device/device_generic.c
@@ -0,0 +1,121 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "device.h"
+
+// Storage for the example platform flag declared in device.h. Only defined in
+// the non-standalone (host toolchain) build; in standalone bitcode builds the
+// IREE compiler substitutes the value directly.
+#if !defined(IREE_DEVICE_STANDALONE)
+int libdevice_platform_example_flag = LIBDEVICE_PLATFORM_EXAMPLE_FLAG;
+#endif // IREE_DEVICE_STANDALONE
+
+// Converts an IEEE 754 half-precision value (raw 16-bit pattern in |param|)
+// to a 32-bit float, handling NaN, infinity, normals, and subnormals.
+// A union provides the int<->float bit view: the previous
+// *(float*)(&int_value) pointer casts violate C strict-aliasing rules
+// (undefined behavior, fragile at -O3); union member access is well-defined.
+IREE_DEVICE_EXPORT float iree_h2f_ieee(short param) {
+  union {
+    int i;
+    float f;
+  } bits;
+  unsigned short expHalf16 = param & 0x7C00;
+  int exp1 = (int)expHalf16;
+  unsigned short mantissa16 = param & 0x03FF;
+  int mantissa1 = (int)mantissa16;
+  int sign = (int)(param & 0x8000);
+  sign = sign << 16;
+
+  // nan or inf
+  if (expHalf16 == 0x7C00) {
+    // nan
+    if (mantissa16 > 0) {
+      bits.i = (0x7FC00000 | sign);
+      return bits.f;
+    }
+    // inf
+    bits.i = (0x7F800000 | sign);
+    return bits.f;
+  }
+  // Normalized half: rebias the exponent and widen the mantissa.
+  if (expHalf16 != 0) {
+    exp1 += ((127 - 15) << 10);  // exponents converted to float32 bias
+    int res = (exp1 | mantissa1);
+    res = res << 13;
+    res = (res | sign);
+    bits.i = res;
+    return bits.f;
+  }
+
+  // Subnormal half (exp == 0): scale the raw mantissa by the bias difference.
+  int xmm1 = exp1 > (1 << 10) ? exp1 : (1 << 10);
+  xmm1 = (xmm1 << 13);
+  xmm1 += ((127 - 15 - 10) << 23);  // add the bias difference to xmm1
+  xmm1 = xmm1 | sign;               // Combine with the sign mask
+
+  bits.i = xmm1;
+  float res = (float)mantissa1;  // Convert mantissa to float
+  res *= bits.f;
+
+  return res;
+}
+
+// Converts a 32-bit float to an IEEE 754 half-precision bit pattern with
+// round-to-nearest-even on the normal path, handling zero, subnormals,
+// infinity, NaN, and exponent overflow explicitly.
+// A union provides the float->bits view: the previous
+// *(unsigned int*)(&param) pointer cast violates C strict-aliasing rules
+// (undefined behavior, fragile at -O3); union member access is well-defined.
+IREE_DEVICE_EXPORT short iree_f2h_ieee(float param) {
+  union {
+    float f;
+    unsigned int u;
+  } bits;
+  bits.f = param;
+  unsigned int param_bit = bits.u;
+  int sign = param_bit >> 31;
+  int mantissa = param_bit & 0x007FFFFF;
+  int exp = ((param_bit & 0x7F800000) >> 23) + 15 - 127;
+  short res;
+  if (exp > 0 && exp < 30) {
+    // use rte rounding mode, round the significand, combine sign, exponent and
+    // significand into a short.
+    res = (sign << 15) | (exp << 10) | ((mantissa + 0x00001000) >> 13);
+  } else if (param_bit == 0) {
+    res = 0;
+  } else {
+    if (exp <= 0) {
+      if (exp < -10) {
+        // value is less than min half float point
+        res = 0;
+      } else {
+        // normalized single, magnitude is less than min normal half float
+        // point.
+        mantissa = (mantissa | 0x00800000) >> (1 - exp);
+        // round to nearest
+        if ((mantissa & 0x00001000) > 0) {
+          mantissa = mantissa + 0x00002000;
+        }
+        // combine sign & mantissa (exp is zero to get denormalized number)
+        res = (sign << 15) | (mantissa >> 13);
+      }
+    } else if (exp == (255 - 127 + 15)) {
+      if (mantissa == 0) {
+        // input float is infinity, return infinity half
+        res = (sign << 15) | 0x7C00;
+      } else {
+        // input float is NaN, return half NaN
+        res = (sign << 15) | 0x7C00 | (mantissa >> 13);
+      }
+    } else {
+      // exp > 0, normalized single, round to nearest
+      if ((mantissa & 0x00001000) > 0) {
+        mantissa = mantissa + 0x00002000;
+        if ((mantissa & 0x00800000) > 0) {
+          mantissa = 0;
+          exp = exp + 1;
+        }
+      }
+      if (exp > 30) {
+        // exponent overflow - return infinity half
+        res = (sign << 15) | 0x7C00;
+      } else {
+        // combine sign, exp and mantissa into normalized half
+        res = (sign << 15) | (exp << 10) | (mantissa >> 13);
+      }
+    }
+  }
+  return res;
+}
+
+#if defined(IREE_DEVICE_STANDALONE)
+
+// Alias the implementations under the runtime libcall names LLVM emits
+// (see RuntimeLibcalls.def) so generated code resolves to these portable
+// versions when building the standalone bitcode library.
+IREE_DEVICE_EXPORT float __gnu_h2f_ieee(short param) {
+  return iree_h2f_ieee(param);
+}
+
+IREE_DEVICE_EXPORT short __gnu_f2h_ieee(float param) {
+  return iree_f2h_ieee(param);
+}
+
+#endif // IREE_DEVICE_STANDALONE
diff --git a/runtime/src/iree/builtins/device/tools/BUILD b/runtime/src/iree/builtins/device/tools/BUILD
new file mode 100644
index 0000000..de878d0
--- /dev/null
+++ b/runtime/src/iree/builtins/device/tools/BUILD
@@ -0,0 +1,37 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/bazel:cc_binary_benchmark.bzl", "cc_binary_benchmark")
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+cc_binary_benchmark(
+ name = "libdevice_benchmark",
+ srcs = ["libdevice_benchmark.c"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base/internal:flags",
+ "//runtime/src/iree/builtins/device",
+ "//runtime/src/iree/testing:benchmark",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "libdevice_test",
+ srcs = ["libdevice_test.cc"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base/internal:flags",
+ "//runtime/src/iree/builtins/device",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
diff --git a/runtime/src/iree/builtins/device/tools/CMakeLists.txt b/runtime/src/iree/builtins/device/tools/CMakeLists.txt
new file mode 100644
index 0000000..70e68f0
--- /dev/null
+++ b/runtime/src/iree/builtins/device/tools/CMakeLists.txt
@@ -0,0 +1,39 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/builtins/device/tools/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_binary_benchmark(
+ NAME
+ libdevice_benchmark
+ SRCS
+ "libdevice_benchmark.c"
+ DEPS
+ iree::base
+ iree::base::internal::flags
+ iree::builtins::device
+ iree::testing::benchmark
+ TESTONLY
+)
+
+iree_cc_test(
+ NAME
+ libdevice_test
+ SRCS
+ "libdevice_test.cc"
+ DEPS
+ iree::base
+ iree::base::internal::flags
+ iree::builtins::device
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/builtins/device/tools/libdevice_benchmark.c b/runtime/src/iree/builtins/device/tools/libdevice_benchmark.c
new file mode 100644
index 0000000..0814f56
--- /dev/null
+++ b/runtime/src/iree/builtins/device/tools/libdevice_benchmark.c
@@ -0,0 +1,78 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/api.h"
+#include "iree/base/internal/flags.h"
+#include "iree/builtins/device/device.h"
+#include "iree/testing/benchmark.h"
+
+// Example flag; not really useful:
+IREE_FLAG(int32_t, batch_count, 64, "Ops to run per benchmark iteration.");
+
+// Benchmark body: repeatedly converts varying half-precision bit patterns to
+// float until the framework signals completion. Each reported batch accounts
+// for FLAG_batch_count conversions.
+static iree_status_t iree_h2f_ieee_benchmark(
+    const iree_benchmark_def_t* benchmark_def,
+    iree_benchmark_state_t* benchmark_state) {
+  for (;;) {
+    if (!iree_benchmark_keep_running(benchmark_state,
+                                     /*batch_count=*/FLAG_batch_count)) {
+      break;
+    }
+    for (int op = 0; op < FLAG_batch_count; ++op) {
+      // TODO(benvanik): iree_do_not_optimize barrier.
+      iree_h2f_ieee(0x3400 + op);
+    }
+  }
+  return iree_ok_status();
+}
+
+// Benchmark body: repeatedly converts varying floats to half precision until
+// the framework signals completion. Each reported batch accounts for
+// FLAG_batch_count conversions.
+static iree_status_t iree_f2h_ieee_benchmark(
+    const iree_benchmark_def_t* benchmark_def,
+    iree_benchmark_state_t* benchmark_state) {
+  for (;;) {
+    if (!iree_benchmark_keep_running(benchmark_state,
+                                     /*batch_count=*/FLAG_batch_count)) {
+      break;
+    }
+    for (int op = 0; op < FLAG_batch_count; ++op) {
+      // TODO(benvanik): iree_do_not_optimize barrier.
+      iree_f2h_ieee(0.25f + op);
+    }
+  }
+  return iree_ok_status();
+}
+
+// Tool entry point: parses IREE flags (unknown flags passed through for the
+// benchmark library), registers both conversion benchmarks, and runs them.
+int main(int argc, char** argv) {
+  iree_flags_set_usage(
+      "libdevice_benchmark",
+      "Benchmarks the libdevice implementation of the target machine.\n"
+      "\n");
+
+  // UNDEFINED_OK so benchmark-library flags (e.g. --benchmark_filter) are not
+  // rejected by the IREE flag parser; they are consumed below.
+  iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK, &argc, &argv);
+  iree_benchmark_initialize(&argc, argv);
+
+  {
+    // static: the registry keeps a pointer to the def for the program's
+    // lifetime, so it must outlive this scope.
+    static const iree_benchmark_def_t benchmark_def = {
+        .flags = IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME |
+                 IREE_BENCHMARK_FLAG_USE_REAL_TIME,
+        .time_unit = IREE_BENCHMARK_UNIT_NANOSECOND,
+        .minimum_duration_ns = 0,
+        .iteration_count = 0,
+        .run = iree_h2f_ieee_benchmark,
+        .user_data = NULL,
+    };
+    iree_benchmark_register(IREE_SV("iree_h2f_ieee"), &benchmark_def);
+  }
+
+  {
+    static const iree_benchmark_def_t benchmark_def = {
+        .flags = IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME |
+                 IREE_BENCHMARK_FLAG_USE_REAL_TIME,
+        .time_unit = IREE_BENCHMARK_UNIT_NANOSECOND,
+        .minimum_duration_ns = 0,
+        .iteration_count = 0,
+        .run = iree_f2h_ieee_benchmark,
+        .user_data = NULL,
+    };
+    iree_benchmark_register(IREE_SV("iree_f2h_ieee"), &benchmark_def);
+  }
+
+  iree_benchmark_run_specified();
+  return 0;
+}
diff --git a/runtime/src/iree/builtins/device/tools/libdevice_test.cc b/runtime/src/iree/builtins/device/tools/libdevice_test.cc
new file mode 100644
index 0000000..adeed0a
--- /dev/null
+++ b/runtime/src/iree/builtins/device/tools/libdevice_test.cc
@@ -0,0 +1,22 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/builtins/device/device.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+// Smoke tests: exact values chosen so both conversions are lossless; the main
+// purpose is verifying the libdevice symbols link into the test binary.
+TEST(LibDeviceTest, iree_h2f_ieee) {
+  // Just ensuring that the code links.
+  EXPECT_EQ(0.25f, iree_h2f_ieee(0x3400));
+}
+
+TEST(LibDeviceTest, iree_f2h_ieee) {
+  // Just ensuring that the code links.
+  EXPECT_EQ(0x3400, iree_f2h_ieee(0.25f));
+}
diff --git a/runtime/src/iree/builtins/musl/BUILD b/runtime/src/iree/builtins/musl/BUILD
new file mode 100644
index 0000000..f27d209
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/BUILD
@@ -0,0 +1,11 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
diff --git a/runtime/src/iree/builtins/musl/CMakeLists.txt b/runtime/src/iree/builtins/musl/CMakeLists.txt
new file mode 100644
index 0000000..8da1a73
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/CMakeLists.txt
@@ -0,0 +1,13 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/builtins/musl/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/builtins/musl/Makefile_wasm32.iree b/runtime/src/iree/builtins/musl/Makefile_wasm32.iree
new file mode 100644
index 0000000..b79444b
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/Makefile_wasm32.iree
@@ -0,0 +1,35 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Augments the stock musl Makefile (included below) with an `iree` target that
+# lowers a small subset of math/fenv sources to LLVM IR for wasm32.
+# NOTE(review): identical to Makefile_wasm64.iree except for -target; consider
+# generating both from one template.
+musldir=${MUSL_DIR}
+include $(musldir)/Makefile
+
+# Sources to lower to IR (the subset of runtime libcalls IREE needs).
+IREE_BASE_SRCS = $(addprefix $(srcdir)/, \
+	src/fenv/fenv.c \
+	src/math/ceilf.c \
+	src/math/floorf.c \
+	src/math/fmaf.c \
+	src/math/fmodf.c \
+	src/math/powf.c \
+	src/math/expf.c \
+	src/math/powf_data.c \
+	src/math/exp2f_data.c \
+	src/math/__math_invalidf.c \
+	src/math/__math_oflowf.c \
+	src/math/__math_uflowf.c \
+	src/math/__math_xflowf.c)
+IREE_BASE_LLS = $(patsubst $(srcdir)/%,%.ll,$(basename $(IREE_BASE_SRCS)))
+# NOTE(review): IREE_BASE_BCS is computed but never referenced below.
+IREE_BASE_BCS = $(patsubst $(srcdir)/%,%.bc,$(basename $(IREE_BASE_SRCS)))
+IREE_LL_FILES = $(addprefix obj/, $(IREE_BASE_LLS))
+# -disable-llvm-passes keeps the IR unoptimized here; build.sh runs opt later.
+IREE_CFLAGS=-Xclang -disable-llvm-passes -fno-ident -fvisibility=hidden -target wasm32
+LL_CMD = $(CC) $(CFLAGS_ALL) $(IREE_CFLAGS) -S -emit-llvm -o $@ -c $<
+
+obj/%.ll: $(musldir)/%.c obj/include/bits/alltypes.h
+	$(LL_CMD)
+
+iree: $(IREE_LL_FILES)
+	$(info $$IREE_BASE_SRCS is [${IREE_BASE_SRCS}])
+	$(info $$IREE_LL_FILES is [${IREE_LL_FILES}])
diff --git a/runtime/src/iree/builtins/musl/Makefile_wasm64.iree b/runtime/src/iree/builtins/musl/Makefile_wasm64.iree
new file mode 100644
index 0000000..5e3d956
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/Makefile_wasm64.iree
@@ -0,0 +1,35 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Augments the stock musl Makefile (included below) with an `iree` target that
+# lowers a small subset of math/fenv sources to LLVM IR for wasm64.
+# NOTE(review): identical to Makefile_wasm32.iree except for -target; consider
+# generating both from one template.
+musldir=${MUSL_DIR}
+include $(musldir)/Makefile
+
+# Sources to lower to IR (the subset of runtime libcalls IREE needs).
+IREE_BASE_SRCS = $(addprefix $(srcdir)/, \
+	src/fenv/fenv.c \
+	src/math/ceilf.c \
+	src/math/floorf.c \
+	src/math/fmaf.c \
+	src/math/fmodf.c \
+	src/math/powf.c \
+	src/math/expf.c \
+	src/math/powf_data.c \
+	src/math/exp2f_data.c \
+	src/math/__math_invalidf.c \
+	src/math/__math_oflowf.c \
+	src/math/__math_uflowf.c \
+	src/math/__math_xflowf.c)
+IREE_BASE_LLS = $(patsubst $(srcdir)/%,%.ll,$(basename $(IREE_BASE_SRCS)))
+# NOTE(review): IREE_BASE_BCS is computed but never referenced below.
+IREE_BASE_BCS = $(patsubst $(srcdir)/%,%.bc,$(basename $(IREE_BASE_SRCS)))
+IREE_LL_FILES = $(addprefix obj/, $(IREE_BASE_LLS))
+# -disable-llvm-passes keeps the IR unoptimized here; build.sh runs opt later.
+IREE_CFLAGS=-Xclang -disable-llvm-passes -fno-ident -fvisibility=hidden -target wasm64
+LL_CMD = $(CC) $(CFLAGS_ALL) $(IREE_CFLAGS) -S -emit-llvm -o $@ -c $<
+
+obj/%.ll: $(musldir)/%.c obj/include/bits/alltypes.h
+	$(LL_CMD)
+
+iree: $(IREE_LL_FILES)
+	$(info $$IREE_BASE_SRCS is [${IREE_BASE_SRCS}])
+	$(info $$IREE_LL_FILES is [${IREE_LL_FILES}])
diff --git a/runtime/src/iree/builtins/musl/bin/BUILD b/runtime/src/iree/builtins/musl/bin/BUILD
new file mode 100644
index 0000000..d9a2529
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/bin/BUILD
@@ -0,0 +1,28 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+c_embed_data(
+ name = "libmusl",
+ srcs = [
+ "libmusl_wasm32_generic.bc",
+ "libmusl_wasm64_generic.bc",
+ ],
+ c_file_output = "libmusl.c",
+ flatten = True,
+ h_file_output = "libmusl.h",
+ identifier = "iree_builtins_libmusl",
+ deps = [
+ "//runtime/src:runtime_defines",
+ ],
+)
diff --git a/runtime/src/iree/builtins/musl/bin/CMakeLists.txt b/runtime/src/iree/builtins/musl/bin/CMakeLists.txt
new file mode 100644
index 0000000..433fd58
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/bin/CMakeLists.txt
@@ -0,0 +1,31 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/builtins/musl/bin/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_c_embed_data(
+ NAME
+ libmusl
+ SRCS
+ "libmusl_wasm32_generic.bc"
+ "libmusl_wasm64_generic.bc"
+ DEPS
+
+ C_FILE_OUTPUT
+ "libmusl.c"
+ H_FILE_OUTPUT
+ "libmusl.h"
+ IDENTIFIER
+ "iree_builtins_libmusl"
+ FLATTEN
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/builtins/musl/bin/build.sh b/runtime/src/iree/builtins/musl/bin/build.sh
new file mode 100755
index 0000000..c024088
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/bin/build.sh
@@ -0,0 +1,80 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Example command line:
+# LLVM_AS=/usr/bin/llvm-as \
+# LLVM_LINK=/usr/bin/llvm-link \
+# CLANG=/usr/bin/clang-13 \
+# ./runtime/src/iree/builtins/musl/bin/build.sh
+
+set -x
+set -e
+
+CLANG="${CLANG:-clang}"
+CLANGXX="${CLANGXX:-$(which clang++)}"
+LLVM_AS="${LLVM_AS:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/llvm-as}"
+LLVM_LINK="${LLVM_LINK:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/llvm-link}"
+LLVM_OPT="${LLVM_OPT:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/opt}"
+
+IREE_SRC_DIR="$(git rev-parse --show-toplevel)"
+IREE_BUILD_DIR="${IREE_BUILD_DIR:-${IREE_SRC_DIR?}/../build}"
+
+SCRIPT_DIR="$(realpath `dirname $0`)"
+OUT="${SCRIPT_DIR?}/"
+SRC="${SCRIPT_DIR?}/.."
+
+function make_arch_bc {
+ local ARCH=$1
+ local FEATURES=$2
+ local FILE_BASENAME="${OUT}/libmusl_${ARCH}_${FEATURES}"
+ local MUSL_MAKEFILE="${SCRIPT_DIR?}/../Makefile_${ARCH}.iree"
+
+ # Generate IR with 32-bit target.
+ MUSL_DIR=${IREE_SRC_DIR?}/third_party/musl
+ cd ${MUSL_DIR}
+ rm -rf obj/
+ CC=${CLANG?} CXX=${CLANGXX?} ./configure
+ MUSL_DIR=${MUSL_DIR} make -f ${MUSL_MAKEFILE} iree
+ MUSL_LL_FILES=`find obj/ -name "*.ll"`
+ cp ${MUSL_LL_FILES?} ${OUT}
+ rm ${MUSL_LL_FILES?}
+ cd ${SCRIPT_DIR?}
+
+ ALL_LL_FILES=`find ${OUT} -name "*.ll"`
+
+ cd ${OUT}
+ # git restore ${FILE_BASENAME}.bc
+ for file in ${ALL_LL_FILES}
+ do
+ # Run full LLVM optimizations.
+ # TODO(benvanik): defer this? Some of these opts may not be portable/safe.
+ ${LLVM_OPT?} ${file} -O3 -S -o ${file}.opt.ll
+
+ # Clang adds a bunch of bad attributes and host-specific information that we
+ # don't want (so we get at least somewhat deterministic builds).
+ sed -i 's/^;.*$//' "${file}.opt.ll"
+ sed -i 's/^source_filename.*$//' "${file}.opt.ll"
+ sed -i 's/^target datalayout.*$//' "${file}.opt.ll"
+ sed -i 's/^target triple.*$//' "${file}.opt.ll"
+ sed -i 's/^\(attributes #[0-9]* = {\).*$/\1 inlinehint }/' "${file}.opt.ll"
+
+ # Generate a binary bitcode file embedded into the compiler binary.
+ # NOTE: we do this from stdin so that the filename on the user's system is not
+ # embedded in the bitcode file (making it non-deterministic).
+ cat ${file}.opt.ll | ${LLVM_AS?} -o=${file}.opt.ll.bc
+ rm ${file}.opt.ll
+ done
+ rm ${ALL_LL_FILES}
+
+ ALL_BC_FILES=`ls *.ll.bc`
+ ${LLVM_LINK?} ${ALL_BC_FILES} -o ${FILE_BASENAME}.bc
+ rm ${ALL_BC_FILES}
+}
+
+make_arch_bc "wasm32" "generic" \
+ --target=wasm32
+make_arch_bc "wasm64" "generic" \
+ --target=wasm64
diff --git a/runtime/src/iree/builtins/musl/bin/libmusl_wasm32_generic.bc b/runtime/src/iree/builtins/musl/bin/libmusl_wasm32_generic.bc
new file mode 100644
index 0000000..02ecf00
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/bin/libmusl_wasm32_generic.bc
Binary files differ
diff --git a/runtime/src/iree/builtins/musl/bin/libmusl_wasm64_generic.bc b/runtime/src/iree/builtins/musl/bin/libmusl_wasm64_generic.bc
new file mode 100644
index 0000000..3e6adcf
--- /dev/null
+++ b/runtime/src/iree/builtins/musl/bin/libmusl_wasm64_generic.bc
Binary files differ
diff --git a/runtime/src/iree/hal/BUILD b/runtime/src/iree/hal/BUILD
new file mode 100644
index 0000000..97d9270
--- /dev/null
+++ b/runtime/src/iree/hal/BUILD
@@ -0,0 +1,90 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# HAL (Hardware Abstraction Layer).
+# Subdirectories contain implementations for different hardware and
+# software backends.
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+#===------------------------------------------------------------------------===#
+# Public API
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "hal",
+ srcs = [
+ "allocator.c",
+ "allocator.h",
+ "allocator_heap.c",
+ "buffer.c",
+ "buffer.h",
+ "buffer_heap.c",
+ "buffer_heap_impl.h",
+ "buffer_view.c",
+ "buffer_view.h",
+ "buffer_view_util.c",
+ "buffer_view_util.h",
+ "command_buffer.c",
+ "command_buffer.h",
+ "command_buffer_validation.c",
+ "command_buffer_validation.h",
+ "descriptor_set.c",
+ "descriptor_set.h",
+ "descriptor_set_layout.c",
+ "descriptor_set_layout.h",
+ "detail.h",
+ "device.c",
+ "device.h",
+ "driver.c",
+ "driver.h",
+ "driver_registry.c",
+ "driver_registry.h",
+ "event.c",
+ "event.h",
+ "executable.c",
+ "executable.h",
+ "executable_cache.c",
+ "executable_cache.h",
+ "executable_layout.c",
+ "executable_layout.h",
+ "resource.h",
+ "semaphore.c",
+ "semaphore.h",
+ "string_util.c",
+ "string_util.h",
+ ],
+ hdrs = [
+ "api.h",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:synchronization",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "string_util_test",
+ srcs = ["string_util_test.cc"],
+ deps = [
+ ":hal",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:cc",
+ "//runtime/src/iree/base/internal:span",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
diff --git a/runtime/src/iree/hal/CMakeLists.txt b/runtime/src/iree/hal/CMakeLists.txt
new file mode 100644
index 0000000..a4e2bbb
--- /dev/null
+++ b/runtime/src/iree/hal/CMakeLists.txt
@@ -0,0 +1,81 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ hal
+ HDRS
+ "api.h"
+ SRCS
+ "allocator.c"
+ "allocator.h"
+ "allocator_heap.c"
+ "buffer.c"
+ "buffer.h"
+ "buffer_heap.c"
+ "buffer_heap_impl.h"
+ "buffer_view.c"
+ "buffer_view.h"
+ "buffer_view_util.c"
+ "buffer_view_util.h"
+ "command_buffer.c"
+ "command_buffer.h"
+ "command_buffer_validation.c"
+ "command_buffer_validation.h"
+ "descriptor_set.c"
+ "descriptor_set.h"
+ "descriptor_set_layout.c"
+ "descriptor_set_layout.h"
+ "detail.h"
+ "device.c"
+ "device.h"
+ "driver.c"
+ "driver.h"
+ "driver_registry.c"
+ "driver_registry.h"
+ "event.c"
+ "event.h"
+ "executable.c"
+ "executable.h"
+ "executable_cache.c"
+ "executable_cache.h"
+ "executable_layout.c"
+ "executable_layout.h"
+ "resource.h"
+ "semaphore.c"
+ "semaphore.h"
+ "string_util.c"
+ "string_util.h"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::synchronization
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ string_util_test
+ SRCS
+ "string_util_test.cc"
+ DEPS
+ ::hal
+ iree::base
+ iree::base::cc
+ iree::base::internal::span
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/README.md b/runtime/src/iree/hal/README.md
new file mode 100644
index 0000000..f50befa
--- /dev/null
+++ b/runtime/src/iree/hal/README.md
@@ -0,0 +1,18 @@
+# IREE Hardware Abstraction Layer (HAL)
+
+The IREE HAL expresses a low-level abstraction over modern compute APIs like
+Vulkan (CPUs count too!). Each implementation of the HAL interface can:
+
+* Enumerate and query devices and their capabilities
+* Define executable code that runs on the device
+* Allocate unified or discrete memory and provide cache control
+* Organize work into sequences for deferred submission
+* Provide explicit synchronization primitives for ordering submissions
+
+Refer to IREE's
+[presentations and talks](../../../../README.md#presentations-and-talks) for further
+details.
+
+## Testing
+
+See the [cts/ folder](./cts/) for the HAL Conformance Test Suite.
diff --git a/runtime/src/iree/hal/allocator.c b/runtime/src/iree/hal/allocator.c
new file mode 100644
index 0000000..daa418b
--- /dev/null
+++ b/runtime/src/iree/hal/allocator.c
@@ -0,0 +1,176 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/allocator.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/resource.h"
+
+// Appends a human-readable summary of |statistics| to |builder|: one line for
+// host-local and one for device-local memory, each reporting peak, total
+// allocated, total freed, and live (allocated - freed) byte counts.
+// When statistics are compiled out (IREE_STATISTICS_ENABLE == 0) nothing is
+// appended and the call still returns OK.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_statistics_format(
+    const iree_hal_allocator_statistics_t* statistics,
+    iree_string_builder_t* builder) {
+#if IREE_STATISTICS_ENABLE
+
+  // This could be prettier/have nice number formatting/etc.
+
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder,
+      "  HOST_LOCAL: %12" PRIdsz "B peak / %12" PRIdsz
+      "B allocated / %12" PRIdsz "B freed / %12" PRIdsz "B live\n",
+      statistics->host_bytes_peak, statistics->host_bytes_allocated,
+      statistics->host_bytes_freed,
+      (statistics->host_bytes_allocated - statistics->host_bytes_freed)));
+
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder,
+      "DEVICE_LOCAL: %12" PRIdsz "B peak / %12" PRIdsz
+      "B allocated / %12" PRIdsz "B freed / %12" PRIdsz "B live\n",
+      statistics->device_bytes_peak, statistics->device_bytes_allocated,
+      statistics->device_bytes_freed,
+      (statistics->device_bytes_allocated - statistics->device_bytes_freed)));
+
+#else
+  // No-op when disabled.
+#endif  // IREE_STATISTICS_ENABLE
+  return iree_ok_status();
+}
+
+// Resolves |method_name| in the iree_hal_allocator vtable of |allocator|.
+// Every public function below is a thin wrapper that validates arguments and
+// then dispatches through this macro.
+#define _VTABLE_DISPATCH(allocator, method_name) \
+  IREE_HAL_VTABLE_DISPATCH(allocator, iree_hal_allocator, method_name)
+
+// Emits the iree_hal_allocator_retain/iree_hal_allocator_release definitions
+// declared in allocator.h.
+IREE_HAL_API_RETAIN_RELEASE(allocator);
+
+// Returns the iree_allocator_t the implementation uses for host-side
+// allocations (metadata and the like); pure vtable dispatch.
+IREE_API_EXPORT iree_allocator_t iree_hal_allocator_host_allocator(
+    const iree_hal_allocator_t* IREE_RESTRICT allocator) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  return _VTABLE_DISPATCH(allocator, host_allocator)(allocator);
+}
+
+// Asks the implementation to drop cached/unused pooled buffers.
+// Wrapped in a trace zone so trim costs are visible in profiles.
+IREE_API_EXPORT
+iree_status_t iree_hal_allocator_trim(
+    iree_hal_allocator_t* IREE_RESTRICT allocator) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status = _VTABLE_DISPATCH(allocator, trim)(allocator);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Captures a snapshot of allocation statistics into |out_statistics|.
+// |out_statistics| is zeroed first so that builds with statistics compiled
+// out (where the IREE_STATISTICS block is elided) still return all zeros.
+IREE_API_EXPORT void iree_hal_allocator_query_statistics(
+    iree_hal_allocator_t* IREE_RESTRICT allocator,
+    iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  memset(out_statistics, 0, sizeof(*out_statistics));
+  IREE_STATISTICS({
+    _VTABLE_DISPATCH(allocator, query_statistics)(allocator, out_statistics);
+  });
+}
+
+// Pretty-prints a statistics snapshot for |allocator| to |file|.
+// The text is assembled into a string builder (backed by the allocator's own
+// host allocator) and written with a single fprintf only if formatting
+// succeeded; the builder is deinitialized on all paths. No-op returning OK
+// when statistics are compiled out.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_statistics_fprint(
+    FILE* file, iree_hal_allocator_t* IREE_RESTRICT allocator) {
+#if IREE_STATISTICS_ENABLE
+  iree_hal_allocator_statistics_t statistics;
+  iree_hal_allocator_query_statistics(allocator, &statistics);
+
+  iree_string_builder_t builder;
+  iree_string_builder_initialize(iree_hal_allocator_host_allocator(allocator),
+                                 &builder);
+
+  // TODO(benvanik): query identifier for the allocator so we can denote which
+  // device is being reported.
+  iree_status_t status = iree_string_builder_append_cstring(
+      &builder, "[[ iree_hal_allocator_t memory statistics ]]\n");
+
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_allocator_statistics_format(&statistics, &builder);
+  }
+
+  if (iree_status_is_ok(status)) {
+    fprintf(file, "%.*s", (int)iree_string_builder_size(&builder),
+            iree_string_builder_buffer(&builder));
+  }
+
+  iree_string_builder_deinitialize(&builder);
+  return status;
+#else
+  // No-op.
+  return iree_ok_status();
+#endif  // IREE_STATISTICS_ENABLE
+}
+
+// Queries which buffer operations the allocator supports for |params|.
+// |params| arrives by value so canonicalizing zero-initialized fields only
+// mutates the local copy handed to the implementation.
+IREE_API_EXPORT iree_hal_buffer_compatibility_t
+iree_hal_allocator_query_compatibility(
+    iree_hal_allocator_t* IREE_RESTRICT allocator,
+    iree_hal_buffer_params_t params, iree_device_size_t allocation_size) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  iree_hal_buffer_params_canonicalize(&params);
+  return _VTABLE_DISPATCH(allocator, query_compatibility)(allocator, &params,
+                                                          allocation_size);
+}
+
+// Allocates a buffer, optionally copying |initial_data| into it.
+// |*out_buffer| is cleared up front so callers observe NULL on every failure
+// path; |params| is canonicalized on the by-value copy before dispatch.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_allocate_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT allocator,
+    iree_hal_buffer_params_t params, iree_device_size_t allocation_size,
+    iree_const_byte_span_t initial_data,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  *out_buffer = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_hal_buffer_params_canonicalize(&params);
+  iree_status_t status = _VTABLE_DISPATCH(allocator, allocate_buffer)(
+      allocator, &params, allocation_size, initial_data, out_buffer);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Returns |buffer| storage to the allocator.
+// NOTE(review): presumably invoked from buffer release/destroy paths rather
+// than directly by end users — confirm against the buffer implementations.
+IREE_API_EXPORT void iree_hal_allocator_deallocate_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT allocator, iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  _VTABLE_DISPATCH(allocator, deallocate_buffer)(allocator, buffer);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Wraps the externally-owned memory described by |external_buffer| in a HAL
+// buffer handle, forwarding |release_callback| to the implementation so the
+// owner can be notified when the HAL is done with the memory.
+// |*out_buffer| is cleared up front so callers observe NULL on failure;
+// |params| is canonicalized on the by-value copy before dispatch.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_import_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT allocator,
+    iree_hal_buffer_params_t params,
+    iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(external_buffer);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  *out_buffer = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_hal_buffer_params_canonicalize(&params);
+  iree_status_t status = _VTABLE_DISPATCH(allocator, import_buffer)(
+      allocator, &params, external_buffer, release_callback, out_buffer);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Exports |buffer| as an external handle of |requested_type|.
+// |out_external_buffer| is zeroed up front so callers never observe a
+// partially-populated handle when dispatch fails.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_export_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT allocator,
+    iree_hal_buffer_t* IREE_RESTRICT buffer,
+    iree_hal_external_buffer_type_t requested_type,
+    iree_hal_external_buffer_flags_t requested_flags,
+    iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(buffer);
+  IREE_ASSERT_ARGUMENT(out_external_buffer);
+  memset(out_external_buffer, 0, sizeof(*out_external_buffer));
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status = _VTABLE_DISPATCH(allocator, export_buffer)(
+      allocator, buffer, requested_type, requested_flags, out_external_buffer);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/hal/allocator.h b/runtime/src/iree/hal/allocator.h
new file mode 100644
index 0000000..92c11df
--- /dev/null
+++ b/runtime/src/iree/hal/allocator.h
@@ -0,0 +1,538 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_ALLOCATOR_H_
+#define IREE_HAL_ALLOCATOR_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// A bitmap indicating logical device queue affinity.
+// Used to direct submissions to specific device queues or locate memory nearby
+// where it will be used. The meaning of the bits in the bitmap is
+// implementation-specific: a bit may represent a logical queue in an underlying
+// API such as a VkQueue or a physical queue such as a discrete virtual device.
+//
+// Bitwise operations can be performed on affinities; for example AND'ing two
+// affinities will produce the intersection and OR'ing will produce the union.
+// This enables just-in-time selection as a command buffer could be made
+// available to some set of queues when recorded and then AND'ed with an actual
+// set of queues to execute on during submission.
+typedef uint64_t iree_hal_queue_affinity_t;
+
+// Specifies that any queue may be selected.
+#define IREE_HAL_QUEUE_AFFINITY_ANY ((iree_hal_queue_affinity_t)(-1))
+
+// Parameters defining how a buffer should be allocated.
+//
+// Designed to be zero-initialized: any field with a 0 value will be assigned
+// a default as indicated in the field description.
+//
+// For ergonomics when used from C++ w/o named initializers the first field is
+// the most commonly used so that it can be initialized by location:
+// some_fn(..., {IREE_HAL_BUFFER_USAGE_FOO}, ...)
+typedef struct iree_hal_buffer_params_t {
+ // Specifies the usage allowed by HAL APIs and aids in memory placement.
+ // Devices may have different memory types for different usage and require
+ // the intended usage to be declared upon allocation. It's always best to
+ // limit the allowed usage bits to precisely what the actual usage will be to
+ // avoid additional copies, synchronization, and expensive emulation.
+ //
+ // If 0 then the usage will default to all usage modes.
+ iree_hal_buffer_usage_t usage;
+
+ // Specifies the access allowed to the memory via the HAL APIs.
+ // For example, if the IREE_HAL_MEMORY_ACCESS_WRITE bit is not set then any
+ // API call that would write to the memory will fail (such as
+ // iree_hal_command_buffer_update_buffer). This does not limit any untrusted
+ // dispatch or external use of the buffer and should not be treated as a
+ // memory protection mechanism.
+ //
+ // If 0 then the access will be set as IREE_HAL_MEMORY_ACCESS_ALL.
+ iree_hal_memory_access_t access;
+
+ // Specifies the memory type properties used for selecting a memory space.
+ // This should often be IREE_HAL_MEMORY_TYPE_OPTIMAL to allow the allocator
+ // to place the allocation based on usage bits but can be specified if the
+ // exact memory type must be used for compatibility with external code.
+ //
+ // If 0 then the type will be set as IREE_HAL_MEMORY_TYPE_OPTIMAL.
+ iree_hal_memory_type_t type;
+
+ // Queue affinity bitmap indicating which queues may access this buffer.
+ // For NUMA devices this can be used to more tightly scope the allocation to
+ // particular device memory and provide better pool placement. When a device
+ // supports peering or replication the affinity bitmap will be used to choose
+ // which subdevices require configuration.
+ //
+ // If 0 then the buffer will be available on any queue as if
+ // IREE_HAL_QUEUE_AFFINITY_ANY was specified.
+ iree_hal_queue_affinity_t queue_affinity;
+
+ // Minimum alignment, in bytes, of the resulting allocation.
+ // The actual alignment may be any value greater-than-or-equal-to this value.
+ //
+ // If 0 then the alignment will be decided by the allocator based on optimal
+ // device parameters.
+ iree_device_size_t min_alignment;
+} iree_hal_buffer_params_t;
+
+// Canonicalizes |params| fields when zero initialization is used.
+// Only usage, access, and queue_affinity receive explicit defaults here;
+// type and min_alignment are left at 0 (per the field docs a 0 type means
+// IREE_HAL_MEMORY_TYPE_OPTIMAL and a 0 alignment lets the allocator choose —
+// presumably resolved by the implementation; confirm).
+static inline void iree_hal_buffer_params_canonicalize(
+    iree_hal_buffer_params_t* params) {
+  if (!params->usage) {
+    params->usage =
+        IREE_HAL_BUFFER_USAGE_DISPATCH | IREE_HAL_BUFFER_USAGE_TRANSFER;
+  }
+  if (!params->access) {
+    params->access = IREE_HAL_MEMORY_ACCESS_ALL;
+  }
+  if (!params->queue_affinity) {
+    params->queue_affinity = IREE_HAL_QUEUE_AFFINITY_ANY;
+  }
+}
+
+// Returns |params| with the given |usage| bits OR'ed in.
+// Applies the same default usage as iree_hal_buffer_params_canonicalize first
+// so that on a zero-initialized struct |usage| augments the defaults rather
+// than replacing them.
+static inline iree_hal_buffer_params_t iree_hal_buffer_params_with_usage(
+    const iree_hal_buffer_params_t params, iree_hal_buffer_usage_t usage) {
+  iree_hal_buffer_params_t result = params;
+  if (!result.usage) {
+    result.usage =
+        IREE_HAL_BUFFER_USAGE_DISPATCH | IREE_HAL_BUFFER_USAGE_TRANSFER;
+  }
+  result.usage |= usage;
+  return result;
+}
+
+// A bitfield indicating compatible behavior for buffers in an allocator.
+enum iree_hal_buffer_compatibility_bits_t {
+  // Indicates (in the absence of other bits) the buffer is not compatible with
+  // the allocator or device at all. Any attempts to use the buffer for any
+  // usage will fail. This will happen if the buffer is device-local to another
+  // device without peering and not visible to the host.
+  IREE_HAL_BUFFER_COMPATIBILITY_NONE = 0u,
+
+  // Indicates that the allocator could allocate new buffers of this type and
+  // usage natively. Allocations with the queried parameters may still fail due
+  // to runtime conditions (out of memory, fragmentation, etc) but are otherwise
+  // valid.
+  IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE = 1u << 0,
+
+  // Indicates that the allocator could import external buffers of this type and
+  // usage natively. Imports may fail due to runtime conditions (out of handles,
+  // invalid pointer address spaces/page parameters, etc) but are otherwise
+  // valid.
+  IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE = 1u << 1,
+
+  // Indicates that the allocator could export external buffers of this type and
+  // usage natively. Exports may fail due to runtime conditions (out of handles,
+  // etc) but are otherwise valid.
+  IREE_HAL_BUFFER_COMPATIBILITY_EXPORTABLE = 1u << 2,
+
+  // Indicates that the buffer can be used as a transfer source or target on
+  // a device queue (such as being the source or target of a DMA operation,
+  // etc). If not set then the buffer may still be usable for
+  // iree_hal_buffer_map_copy but not with queued operations.
+  IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER = 1u << 10,
+
+  // Indicates that the buffer can be used as an input/output to a dispatch.
+  IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH = 1u << 11,
+};
+typedef uint32_t iree_hal_buffer_compatibility_t;
+
+// Defines the type of an external buffer handle.
+// Each type may only be usable in a subset of implementations and platforms and
+// may even vary based on the runtime device properties or buffer instance.
+//
+// See the notes on each type for requirements; compatibility often requires
+// the handle to check and trying to import/export is the most reliable way to
+// check for support.
+//
+// The Vulkan documentation on external memory covers a lot of the design
+// decisions made here:
+// https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VK_KHR_external_memory.html
+typedef enum iree_hal_external_buffer_type_e {
+ IREE_HAL_EXTERNAL_BUFFER_TYPE_NONE = 0,
+
+ // A host pointer allocated from an external allocator.
+ // An imported/exported buffer does not own a reference to the memory and the
+ // caller is responsible for ensuring the memory remains live for as long as
+ // the iree_hal_buffer_t referencing it.
+ //
+ // CUDA:
+ // Requires device support.
+ // Uses cuMemHostRegister / cuMemHostUnregister.
+ // The memory type specified on import/export determines the required device
+ // capabilities.
+ //
+ // Vulkan:
+ // Requires VK_EXT_external_memory_host.
+ // Requires device support.
+ // Uses VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT.
+ IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION = 1,
+
+ // A driver/device-specific POSIX file descriptor handle.
+ // The handle supports dup, dup2, close, and transport using the SCM_RIGHTS
+ // control message. All other usage with system APIs is undefined.
+ // An imported/exported handle owns a reference to the underlying allocator
+ // memory. May only be shared with the same underlying driver and device
+ //
+ // CUDA:
+ // Requires device support.
+ // Uses CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD.
+ //
+ // Vulkan:
+ // Requires device support.
+ // Uses VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT.
+ IREE_HAL_EXTERNAL_BUFFER_TYPE_OPAQUE_FD = 2,
+
+ // A driver/device-specific Win32 HANDLE.
+ // The handle supports DuplicateHandle, CompareObjectHandles, CloseHandle, and
+ // Get/SetHandleInformation. All other usage with system APIs is undefined.
+ // An imported/exported handle owns a reference to the underlying allocator
+ // memory. Must only be shared with the same underlying driver and device.
+ //
+ // CUDA:
+ // Requires device support.
+ // Uses CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32.
+ //
+ // Vulkan:
+ // Requires device support.
+ // Uses VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT.
+ IREE_HAL_EXTERNAL_BUFFER_TYPE_OPAQUE_WIN32 = 3,
+
+ // TODO(benvanik): additional memory types:
+ // shared memory fd (shmem)/mapped file
+ // VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT
+ // VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID
+} iree_hal_external_buffer_type_t;
+
+// Flags for controlling iree_hal_external_buffer_t implementation details.
+enum iree_hal_external_buffer_flag_bits_t {
+ IREE_HAL_EXTERNAL_BUFFER_FLAG_NONE = 0u,
+};
+typedef uint32_t iree_hal_external_buffer_flags_t;
+
+// Handle to a typed external buffer.
+// This is a non-owning reference and the underlying allocation must remain
+// valid for as long as the handle is in use. Some buffer types support internal
+// referencing counting but in general ownership remains with the caller.
+// See the type enum for more information.
+typedef struct iree_hal_external_buffer_t {
+ // Type of the resource used to interpret the handle.
+ iree_hal_external_buffer_type_t type;
+ // Flags indicating buffer compatibility.
+ iree_hal_external_buffer_flags_t flags;
+ // Total size of the external resource in bytes.
+ iree_device_size_t size;
+ union {
+ // IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION
+ struct {
+ // Host memory pointer.
+ void* ptr;
+ } host_allocation;
+ // IREE_HAL_EXTERNAL_BUFFER_TYPE_OPAQUE_FD
+ struct {
+ int fd;
+ } opaque_fd;
+ // IREE_HAL_EXTERNAL_BUFFER_TYPE_OPAQUE_WIN32
+ struct {
+ void* handle;
+ } opaque_win32;
+ } handle;
+} iree_hal_external_buffer_t;
+
+// Function invoked when a buffer is released; receives the |user_data| stored
+// in the callback struct along with the buffer being released.
+typedef void(IREE_API_PTR* iree_hal_buffer_release_fn_t)(
+    void* user_data, iree_hal_buffer_t* buffer);
+
+// A callback issued when a buffer is released.
+typedef struct {
+  // Callback function pointer.
+  iree_hal_buffer_release_fn_t fn;
+  // User data passed to the callback function. Unowned.
+  void* user_data;
+} iree_hal_buffer_release_callback_t;
+
+// Returns a no-op buffer release callback that implies that no cleanup is
+// required (both fields NULL).
+static inline iree_hal_buffer_release_callback_t
+iree_hal_buffer_release_callback_null(void) {
+  iree_hal_buffer_release_callback_t callback = {NULL, NULL};
+  return callback;
+}
+
+//===----------------------------------------------------------------------===//
+// Statistics/reporting
+//===----------------------------------------------------------------------===//
+
+// Aggregate allocation statistics.
+// Counters are split by memory locality — host-local vs device-local (see
+// iree_hal_allocator_statistics_record_alloc for the classification) — and
+// live bytes are derived as allocated - freed.
+typedef struct iree_hal_allocator_statistics_t {
+#if IREE_STATISTICS_ENABLE
+  iree_device_size_t host_bytes_peak;
+  iree_device_size_t host_bytes_allocated;
+  iree_device_size_t host_bytes_freed;
+  iree_device_size_t device_bytes_peak;
+  iree_device_size_t device_bytes_allocated;
+  iree_device_size_t device_bytes_freed;
+  // TODO(benvanik): mapping information (discarded, mapping ranges,
+  // flushed/invalidated, etc).
+#else
+  int reserved;  // keeps the struct non-empty when statistics are compiled out
+#endif  // IREE_STATISTICS_ENABLE
+} iree_hal_allocator_statistics_t;
+
+// Formats allocator statistics as a pretty-printed multi-line string.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_statistics_format(
+ const iree_hal_allocator_statistics_t* statistics,
+ iree_string_builder_t* builder);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_allocator_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_allocator_t iree_hal_allocator_t;
+
+// Retains the given |allocator| for the caller.
+IREE_API_EXPORT void iree_hal_allocator_retain(iree_hal_allocator_t* allocator);
+
+// Releases the given |allocator| from the caller.
+IREE_API_EXPORT void iree_hal_allocator_release(
+ iree_hal_allocator_t* allocator);
+
+// Returns the host allocator used for allocating host objects.
+IREE_API_EXPORT iree_allocator_t iree_hal_allocator_host_allocator(
+ const iree_hal_allocator_t* IREE_RESTRICT allocator);
+
+// Trims cached/unused pooled buffers, if any.
+IREE_API_EXPORT
+iree_status_t iree_hal_allocator_trim(
+ iree_hal_allocator_t* IREE_RESTRICT allocator);
+
+// Queries the aggregate statistics from the allocator since creation.
+// Thread-safe; statistics are captured at the time the call is made.
+//
+// NOTE: statistics may be compiled out in some configurations and this call
+// will become a memset(0).
+IREE_API_EXPORT void iree_hal_allocator_query_statistics(
+ iree_hal_allocator_t* IREE_RESTRICT allocator,
+ iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics);
+
+// Prints the current allocation statistics of |allocator| to |file|.
+// No-op if statistics are not enabled (IREE_STATISTICS_ENABLE).
+IREE_API_EXPORT iree_status_t iree_hal_allocator_statistics_fprint(
+ FILE* file, iree_hal_allocator_t* IREE_RESTRICT allocator);
+
+// Returns a bitmask indicating what operations with buffers of the given type
+// are available on the allocator.
+//
+// For buffers allocated from the given allocator it's expected that the result
+// will always be non-NONE. For buffers that originate from another allocator
+// there may be limited support for cross-device usage.
+//
+// Returning IREE_HAL_BUFFER_COMPATIBILITY_NONE indicates that the buffer must
+// be transferred externally into a buffer compatible with the device the
+// allocator services.
+IREE_API_EXPORT iree_hal_buffer_compatibility_t
+iree_hal_allocator_query_compatibility(
+ iree_hal_allocator_t* IREE_RESTRICT allocator,
+ iree_hal_buffer_params_t params, iree_device_size_t allocation_size);
+
+// Allocates a buffer from the allocator.
+// If |initial_data| is provided then the bytes will be copied into the device
+// buffer. To avoid the copy when device-accessible constant data is used prefer
+// iree_hal_allocator_import_buffer when available.
+//
+// The memory type of the buffer returned may differ from the requested value
+// if the device can provide more functionality; for example, if requesting
+// IREE_HAL_MEMORY_TYPE_HOST_VISIBLE but the memory is really host cached you
+// may get a buffer back with IREE_HAL_MEMORY_TYPE_HOST_VISIBLE |
+// IREE_HAL_MEMORY_TYPE_HOST_CACHED. The only requirement is that the buffer
+// satisfy the required bits.
+//
+// |out_buffer| must be released by the caller.
+// Fails if the memory type requested for the given usage cannot be serviced.
+// Callers can use iree_hal_allocator_query_compatibility to decide their memory
+// use strategy.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_allocate_buffer(
+ iree_hal_allocator_t* IREE_RESTRICT allocator,
+ iree_hal_buffer_params_t params, iree_device_size_t allocation_size,
+ iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer);
+
+// TODO(benvanik): iree_hal_allocator_query_external_buffer_compatibility to
+// check for support without needing an external buffer already. There's a few
+// usage modes and it'd be nice to have a single function for it to keep the
+// interface slimmer.
+
+// Imports an externally-owned |external_buffer| to a buffer handle.
+// See notes on iree_hal_external_buffer_type_t for ownership information;
+// depending on the type the caller may be responsible for ensuring the external
+// buffer remains valid for the duration it is in use by the returned
+// iree_hal_buffer_t. The returned external buffer may only be usable with the
+// same driver/device.
+//
+// iree_hal_allocator_query_compatibility can be used to query whether a
+// buffer can be imported when using the given memory type and usage. A
+// compatibility result containing IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE
+// means the import _may_ succeed however if the pointer/page range is not in a
+// supported mode (no read access, etc) this call will fail with
+// IREE_STATUS_OUT_OF_RANGE.
+//
+// An optional |release_callback| can be provided to allow the caller to listen
+// for when the underlying resource is no longer in use by the HAL. This can
+// be used to perform lifetime management or flushing.
+//
+// |out_buffer| must be released by the caller.
+// Fails with IREE_STATUS_UNAVAILABLE if the allocator cannot import the buffer
+// into the given memory type. This may be due to unavailable device/platform
+// capabilities or the memory type the external buffer was allocated with.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_import_buffer(
+ iree_hal_allocator_t* IREE_RESTRICT allocator,
+ iree_hal_buffer_params_t params,
+ iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+ iree_hal_buffer_release_callback_t release_callback,
+ iree_hal_buffer_t** out_buffer);
+
+// Exports an allocator-owned |buffer| to an external buffer handle.
+// See the notes on iree_hal_external_buffer_type_t for ownership information.
+// Upon successful return the caller is responsible for any required lifetime
+// management on the external buffer which may include ensuring that the
+// provided source |buffer| is kept live. The returned external buffer may only
+// be usable with the same driver/device.
+//
+// Fails with IREE_STATUS_UNAVAILABLE if the allocator cannot export the buffer
+// into the external type. This may be due to unavailable device/platform
+// capabilities or the memory type the buffer was allocated with.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_export_buffer(
+ iree_hal_allocator_t* IREE_RESTRICT allocator,
+ iree_hal_buffer_t* IREE_RESTRICT buffer,
+ iree_hal_external_buffer_type_t requested_type,
+ iree_hal_external_buffer_flags_t requested_flags,
+ iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_heap_allocator_t
+//===----------------------------------------------------------------------===//
+
+// Creates a host-local heap allocator that can be used when buffers are
+// required that will not interact with a real hardware device (such as those
+// used in file IO or tests). Buffers allocated with this will not be compatible
+// with real device allocators and will likely incur a copy (or failure) if
+// used.
+//
+// The buffers created from the allocator will use |host_allocator| for their
+// metadata and |data_allocator| for their device storage allocations. If the
+// two are the same the buffers will be allocated in a single flat slab.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_create_heap(
+ iree_string_view_t identifier, iree_allocator_t data_allocator,
+ iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_allocator_t implementation details
+//===----------------------------------------------------------------------===//
+
+// Virtual function table implemented by each concrete allocator.
+// Entries mirror the public iree_hal_allocator_* functions; the public
+// wrappers validate arguments and canonicalize iree_hal_buffer_params_t
+// before dispatching, so implementations receive |params| by const pointer
+// already canonicalized.
+typedef struct iree_hal_allocator_vtable_t {
+  void(IREE_API_PTR* destroy)(iree_hal_allocator_t* IREE_RESTRICT allocator);
+
+  iree_allocator_t(IREE_API_PTR* host_allocator)(
+      const iree_hal_allocator_t* IREE_RESTRICT allocator);
+
+  iree_status_t(IREE_API_PTR* trim)(
+      iree_hal_allocator_t* IREE_RESTRICT allocator);
+
+  void(IREE_API_PTR* query_statistics)(
+      iree_hal_allocator_t* IREE_RESTRICT allocator,
+      iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics);
+
+  iree_hal_buffer_compatibility_t(IREE_API_PTR* query_compatibility)(
+      iree_hal_allocator_t* IREE_RESTRICT allocator,
+      const iree_hal_buffer_params_t* IREE_RESTRICT params,
+      iree_device_size_t allocation_size);
+
+  iree_status_t(IREE_API_PTR* allocate_buffer)(
+      iree_hal_allocator_t* IREE_RESTRICT allocator,
+      const iree_hal_buffer_params_t* IREE_RESTRICT params,
+      iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
+      iree_hal_buffer_t** IREE_RESTRICT out_buffer);
+
+  void(IREE_API_PTR* deallocate_buffer)(
+      iree_hal_allocator_t* IREE_RESTRICT allocator,
+      iree_hal_buffer_t* IREE_RESTRICT buffer);
+
+  iree_status_t(IREE_API_PTR* import_buffer)(
+      iree_hal_allocator_t* IREE_RESTRICT allocator,
+      const iree_hal_buffer_params_t* IREE_RESTRICT params,
+      iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+      iree_hal_buffer_release_callback_t release_callback,
+      iree_hal_buffer_t** IREE_RESTRICT out_buffer);
+
+  iree_status_t(IREE_API_PTR* export_buffer)(
+      iree_hal_allocator_t* IREE_RESTRICT allocator,
+      iree_hal_buffer_t* IREE_RESTRICT buffer,
+      iree_hal_external_buffer_type_t requested_type,
+      iree_hal_external_buffer_flags_t requested_flags,
+      iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer);
+} iree_hal_allocator_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_allocator_vtable_t);
+
+IREE_API_EXPORT void iree_hal_allocator_destroy(
+ iree_hal_allocator_t* IREE_RESTRICT allocator);
+
+IREE_API_EXPORT void iree_hal_allocator_deallocate_buffer(
+ iree_hal_allocator_t* IREE_RESTRICT allocator,
+ iree_hal_buffer_t* IREE_RESTRICT buffer);
+
+#if IREE_STATISTICS_ENABLE
+
+// Records a buffer allocation to |statistics|.
+// Memory with IREE_HAL_MEMORY_TYPE_HOST_LOCAL set is tallied under the host
+// counters; all other memory types count as device memory. Peaks track live
+// bytes (allocated - freed) rather than cumulative allocation totals.
+// NOTE(review): no synchronization here — callers appear to guard statistics
+// with their own lock (the heap allocator keeps a slim mutex for this);
+// confirm for other implementations.
+static inline void iree_hal_allocator_statistics_record_alloc(
+    iree_hal_allocator_statistics_t* statistics,
+    iree_hal_memory_type_t memory_type, iree_device_size_t allocation_size) {
+  if (iree_all_bits_set(memory_type, IREE_HAL_MEMORY_TYPE_HOST_LOCAL)) {
+    statistics->host_bytes_allocated += allocation_size;
+    statistics->host_bytes_peak =
+        iree_max(statistics->host_bytes_peak, statistics->host_bytes_allocated -
+                                                  statistics->host_bytes_freed);
+  } else {
+    statistics->device_bytes_allocated += allocation_size;
+    statistics->device_bytes_peak = iree_max(
+        statistics->device_bytes_peak,
+        statistics->device_bytes_allocated - statistics->device_bytes_freed);
+  }
+}
+
+// Records a buffer deallocation to |statistics|.
+// Uses the same HOST_LOCAL-bit classification as record_alloc; only the freed
+// counter moves (peaks are monotonic).
+static inline void iree_hal_allocator_statistics_record_free(
+    iree_hal_allocator_statistics_t* statistics,
+    iree_hal_memory_type_t memory_type, iree_device_size_t allocation_size) {
+  if (iree_all_bits_set(memory_type, IREE_HAL_MEMORY_TYPE_HOST_LOCAL)) {
+    statistics->host_bytes_freed += allocation_size;
+  } else {
+    statistics->device_bytes_freed += allocation_size;
+  }
+}
+
+#else
+#define iree_hal_allocator_statistics_record_alloc(...)
+#define iree_hal_allocator_statistics_record_free(...)
+#endif // IREE_STATISTICS_ENABLE
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_ALLOCATOR_H_
diff --git a/runtime/src/iree/hal/allocator_heap.c b/runtime/src/iree/hal/allocator_heap.c
new file mode 100644
index 0000000..7b53c27
--- /dev/null
+++ b/runtime/src/iree/hal/allocator_heap.c
@@ -0,0 +1,238 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/buffer_heap_impl.h"
+#include "iree/hal/resource.h"
+
+// Host-heap implementation of iree_hal_allocator_t.
+// The identifier string is stored inline in the same allocation as the struct
+// (see iree_hal_allocator_create_heap).
+typedef struct iree_hal_heap_allocator_t {
+  iree_hal_resource_t resource;  // must be first: vtable/ref-count header
+  iree_allocator_t host_allocator;  // used for the allocator struct itself
+  iree_allocator_t data_allocator;  // used for buffer contents
+  iree_string_view_t identifier;  // points into trailing inline storage
+  IREE_STATISTICS(iree_hal_heap_allocator_statistics_t statistics;)
+} iree_hal_heap_allocator_t;
+
+// Forward declaration; defined at the bottom of this file.
+static const iree_hal_allocator_vtable_t iree_hal_heap_allocator_vtable;
+
+// Downcasts the opaque base allocator to the heap implementation type.
+// Callers must only pass allocators created by iree_hal_allocator_create_heap.
+iree_hal_heap_allocator_t* iree_hal_heap_allocator_cast(
+    iree_hal_allocator_t* IREE_RESTRICT base_value) {
+  return (iree_hal_heap_allocator_t*)base_value;
+}
+
+// Creates a host-heap HAL allocator.
+// |identifier| is copied into trailing storage of the single allocation so
+// the caller's string need not outlive the allocator. |data_allocator| backs
+// buffer contents while |host_allocator| backs the allocator struct itself.
+IREE_API_EXPORT iree_status_t iree_hal_allocator_create_heap(
+    iree_string_view_t identifier, iree_allocator_t data_allocator,
+    iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator) {
+  IREE_ASSERT_ARGUMENT(out_allocator);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  *out_allocator = NULL;
+
+  // Single allocation: struct followed by the identifier characters.
+  iree_hal_heap_allocator_t* allocator = NULL;
+  iree_host_size_t total_size =
+      iree_sizeof_struct(*allocator) + identifier.size;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&allocator);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_heap_allocator_vtable,
+                                 &allocator->resource);
+    allocator->host_allocator = host_allocator;
+    allocator->data_allocator = data_allocator;
+    iree_string_view_append_to_buffer(
+        identifier, &allocator->identifier,
+        (char*)allocator + iree_sizeof_struct(*allocator));
+
+    IREE_STATISTICS({
+      // All start initialized to zero.
+      iree_slim_mutex_initialize(&allocator->statistics.mutex);
+    });
+
+    *out_allocator = (iree_hal_allocator_t*)allocator;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Tears down a heap allocator once its refcount reaches zero.
+static void iree_hal_heap_allocator_destroy(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+  iree_hal_heap_allocator_t* allocator =
+      iree_hal_heap_allocator_cast(base_allocator);
+  // Copy out the host allocator before freeing the struct that holds it.
+  iree_allocator_t host_allocator = allocator->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  IREE_STATISTICS(iree_slim_mutex_deinitialize(&allocator->statistics.mutex));
+
+  iree_allocator_free(host_allocator, allocator);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the host allocator used for allocator-internal allocations.
+static iree_allocator_t iree_hal_heap_allocator_host_allocator(
+    const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+  iree_hal_heap_allocator_t* allocator =
+      (iree_hal_heap_allocator_t*)base_allocator;
+  return allocator->host_allocator;
+}
+
+// No-op: the heap allocator performs no pooling and has nothing to trim.
+static iree_status_t iree_hal_heap_allocator_trim(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+  return iree_ok_status();
+}
+
+// Copies a consistent snapshot of allocation statistics to |out_statistics|.
+// Compiles to a no-op (out_statistics left untouched) when statistics are
+// disabled at build time.
+static void iree_hal_heap_allocator_query_statistics(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics) {
+  IREE_STATISTICS({
+    iree_hal_heap_allocator_t* allocator =
+        iree_hal_heap_allocator_cast(base_allocator);
+    // Lock so the memcpy observes a coherent set of counters.
+    iree_slim_mutex_lock(&allocator->statistics.mutex);
+    memcpy(out_statistics, &allocator->statistics.base,
+           sizeof(*out_statistics));
+    iree_slim_mutex_unlock(&allocator->statistics.mutex);
+  });
+}
+
+// Reports what the heap allocator can do with a buffer of the given params.
+// |allocation_size| is unused: heap compatibility does not depend on size.
+static iree_hal_buffer_compatibility_t
+iree_hal_heap_allocator_query_compatibility(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t allocation_size) {
+  // All buffers can be allocated on the heap and all heap-accessible buffers
+  // can be imported/exported.
+  iree_hal_buffer_compatibility_t compatibility =
+      IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE |
+      IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE |
+      IREE_HAL_BUFFER_COMPATIBILITY_EXPORTABLE;
+
+  // Buffers can only be used on the queue if they are device visible.
+  // This is not a strict requirement of heap buffers but matches devices that
+  // have discrete memory spaces (remoting/sandboxed, GPUs, etc) and makes it
+  // much easier to find issues of buffer definition with local devices that
+  // will cause issues when used with real devices.
+  if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
+    if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) {
+      compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER;
+    }
+    if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_DISPATCH)) {
+      compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH;
+    }
+  }
+
+  return compatibility;
+}
+
+// Returns a copy of |params| widened with the bits heap buffers always need.
+// Only adds bits; never removes caller-requested type/usage.
+static iree_hal_buffer_params_t iree_hal_heap_allocator_make_compatible(
+    const iree_hal_buffer_params_t* IREE_RESTRICT params) {
+  iree_hal_buffer_params_t result = *params;
+
+  // Always ensure we are host-visible.
+  result.type |= IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+
+  // Host currently uses mapping to copy buffers, which is done a lot.
+  // We could probably remove this mutation by preventing copies in those cases.
+  // TODO(benvanik): check if transfer is still required for DMA copy source.
+  result.usage |=
+      IREE_HAL_BUFFER_USAGE_MAPPING | IREE_HAL_BUFFER_USAGE_TRANSFER;
+
+  return result;
+}
+
+// Allocates a new heap buffer, optionally copying |initial_data| into it.
+// On success ownership of the buffer transfers to the caller via |out_buffer|.
+static iree_status_t iree_hal_heap_allocator_allocate_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  iree_hal_heap_allocator_t* allocator =
+      iree_hal_heap_allocator_cast(base_allocator);
+
+  // Coerce options into those required for use by heap-based devices.
+  iree_hal_buffer_params_t compat_params =
+      iree_hal_heap_allocator_make_compatible(params);
+
+  // Allocate the buffer (both the wrapper and the contents).
+  // Statistics pointer stays NULL when statistics are compiled out.
+  iree_hal_heap_allocator_statistics_t* statistics = NULL;
+  IREE_STATISTICS(statistics = &allocator->statistics);
+  iree_hal_buffer_t* buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_heap_buffer_create(
+      base_allocator, statistics, &compat_params, allocation_size, initial_data,
+      allocator->data_allocator, allocator->host_allocator, &buffer));
+
+  *out_buffer = buffer;
+  return iree_ok_status();
+}
+
+// Releases a buffer back to the allocator; with no pooling this is simply
+// a destroy.
+static void iree_hal_heap_allocator_deallocate_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_buffer_t* IREE_RESTRICT base_buffer) {
+  // We don't do any pooling yet.
+  // TODO(benvanik): move stats tracking here.
+  iree_hal_buffer_destroy(base_buffer);
+}
+
+// Wraps an externally-owned host allocation as a HAL buffer without copying.
+// Only host-allocation external buffers are supported; |release_callback| is
+// invoked when the wrapping buffer is destroyed.
+static iree_status_t iree_hal_heap_allocator_import_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  if (external_buffer->type != IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION) {
+    return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                            "external buffer type not supported");
+  }
+
+  // Coerce options into those required for use by heap-based devices.
+  iree_hal_buffer_params_t compat_params =
+      iree_hal_heap_allocator_make_compatible(params);
+
+  return iree_hal_heap_buffer_wrap(
+      base_allocator, compat_params.type, compat_params.access,
+      compat_params.usage, external_buffer->size,
+      iree_make_byte_span(external_buffer->handle.host_allocation.ptr,
+                          external_buffer->size),
+      release_callback, out_buffer);
+}
+
+// Exposes |buffer| contents as a host-allocation external buffer by mapping
+// the whole range persistently. The returned pointer is unowned by the
+// caller; its lifetime is tied to the buffer.
+static iree_status_t iree_hal_heap_allocator_export_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_buffer_t* IREE_RESTRICT buffer,
+    iree_hal_external_buffer_type_t requested_type,
+    iree_hal_external_buffer_flags_t requested_flags,
+    iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer) {
+  if (requested_type != IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION) {
+    return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                            "external buffer type not supported");
+  }
+
+  // Map the entire buffer persistently, if possible.
+  iree_hal_buffer_mapping_t mapping;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+      buffer, IREE_HAL_MAPPING_MODE_PERSISTENT,
+      iree_hal_buffer_allowed_access(buffer), 0, IREE_WHOLE_BUFFER, &mapping));
+
+  // Note that the returned pointer is unowned.
+  out_external_buffer->type = requested_type;
+  out_external_buffer->flags = requested_flags;
+  out_external_buffer->size = mapping.contents.data_length;
+  out_external_buffer->handle.host_allocation.ptr = mapping.contents.data;
+  return iree_ok_status();
+}
+
+// Vtable binding the heap implementation to the iree_hal_allocator_t API.
+static const iree_hal_allocator_vtable_t iree_hal_heap_allocator_vtable = {
+    .destroy = iree_hal_heap_allocator_destroy,
+    .host_allocator = iree_hal_heap_allocator_host_allocator,
+    .trim = iree_hal_heap_allocator_trim,
+    .query_statistics = iree_hal_heap_allocator_query_statistics,
+    .query_compatibility = iree_hal_heap_allocator_query_compatibility,
+    .allocate_buffer = iree_hal_heap_allocator_allocate_buffer,
+    .deallocate_buffer = iree_hal_heap_allocator_deallocate_buffer,
+    .import_buffer = iree_hal_heap_allocator_import_buffer,
+    .export_buffer = iree_hal_heap_allocator_export_buffer,
+};
diff --git a/runtime/src/iree/hal/api.h b/runtime/src/iree/hal/api.h
new file mode 100644
index 0000000..0ca7171
--- /dev/null
+++ b/runtime/src/iree/hal/api.h
@@ -0,0 +1,30 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// See iree/base/api.h for documentation on the API conventions used.
+
+#ifndef IREE_HAL_API_H_
+#define IREE_HAL_API_H_
+
+#include "iree/hal/allocator.h" // IWYU pragma: export
+#include "iree/hal/buffer.h" // IWYU pragma: export
+#include "iree/hal/buffer_view.h" // IWYU pragma: export
+#include "iree/hal/buffer_view_util.h" // IWYU pragma: export
+#include "iree/hal/command_buffer.h" // IWYU pragma: export
+#include "iree/hal/descriptor_set.h" // IWYU pragma: export
+#include "iree/hal/descriptor_set_layout.h" // IWYU pragma: export
+#include "iree/hal/device.h" // IWYU pragma: export
+#include "iree/hal/driver.h" // IWYU pragma: export
+#include "iree/hal/driver_registry.h" // IWYU pragma: export
+#include "iree/hal/event.h" // IWYU pragma: export
+#include "iree/hal/executable.h" // IWYU pragma: export
+#include "iree/hal/executable_cache.h" // IWYU pragma: export
+#include "iree/hal/executable_layout.h" // IWYU pragma: export
+#include "iree/hal/resource.h" // IWYU pragma: export
+#include "iree/hal/semaphore.h" // IWYU pragma: export
+#include "iree/hal/string_util.h" // IWYU pragma: export
+
+#endif // IREE_HAL_API_H_
diff --git a/runtime/src/iree/hal/buffer.c b/runtime/src/iree/hal/buffer.c
new file mode 100644
index 0000000..3af50a6
--- /dev/null
+++ b/runtime/src/iree/hal/buffer.c
@@ -0,0 +1,880 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/buffer.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/detail.h"
+
+#define _VTABLE_DISPATCH(buffer, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(buffer, iree_hal_buffer, method_name)
+
+//===----------------------------------------------------------------------===//
+// String utils
+//===----------------------------------------------------------------------===//
+
+// Formats a memory-type bitfield as a human-readable string into |out_temp|.
+// Combined mappings are listed first so multi-bit aliases (e.g. HOST_LOCAL)
+// match before their constituent bits.
+IREE_API_EXPORT iree_string_view_t iree_hal_memory_type_format(
+    iree_hal_memory_type_t value, iree_bitfield_string_temp_t* out_temp) {
+  static const iree_bitfield_string_mapping_t mappings[] = {
+      // Combined:
+      {IREE_HAL_MEMORY_TYPE_HOST_LOCAL, IREE_SVL("HOST_LOCAL")},
+      {IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL, IREE_SVL("DEVICE_LOCAL")},
+      // Separate:
+      {IREE_HAL_MEMORY_TYPE_TRANSIENT, IREE_SVL("TRANSIENT")},
+      {IREE_HAL_MEMORY_TYPE_HOST_VISIBLE, IREE_SVL("HOST_VISIBLE")},
+      {IREE_HAL_MEMORY_TYPE_HOST_COHERENT, IREE_SVL("HOST_COHERENT")},
+      {IREE_HAL_MEMORY_TYPE_HOST_CACHED, IREE_SVL("HOST_CACHED")},
+      {IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE, IREE_SVL("DEVICE_VISIBLE")},
+  };
+  return iree_bitfield_format_inline(value, mappings, IREE_ARRAYSIZE(mappings),
+                                     out_temp);
+}
+
+// Formats a memory-access bitfield as a human-readable string into |out_temp|.
+IREE_API_EXPORT iree_string_view_t iree_hal_memory_access_format(
+    iree_hal_memory_access_t value, iree_bitfield_string_temp_t* out_temp) {
+  static const iree_bitfield_string_mapping_t mappings[] = {
+      // Combined:
+      {IREE_HAL_MEMORY_ACCESS_ALL, IREE_SVL("ALL")},
+      {IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE, IREE_SVL("DISCARD_WRITE")},
+      // Separate:
+      {IREE_HAL_MEMORY_ACCESS_READ, IREE_SVL("READ")},
+      {IREE_HAL_MEMORY_ACCESS_WRITE, IREE_SVL("WRITE")},
+      {IREE_HAL_MEMORY_ACCESS_DISCARD, IREE_SVL("DISCARD")},
+      {IREE_HAL_MEMORY_ACCESS_MAY_ALIAS, IREE_SVL("MAY_ALIAS")},
+      {IREE_HAL_MEMORY_ACCESS_ANY, IREE_SVL("ANY")},
+  };
+  return iree_bitfield_format_inline(value, mappings, IREE_ARRAYSIZE(mappings),
+                                     out_temp);
+}
+
+// Formats a buffer-usage bitfield as a human-readable string into |out_temp|.
+IREE_API_EXPORT iree_string_view_t iree_hal_buffer_usage_format(
+    iree_hal_buffer_usage_t value, iree_bitfield_string_temp_t* out_temp) {
+  static const iree_bitfield_string_mapping_t mappings[] = {
+      // Combined:
+      // Separate:
+      {IREE_HAL_BUFFER_USAGE_CONSTANT, IREE_SVL("CONSTANT")},
+      {IREE_HAL_BUFFER_USAGE_TRANSFER, IREE_SVL("TRANSFER")},
+      {IREE_HAL_BUFFER_USAGE_MAPPING, IREE_SVL("MAPPING")},
+      {IREE_HAL_BUFFER_USAGE_DISPATCH, IREE_SVL("DISPATCH")},
+  };
+  return iree_bitfield_format_inline(value, mappings, IREE_ARRAYSIZE(mappings),
+                                     out_temp);
+}
+
+//===----------------------------------------------------------------------===//
+// Subspan indirection buffer
+//===----------------------------------------------------------------------===//
+
+static const iree_hal_buffer_vtable_t iree_hal_subspan_buffer_vtable;
+
+// Initializes |out_buffer| in-place as a subspan view over
+// |allocated_buffer| [byte_offset, byte_offset+byte_length). The view
+// inherits the parent's memory type/access/usage and retains the parent
+// (see iree_hal_buffer_initialize).
+IREE_API_EXPORT void iree_hal_subspan_buffer_initialize(
+    iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length, iree_hal_allocator_t* device_allocator,
+    iree_allocator_t host_allocator, iree_hal_buffer_t* out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocated_buffer);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  iree_hal_buffer_initialize(host_allocator, device_allocator, allocated_buffer,
+                             allocated_buffer->allocation_size, byte_offset,
+                             byte_length, allocated_buffer->memory_type,
+                             allocated_buffer->allowed_access,
+                             allocated_buffer->allowed_usage,
+                             &iree_hal_subspan_buffer_vtable, out_buffer);
+}
+
+// Releases the parent reference held by an in-place-initialized subspan and
+// clears the pointer so double-deinitialization is harmless.
+IREE_API_EXPORT void iree_hal_subspan_buffer_deinitialize(
+    iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  iree_hal_buffer_release(buffer->allocated_buffer);
+  buffer->allocated_buffer = NULL;
+}
+
+// Allocates and initializes a subspan view over |allocated_buffer|
+// [byte_offset, byte_offset+byte_length). On success the caller owns the
+// returned buffer in |out_buffer|; on failure |*out_buffer| is NULL and the
+// allocation failure status is returned.
+IREE_API_EXPORT iree_status_t iree_hal_subspan_buffer_create(
+    iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length, iree_hal_allocator_t* device_allocator,
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocated_buffer);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  *out_buffer = NULL;
+
+  iree_hal_buffer_t* buffer = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
+  if (iree_status_is_ok(status)) {
+    iree_hal_buffer_initialize(
+        host_allocator, device_allocator, allocated_buffer,
+        allocated_buffer->allocation_size, byte_offset, byte_length,
+        allocated_buffer->memory_type, allocated_buffer->allowed_access,
+        allocated_buffer->allowed_usage, &iree_hal_subspan_buffer_vtable,
+        buffer);
+    *out_buffer = buffer;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  // Propagate the allocation status: the original code returned
+  // iree_ok_status() unconditionally, silently dropping (and leaking) any
+  // allocation failure while leaving *out_buffer NULL.
+  return status;
+}
+
+// Destroys a heap-allocated subspan: drops the parent reference then frees
+// the wrapper itself.
+static void iree_hal_subspan_buffer_destroy(iree_hal_buffer_t* base_buffer) {
+  // Copy out the host allocator before freeing the struct that holds it.
+  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_buffer_release(base_buffer->allocated_buffer);
+  iree_allocator_free(host_allocator, base_buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Forwards mapping to the parent buffer. Offsets arriving here have already
+// been rebased into parent coordinates by the shared buffer API layer.
+static iree_status_t iree_hal_subspan_buffer_map_range(
+    iree_hal_buffer_t* buffer, iree_hal_mapping_mode_t mapping_mode,
+    iree_hal_memory_access_t memory_access,
+    iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+    iree_hal_buffer_mapping_t* mapping) {
+  return _VTABLE_DISPATCH(buffer->allocated_buffer, map_range)(
+      buffer->allocated_buffer, mapping_mode, memory_access, local_byte_offset,
+      local_byte_length, mapping);
+}
+
+// Forwards unmapping to the parent buffer; a no-op after deinitialization
+// (allocated_buffer already cleared).
+static iree_status_t iree_hal_subspan_buffer_unmap_range(
+    iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) {
+  if (!buffer->allocated_buffer) return iree_ok_status();
+  return _VTABLE_DISPATCH(buffer->allocated_buffer, unmap_range)(
+      buffer->allocated_buffer, local_byte_offset, local_byte_length, mapping);
+}
+
+// Forwards cache invalidation to the parent buffer.
+static iree_status_t iree_hal_subspan_buffer_invalidate_range(
+    iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  return _VTABLE_DISPATCH(buffer->allocated_buffer, invalidate_range)(
+      buffer->allocated_buffer, local_byte_offset, local_byte_length);
+}
+
+// Forwards cache flushing to the parent buffer.
+static iree_status_t iree_hal_subspan_buffer_flush_range(
+    iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  return _VTABLE_DISPATCH(buffer->allocated_buffer, flush_range)(
+      buffer->allocated_buffer, local_byte_offset, local_byte_length);
+}
+
+// Vtable for subspan views: everything delegates to the parent buffer.
+static const iree_hal_buffer_vtable_t iree_hal_subspan_buffer_vtable = {
+    .recycle = iree_hal_buffer_recycle,
+    .destroy = iree_hal_subspan_buffer_destroy,
+    .map_range = iree_hal_subspan_buffer_map_range,
+    .unmap_range = iree_hal_subspan_buffer_unmap_range,
+    .invalidate_range = iree_hal_subspan_buffer_invalidate_range,
+    .flush_range = iree_hal_subspan_buffer_flush_range,
+};
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_t
+//===----------------------------------------------------------------------===//
+
+// Initializes the common fields of an already-allocated iree_hal_buffer_t.
+// |allocated_buffer| may equal |buffer| (a root allocation) or point at a
+// parent allocation (a view); only distinct parents are retained.
+IREE_API_EXPORT void iree_hal_buffer_initialize(
+    iree_allocator_t host_allocator, iree_hal_allocator_t* device_allocator,
+    iree_hal_buffer_t* allocated_buffer, iree_device_size_t allocation_size,
+    iree_device_size_t byte_offset, iree_device_size_t byte_length,
+    iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage,
+    const iree_hal_buffer_vtable_t* vtable, iree_hal_buffer_t* buffer) {
+  iree_hal_resource_initialize(vtable, &buffer->resource);
+  buffer->host_allocator = host_allocator;
+  buffer->device_allocator = device_allocator;
+  buffer->allocated_buffer = allocated_buffer;
+  buffer->allocation_size = allocation_size;
+  buffer->byte_offset = byte_offset;
+  buffer->byte_length = byte_length;
+  buffer->memory_type = memory_type;
+  buffer->allowed_access = allowed_access;
+  buffer->allowed_usage = allowed_usage;
+
+  // Retain the base allocated buffer if it's unique from the buffer we are
+  // initializing.
+  if (allocated_buffer != buffer) {
+    iree_hal_buffer_retain(buffer->allocated_buffer);
+  }
+}
+
+// Returns a zero-refcount buffer to its owning device allocator (which may
+// pool it) or destroys it outright when no device allocator is attached.
+// NULL is tolerated as a no-op.
+IREE_API_EXPORT void iree_hal_buffer_recycle(iree_hal_buffer_t* buffer) {
+  if (IREE_LIKELY(buffer)) {
+    IREE_TRACE_ZONE_BEGIN(z0);
+    if (buffer->device_allocator) {
+      iree_hal_allocator_deallocate_buffer(buffer->device_allocator, buffer);
+    } else {
+      iree_hal_buffer_destroy(buffer);
+    }
+    IREE_TRACE_ZONE_END(z0);
+  }
+}
+
+// Immediately destroys |buffer| via its vtable, bypassing any allocator
+// pooling. NULL is tolerated as a no-op.
+IREE_API_EXPORT void iree_hal_buffer_destroy(iree_hal_buffer_t* buffer) {
+  if (IREE_LIKELY(buffer)) {
+    IREE_HAL_VTABLE_DISPATCH(buffer, iree_hal_buffer, destroy)
+    (buffer);
+  }
+}
+
+// Adds a reference to |buffer|. NULL is tolerated as a no-op.
+IREE_API_EXPORT void iree_hal_buffer_retain(iree_hal_buffer_t* buffer) {
+  if (IREE_LIKELY(buffer)) {
+    iree_atomic_ref_count_inc(&((iree_hal_resource_t*)(buffer))->ref_count);
+  }
+}
+
+// Drops a reference to |buffer|, recycling it when the last reference is
+// released (dec returns the prior count, so 1 means we held the last ref).
+// NULL is tolerated as a no-op.
+IREE_API_EXPORT void iree_hal_buffer_release(iree_hal_buffer_t* buffer) {
+  if (IREE_LIKELY(buffer) &&
+      iree_atomic_ref_count_dec(&((iree_hal_resource_t*)(buffer))->ref_count) ==
+          1) {
+    iree_hal_buffer_recycle(buffer);
+  }
+}
+
+// Validates that |actual_memory_type| contains all bits required by
+// |expected_memory_type|, returning PERMISSION_DENIED otherwise. The verbose
+// message is compiled out when IREE_STATUS_MODE is disabled.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_memory_type(
+    iree_hal_memory_type_t actual_memory_type,
+    iree_hal_memory_type_t expected_memory_type) {
+  if (IREE_UNLIKELY(
+          !iree_all_bits_set(actual_memory_type, expected_memory_type))) {
+#if IREE_STATUS_MODE
+    // Missing one or more bits.
+    iree_bitfield_string_temp_t temp0, temp1;
+    iree_string_view_t actual_memory_type_str =
+        iree_hal_memory_type_format(actual_memory_type, &temp0);
+    iree_string_view_t expected_memory_type_str =
+        iree_hal_memory_type_format(expected_memory_type, &temp1);
+    return iree_make_status(
+        IREE_STATUS_PERMISSION_DENIED,
+        "buffer memory type is not compatible with the requested operation; "
+        "buffer has %.*s, operation requires %.*s",
+        (int)actual_memory_type_str.size, actual_memory_type_str.data,
+        (int)expected_memory_type_str.size, expected_memory_type_str.data);
+#else
+    return iree_status_from_code(IREE_STATUS_PERMISSION_DENIED);
+#endif  // IREE_STATUS_MODE
+  }
+  return iree_ok_status();
+}
+
+// Validates that |allowed_memory_access| permits |required_memory_access|.
+// ANY short-circuits all checks; otherwise at least one of READ/WRITE must be
+// requested and every requested bit must be allowed.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_access(
+    iree_hal_memory_access_t allowed_memory_access,
+    iree_hal_memory_access_t required_memory_access) {
+  if (iree_all_bits_set(required_memory_access, IREE_HAL_MEMORY_ACCESS_ANY)) {
+    return iree_ok_status();
+  }
+  if (IREE_UNLIKELY(!iree_any_bit_set(
+          required_memory_access,
+          IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE))) {
+    // No actual access bits defined.
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "memory access must specify one or more of _READ or _WRITE");
+  } else if (IREE_UNLIKELY(!iree_all_bits_set(allowed_memory_access,
+                                              required_memory_access))) {
+#if IREE_STATUS_MODE
+    // Bits must match exactly.
+    iree_bitfield_string_temp_t temp0, temp1;
+    iree_string_view_t allowed_memory_access_str =
+        iree_hal_memory_access_format(allowed_memory_access, &temp0);
+    iree_string_view_t required_memory_access_str =
+        iree_hal_memory_access_format(required_memory_access, &temp1);
+    return iree_make_status(
+        IREE_STATUS_PERMISSION_DENIED,
+        "buffer does not support the requested access "
+        "type; buffer allows %.*s, operation requires %.*s",
+        (int)allowed_memory_access_str.size, allowed_memory_access_str.data,
+        (int)required_memory_access_str.size, required_memory_access_str.data);
+#else
+    return iree_status_from_code(IREE_STATUS_PERMISSION_DENIED);
+#endif  // IREE_STATUS_MODE
+  }
+  return iree_ok_status();
+}
+
+// Validates that every usage bit in |required_usage| was declared in
+// |allowed_usage| at allocation time; PERMISSION_DENIED otherwise.
+IREE_API_EXPORT iree_status_t
+iree_hal_buffer_validate_usage(iree_hal_buffer_usage_t allowed_usage,
+                               iree_hal_buffer_usage_t required_usage) {
+  if (IREE_UNLIKELY(!iree_all_bits_set(allowed_usage, required_usage))) {
+#if IREE_STATUS_MODE
+    // Missing one or more bits.
+    iree_bitfield_string_temp_t temp0, temp1;
+    iree_string_view_t allowed_usage_str =
+        iree_hal_buffer_usage_format(allowed_usage, &temp0);
+    iree_string_view_t required_usage_str =
+        iree_hal_buffer_usage_format(required_usage, &temp1);
+    return iree_make_status(
+        IREE_STATUS_PERMISSION_DENIED,
+        "requested usage was not specified when the buffer was allocated; "
+        "buffer allows %.*s, operation requires %.*s",
+        (int)allowed_usage_str.size, allowed_usage_str.data,
+        (int)required_usage_str.size, required_usage_str.data);
+#else
+    return iree_status_from_code(IREE_STATUS_PERMISSION_DENIED);
+#endif  // IREE_STATUS_MODE
+  }
+  return iree_ok_status();
+}
+
+// Validates that [byte_offset, byte_offset+byte_length) lies within the
+// logical byte range of |buffer|. A zero-length range is always valid so
+// long as its offset is in bounds.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_range(
+    iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length) {
+  // Check if the start of the range runs off the end of the buffer.
+  if (IREE_UNLIKELY(byte_offset > iree_hal_buffer_byte_length(buffer))) {
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "attempted to access an address off the end of the valid buffer range "
+        "(offset=%" PRIdsz ", length=%" PRIdsz ", buffer byte_length=%" PRIdsz
+        ")",
+        byte_offset, byte_length, iree_hal_buffer_byte_length(buffer));
+  }
+
+  if (byte_length == 0) {
+    // Fine to have a zero length.
+    return iree_ok_status();
+  }
+
+  // Check if the end runs over the allocation.
+  // NOTE(review): byte_offset + byte_length is assumed not to overflow
+  // iree_device_size_t — presumably guaranteed by callers; confirm.
+  iree_device_size_t end = byte_offset + byte_length;
+  if (IREE_UNLIKELY(end > iree_hal_buffer_byte_length(buffer))) {
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "attempted to access an address outside of the valid buffer range "
+        "(offset=%" PRIdsz ", length=%" PRIdsz ", end(inc)=%" PRIdsz
+        ", buffer byte_length=%" PRIdsz ")",
+        byte_offset, byte_length, end - 1, iree_hal_buffer_byte_length(buffer));
+  }
+
+  return iree_ok_status();
+}
+
+// Rebases a caller-relative (offset, length) range onto an allocation-
+// relative range starting at |base_offset| with capacity |max_length|.
+// IREE_WHOLE_BUFFER as |length| means "to the end" and is only legal when
+// the caller can receive an adjusted length (|out_adjusted_length| != NULL).
+// On failure the outputs are zeroed so callers cannot use stale values.
+static iree_status_t iree_hal_buffer_calculate_range(
+    iree_device_size_t base_offset, iree_device_size_t max_length,
+    iree_device_size_t offset, iree_device_size_t length,
+    iree_device_size_t* out_adjusted_offset,
+    iree_device_size_t* out_adjusted_length) {
+  // Check if the start of the range runs off the end of the buffer.
+  if (IREE_UNLIKELY(offset > max_length)) {
+    *out_adjusted_offset = 0;
+    if (out_adjusted_length) *out_adjusted_length = 0;
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "attempted to access an address off the end of the valid buffer "
+        "range (offset=%" PRIdsz ", length=%" PRIdsz
+        ", buffer byte_length=%" PRIdsz ")",
+        offset, length, max_length);
+  }
+
+  // Handle length as IREE_WHOLE_BUFFER by adjusting it (if allowed).
+  if (IREE_UNLIKELY(length == IREE_WHOLE_BUFFER) &&
+      IREE_UNLIKELY(!out_adjusted_length)) {
+    *out_adjusted_offset = 0;
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "IREE_WHOLE_BUFFER may only be used with buffer "
+                            "ranges, not external pointer ranges");
+  }
+
+  // Calculate the real ranges adjusted for our region within the allocation.
+  iree_device_size_t adjusted_offset = base_offset + offset;
+  iree_device_size_t adjusted_length =
+      length == IREE_WHOLE_BUFFER ? max_length - offset : length;
+  if (adjusted_length == 0) {
+    // Fine to have a zero length.
+    *out_adjusted_offset = adjusted_offset;
+    if (out_adjusted_length) *out_adjusted_length = adjusted_length;
+    return iree_ok_status();
+  }
+
+  // Check if the end runs over the allocation.
+  // |end| is inclusive here, hence >= rather than >.
+  iree_device_size_t end = offset + adjusted_length - 1;
+  if (IREE_UNLIKELY(end >= max_length)) {
+    *out_adjusted_offset = 0;
+    if (out_adjusted_length) *out_adjusted_length = 0;
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "attempted to access an address outside of the valid buffer "
+        "range (offset=%" PRIdsz ", adjusted_length=%" PRIdsz ", end=%" PRIdsz
+        ", buffer byte_length=%" PRIdsz ")",
+        offset, adjusted_length, end, max_length);
+  }
+
+  *out_adjusted_offset = adjusted_offset;
+  if (out_adjusted_length) *out_adjusted_length = adjusted_length;
+  return iree_ok_status();
+}
+
+// Classifies the overlap of two buffer ranges as DISJOINT, PARTIAL, or
+// COMPLETE. Ranges from different underlying allocations never overlap;
+// otherwise both ranges are rebased into allocation coordinates and
+// compared. IREE_WHOLE_BUFFER lengths are resolved to the remaining bytes.
+IREE_API_EXPORT iree_hal_buffer_overlap_t iree_hal_buffer_test_overlap(
+    iree_hal_buffer_t* lhs_buffer, iree_device_size_t lhs_offset,
+    iree_device_size_t lhs_length, iree_hal_buffer_t* rhs_buffer,
+    iree_device_size_t rhs_offset, iree_device_size_t rhs_length) {
+  if (iree_hal_buffer_allocated_buffer(lhs_buffer) !=
+      iree_hal_buffer_allocated_buffer(rhs_buffer)) {
+    // Not even the same buffers.
+    return IREE_HAL_BUFFER_OVERLAP_DISJOINT;
+  }
+  // Resolve offsets into the underlying allocation.
+  iree_device_size_t lhs_alloc_offset =
+      iree_hal_buffer_byte_offset(lhs_buffer) + lhs_offset;
+  iree_device_size_t rhs_alloc_offset =
+      iree_hal_buffer_byte_offset(rhs_buffer) + rhs_offset;
+  iree_device_size_t lhs_alloc_length =
+      lhs_length == IREE_WHOLE_BUFFER
+          ? iree_hal_buffer_byte_length(lhs_buffer) - lhs_offset
+          : lhs_length;
+  iree_device_size_t rhs_alloc_length =
+      rhs_length == IREE_WHOLE_BUFFER
+          ? iree_hal_buffer_byte_length(rhs_buffer) - rhs_offset
+          : rhs_length;
+  // Empty ranges cannot overlap anything.
+  if (!lhs_alloc_length || !rhs_alloc_length) {
+    return IREE_HAL_BUFFER_OVERLAP_DISJOINT;
+  }
+  if (lhs_alloc_offset == rhs_alloc_offset &&
+      lhs_alloc_length == rhs_alloc_length) {
+    return IREE_HAL_BUFFER_OVERLAP_COMPLETE;
+  }
+  // Standard half-open interval intersection test.
+  return lhs_alloc_offset + lhs_alloc_length > rhs_alloc_offset &&
+                 rhs_alloc_offset + rhs_alloc_length > lhs_alloc_offset
+             ? IREE_HAL_BUFFER_OVERLAP_PARTIAL
+             : IREE_HAL_BUFFER_OVERLAP_DISJOINT;
+}
+
+// Returns a buffer referencing the [byte_offset, byte_offset+byte_length)
+// range of |buffer|. Whole-range requests return the buffer itself
+// (retained); subspans of subspans are flattened onto the root allocation.
+// The caller must release the returned buffer.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_subspan(
+    iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length, iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  *out_buffer = NULL;
+
+  // Fast path: if we are requesting the whole buffer (usually via
+  // IREE_WHOLE_BUFFER) then we can just return the buffer itself.
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_calculate_range(
+      iree_hal_buffer_byte_offset(buffer), iree_hal_buffer_byte_length(buffer),
+      byte_offset, byte_length, &byte_offset, &byte_length));
+  if (byte_offset == 0 && byte_length == iree_hal_buffer_byte_length(buffer)) {
+    iree_hal_buffer_retain(buffer);
+    *out_buffer = buffer;
+    return iree_ok_status();
+  }
+
+  // To avoid heavy nesting of subspans that just add indirection we go to the
+  // parent buffer directly. If we wanted better accounting (to track where
+  // buffers came from) we'd want to avoid this but I'm not sure that's worth
+  // the super deep indirection that could arise.
+  // Note: byte_offset was already rebased into parent coordinates above.
+  iree_hal_buffer_t* allocated_buffer =
+      iree_hal_buffer_allocated_buffer(buffer);
+  if (allocated_buffer != buffer) {
+    return iree_hal_buffer_subspan(allocated_buffer, byte_offset, byte_length,
+                                   out_buffer);
+  }
+
+  return iree_hal_subspan_buffer_create(buffer, byte_offset, byte_length,
+                                        /*device_allocator=*/NULL,
+                                        buffer->host_allocator, out_buffer);
+}
+
+// Returns the root allocation this buffer (or view) is backed by.
+IREE_API_EXPORT iree_hal_buffer_t* iree_hal_buffer_allocated_buffer(
+    const iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer->allocated_buffer;
+}
+
+// Returns the total size of the underlying allocation in bytes.
+IREE_API_EXPORT iree_device_size_t
+iree_hal_buffer_allocation_size(const iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer->allocation_size;
+}
+
+// Returns this buffer's starting offset within its root allocation.
+IREE_API_EXPORT iree_device_size_t
+iree_hal_buffer_byte_offset(const iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer->byte_offset;
+}
+
+// Returns the logical length of this buffer (or view) in bytes.
+IREE_API_EXPORT iree_device_size_t
+iree_hal_buffer_byte_length(const iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer->byte_length;
+}
+
+// Returns the memory type bits the buffer was created with.
+IREE_API_EXPORT
+iree_hal_memory_type_t iree_hal_buffer_memory_type(
+    const iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer->memory_type;
+}
+
+// Returns the memory access bits permitted on the buffer.
+IREE_API_EXPORT
+iree_hal_memory_access_t iree_hal_buffer_allowed_access(
+    const iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer->allowed_access;
+}
+
+// Returns the usage bits declared when the buffer was allocated.
+IREE_API_EXPORT
+iree_hal_buffer_usage_t iree_hal_buffer_allowed_usage(
+    const iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer->allowed_usage;
+}
+
+//===----------------------------------------------------------------------===//
+// Transfer
+//===----------------------------------------------------------------------===//
+
+// Zero-fills a byte range of |buffer| via the mapping path; a convenience
+// wrapper over iree_hal_buffer_map_fill with a single zero byte pattern.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_zero(
+    iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length) {
+  const uint8_t zero = 0;
+  return iree_hal_buffer_map_fill(buffer, byte_offset, byte_length, &zero, 1);
+}
+
+// Fills [byte_offset, byte_offset+byte_length) of |buffer| with |pattern| by
+// mapping the range on the host and splatting the pattern into it.
+// |pattern_length| must be 1, 2, or 4 bytes; the offset and (resolved) length
+// must be multiples of the pattern length. Flushes non-coherent memory.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_fill(
+    iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length, const void* pattern,
+    iree_host_size_t pattern_length) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  IREE_ASSERT_ARGUMENT(pattern);
+
+  // Only 1/2/4-byte patterns are supported; reject early before mapping.
+  if (IREE_UNLIKELY(pattern_length != 1 && pattern_length != 2 &&
+                    pattern_length != 4)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "fill patterns must be 1, 2, or 4 bytes (got %zu)",
+                            pattern_length);
+  }
+
+  if (byte_length == 0) {
+    return iree_ok_status();  // No-op.
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // Map first so IREE_WHOLE_BUFFER can be resolved to a concrete length
+  // below (the mapping reports the actual mapped data length).
+  iree_hal_buffer_mapping_t target_mapping = {{0}};
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_buffer_map_range(buffer, IREE_HAL_MAPPING_MODE_SCOPED,
+                                    IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE,
+                                    byte_offset, byte_length, &target_mapping));
+  if (byte_length == IREE_WHOLE_BUFFER) {
+    byte_length = target_mapping.contents.data_length;
+  }
+
+  // Both offset and resolved length must be aligned to the pattern size so
+  // that only whole pattern elements are written.
+  if (IREE_UNLIKELY((byte_offset % pattern_length) != 0) ||
+      IREE_UNLIKELY((byte_length % pattern_length) != 0)) {
+    // Drop any unmap failure; the alignment error is the one to surface.
+    iree_status_ignore(iree_hal_buffer_unmap_range(&target_mapping));
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "attempting to fill a range with %zu byte values "
+                            "that is not aligned (offset=%" PRIdsz
+                            ", length=%" PRIdsz ")",
+                            pattern_length, byte_offset, byte_length);
+  }
+
+  const uint32_t zero_32 = 0;
+  if (memcmp(pattern, &zero_32, pattern_length) == 0) {
+    // We can turn all-zero values into single-byte fills as that can be much
+    // faster on devices (doing a fill8 vs fill32).
+    pattern_length = 1;
+  }
+
+  // Splat the pattern into the mapped host memory at its native width.
+  iree_status_t status = iree_ok_status();
+  void* data_ptr = target_mapping.contents.data;
+  switch (pattern_length) {
+    case 1: {
+      uint8_t* data = (uint8_t*)data_ptr;
+      uint8_t value_bits = *(const uint8_t*)(pattern);
+      memset(data, value_bits, byte_length);
+      break;
+    }
+    case 2: {
+      uint16_t* data = (uint16_t*)data_ptr;
+      uint16_t value_bits = *(const uint16_t*)(pattern);
+      for (iree_device_size_t i = 0; i < byte_length / sizeof(uint16_t); ++i) {
+        data[i] = value_bits;
+      }
+      break;
+    }
+    case 4: {
+      uint32_t* data = (uint32_t*)data_ptr;
+      uint32_t value_bits = *(const uint32_t*)(pattern);
+      for (iree_device_size_t i = 0; i < byte_length / sizeof(uint32_t); ++i) {
+        data[i] = value_bits;
+      }
+      break;
+    }
+    default:
+      // Unreachable: pattern_length was validated to 1/2/4 above and is only
+      // ever shrunk to 1; kept for defensive completeness.
+      status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "unsupported fill pattern length: %zu",
+                                pattern_length);
+      break;
+  }
+
+  // Non-coherent memory requires an explicit flush for the device to observe
+  // the host writes.
+  if (iree_status_is_ok(status) &&
+      !iree_all_bits_set(iree_hal_buffer_memory_type(buffer),
+                         IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
+    status = iree_hal_buffer_flush_range(&target_mapping, 0, IREE_WHOLE_BUFFER);
+  }
+
+  // Preserve the fill/flush status while also surfacing unmap failures.
+  status =
+      iree_status_join(status, iree_hal_buffer_unmap_range(&target_mapping));
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Reads |data_length| bytes starting at |source_offset| of |source_buffer|
+// into host memory at |target_buffer| via a transient scoped mapping.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_read(
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    void* target_buffer, iree_device_size_t data_length) {
+  // Zero-length reads are no-ops regardless of the provided pointers.
+  if (data_length == 0) return iree_ok_status();
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, data_length);
+
+  // Map the source range for reading, copy out to the host pointer, and
+  // drop the mapping before returning.
+  iree_hal_buffer_mapping_t mapping = {{0}};
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_buffer_map_range(source_buffer, IREE_HAL_MAPPING_MODE_SCOPED,
+                                    IREE_HAL_MEMORY_ACCESS_READ, source_offset,
+                                    data_length, &mapping));
+  memcpy(target_buffer, mapping.contents.data, data_length);
+  iree_hal_buffer_unmap_range(&mapping);
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Writes |data_length| bytes from host memory |source_buffer| into
+// |target_buffer| starting at |target_offset| via a transient scoped mapping,
+// flushing non-coherent memory afterwards.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_write(
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    const void* source_buffer, iree_device_size_t data_length) {
+  // Zero-length writes are no-ops regardless of the provided pointers.
+  if (data_length == 0) return iree_ok_status();
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  IREE_ASSERT_ARGUMENT(source_buffer);
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, data_length);
+
+  // Map with discard-write as the entire mapped range is overwritten below.
+  iree_hal_buffer_mapping_t mapping;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0,
+      iree_hal_buffer_map_range(target_buffer, IREE_HAL_MAPPING_MODE_SCOPED,
+                                IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE,
+                                target_offset, data_length, &mapping));
+  memcpy(mapping.contents.data, source_buffer, data_length);
+
+  // Non-coherent memory requires an explicit flush for the device to observe
+  // the host writes.
+  iree_status_t status = iree_ok_status();
+  if (!iree_all_bits_set(iree_hal_buffer_memory_type(target_buffer),
+                         IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
+    status = iree_hal_buffer_flush_range(&mapping, 0, IREE_WHOLE_BUFFER);
+  }
+
+  iree_hal_buffer_unmap_range(&mapping);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Copies |data_length| bytes from |source_buffer|+|source_offset| into
+// |target_buffer|+|target_offset| by host-mapping both ranges. The ranges
+// must be disjoint (memcpy is used). IREE_WHOLE_BUFFER is supported for
+// |data_length| and resolves to the min of the two mapped lengths.
+// Flushes the target range if its memory is not host-coherent.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_copy(
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t data_length) {
+  if (data_length == 0) {
+    return iree_ok_status();  // No-op.
+  }
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+
+  // Check for overlap - like memcpy we require that the two ranges don't have
+  // any overlap - because we use memcpy below!
+  if (iree_hal_buffer_test_overlap(source_buffer, source_offset, data_length,
+                                   target_buffer, target_offset, data_length) !=
+      IREE_HAL_BUFFER_OVERLAP_DISJOINT) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "source and target ranges must not overlap within the same buffer");
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, data_length);
+
+  // Map source, which may have IREE_WHOLE_BUFFER length.
+  iree_hal_buffer_mapping_t source_mapping;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_buffer_map_range(source_buffer, IREE_HAL_MAPPING_MODE_SCOPED,
+                                    IREE_HAL_MEMORY_ACCESS_READ, source_offset,
+                                    data_length, &source_mapping));
+
+  // Map target, which may also have IREE_WHOLE_BUFFER length.
+  iree_hal_buffer_mapping_t target_mapping;
+  iree_status_t status =
+      iree_hal_buffer_map_range(target_buffer, IREE_HAL_MAPPING_MODE_SCOPED,
+                                IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE,
+                                target_offset, data_length, &target_mapping);
+  if (!iree_status_is_ok(status)) {
+    iree_hal_buffer_unmap_range(&source_mapping);
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+
+  // Adjust the data length based on the min we have.
+  iree_device_size_t adjusted_data_length = 0;
+  if (data_length == IREE_WHOLE_BUFFER) {
+    // Whole buffer copy requested - that could mean either, so take the min.
+    adjusted_data_length = iree_min(source_mapping.contents.data_length,
+                                    target_mapping.contents.data_length);
+  } else {
+    // Specific length requested - validate that we have matching lengths.
+    IREE_ASSERT_EQ(source_mapping.contents.data_length,
+                   target_mapping.contents.data_length);
+    adjusted_data_length = target_mapping.contents.data_length;
+  }
+
+  // Elide zero length copies. It's been expensive to get to this point just to
+  // bail but we need to have mapped to resolve IREE_WHOLE_BUFFERs that may
+  // result in zero lengths.
+  if (IREE_UNLIKELY(adjusted_data_length == 0)) {
+    // FIX: previously this early-out returned without unmapping either range,
+    // leaking both mappings and the buffer references scoped mappings retain.
+    iree_hal_buffer_unmap_range(&source_mapping);
+    iree_hal_buffer_unmap_range(&target_mapping);
+    IREE_TRACE_ZONE_END(z0);
+    return iree_ok_status();
+  }
+
+  memcpy(target_mapping.contents.data, source_mapping.contents.data,
+         adjusted_data_length);
+
+  // Non-coherent target memory requires an explicit flush for the device to
+  // observe the host writes.
+  if (!iree_all_bits_set(iree_hal_buffer_memory_type(target_buffer),
+                         IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
+    status =
+        iree_hal_buffer_flush_range(&target_mapping, 0, adjusted_data_length);
+  }
+
+  iree_hal_buffer_unmap_range(&source_mapping);
+  iree_hal_buffer_unmap_range(&target_mapping);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+//===----------------------------------------------------------------------===//
+// Mapping
+//===----------------------------------------------------------------------===//
+
+// Maps a byte range of |buffer| into host memory. Validates access, mode,
+// and range, then dispatches to the buffer implementation; on success scoped
+// mappings retain |buffer| until iree_hal_buffer_unmap_range is called.
+// On failure |out_buffer_mapping| is left fully zeroed.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_range(
+    iree_hal_buffer_t* buffer, iree_hal_mapping_mode_t mapping_mode,
+    iree_hal_memory_access_t memory_access, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length,
+    iree_hal_buffer_mapping_t* out_buffer_mapping) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  IREE_ASSERT_ARGUMENT(out_buffer_mapping);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // Start from a cleared mapping so all early-error paths hand back a
+  // zeroed struct (which unmap treats as a no-op).
+  memset(out_buffer_mapping, 0, sizeof(*out_buffer_mapping));
+
+  // The requested access must be a subset of what the buffer allows.
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_buffer_validate_access(
+              iree_hal_buffer_allowed_access(buffer), memory_access));
+
+  // Persistent mapping requires the buffer was allocated to support it.
+  const bool is_persistent =
+      iree_all_bits_set(mapping_mode, IREE_HAL_MAPPING_MODE_PERSISTENT);
+  if (is_persistent) {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(z0,
+                                      iree_hal_buffer_validate_memory_type(
+                                          iree_hal_buffer_memory_type(buffer),
+                                          IREE_HAL_MEMORY_TYPE_HOST_VISIBLE));
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
+        iree_hal_buffer_validate_usage(iree_hal_buffer_allowed_usage(buffer),
+                                       IREE_HAL_BUFFER_USAGE_MAPPING));
+  }
+
+  // Translate the caller-relative range into an allocation-relative one,
+  // validating bounds (and resolving IREE_WHOLE_BUFFER lengths).
+  iree_device_size_t local_byte_offset = 0;
+  iree_device_size_t local_byte_length = 0;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_buffer_calculate_range(
+              iree_hal_buffer_byte_offset(buffer),
+              iree_hal_buffer_byte_length(buffer), byte_offset, byte_length,
+              &local_byte_offset, &local_byte_length));
+
+  // Populate the validation/tracking fields before dispatching so the
+  // implementation only needs to fill in contents (and its reserved storage).
+  out_buffer_mapping->buffer = buffer;
+  out_buffer_mapping->impl.allowed_access = memory_access;
+  out_buffer_mapping->impl.is_persistent = is_persistent ? 1 : 0;
+  out_buffer_mapping->impl.byte_offset = local_byte_offset;
+
+  iree_status_t status = _VTABLE_DISPATCH(buffer, map_range)(
+      buffer, mapping_mode, memory_access, out_buffer_mapping->impl.byte_offset,
+      local_byte_length, out_buffer_mapping);
+
+  if (iree_status_is_ok(status)) {
+    // Scoped mappings retain the buffer until unmapped.
+    if (!is_persistent) iree_hal_buffer_retain(buffer);
+  } else {
+    // Don't leak partially-initialized mapping state to the caller.
+    memset(out_buffer_mapping, 0, sizeof(*out_buffer_mapping));
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Tears down a mapping produced by iree_hal_buffer_map_range. Calling this
+// on a zeroed (never-mapped or already-unmapped) mapping is a harmless no-op.
+IREE_API_EXPORT iree_status_t
+iree_hal_buffer_unmap_range(iree_hal_buffer_mapping_t* buffer_mapping) {
+  IREE_ASSERT_ARGUMENT(buffer_mapping);
+  iree_hal_buffer_t* buffer = buffer_mapping->buffer;
+  // NULL buffer indicates the mapping was never populated (or was cleared
+  // by a prior unmap below).
+  if (!buffer) return iree_ok_status();
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_t status = _VTABLE_DISPATCH(buffer, unmap_range)(
+      buffer, buffer_mapping->impl.byte_offset,
+      buffer_mapping->contents.data_length, buffer_mapping);
+
+  // Scoped mappings retained the buffer at map time; release that reference
+  // only after the implementation has finished with the mapping.
+  if (!buffer_mapping->impl.is_persistent) {
+    iree_hal_buffer_release(buffer);
+  }
+  // Clear so that a double-unmap takes the no-op path above.
+  memset(buffer_mapping, 0, sizeof(*buffer_mapping));
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Makes device writes within the mapped range visible to the host.
+// Requires the mapping to have been created with read access.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_invalidate_range(
+    iree_hal_buffer_mapping_t* buffer_mapping, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length) {
+  IREE_ASSERT_ARGUMENT(buffer_mapping);
+  iree_hal_buffer_t* buffer = buffer_mapping->buffer;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
+      buffer_mapping->impl.allowed_access, IREE_HAL_MEMORY_ACCESS_READ));
+  // Translate the mapping-relative range into a buffer-relative one,
+  // validating it falls within the mapped contents.
+  iree_device_size_t local_byte_offset = byte_offset;
+  iree_device_size_t local_byte_length = byte_length;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_calculate_range(
+      buffer_mapping->impl.byte_offset, buffer_mapping->contents.data_length,
+      local_byte_offset, local_byte_length, &local_byte_offset,
+      &local_byte_length));
+  return _VTABLE_DISPATCH(buffer, invalidate_range)(buffer, local_byte_offset,
+                                                    local_byte_length);
+}
+
+// Makes host writes within the mapped range visible to the device.
+// Requires the mapping to have been created with write access.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_flush_range(
+    iree_hal_buffer_mapping_t* buffer_mapping, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length) {
+  IREE_ASSERT_ARGUMENT(buffer_mapping);
+  iree_hal_buffer_t* buffer = buffer_mapping->buffer;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
+      buffer_mapping->impl.allowed_access, IREE_HAL_MEMORY_ACCESS_WRITE));
+  // Translate the mapping-relative range into a buffer-relative one,
+  // validating it falls within the mapped contents.
+  iree_device_size_t local_byte_offset = byte_offset;
+  iree_device_size_t local_byte_length = byte_length;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_calculate_range(
+      buffer_mapping->impl.byte_offset, buffer_mapping->contents.data_length,
+      local_byte_offset, local_byte_length, &local_byte_offset,
+      &local_byte_length));
+  return _VTABLE_DISPATCH(buffer, flush_range)(buffer, local_byte_offset,
+                                               local_byte_length);
+}
+
+// Produces a byte span referencing a sub-range of an existing mapping's
+// contents. The span aliases the mapping and is only valid while it remains
+// mapped; |out_span| is zeroed on failure.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_mapping_subspan(
+    iree_hal_buffer_mapping_t* buffer_mapping,
+    iree_hal_memory_access_t memory_access, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length, iree_byte_span_t* out_span) {
+  IREE_ASSERT_ARGUMENT(buffer_mapping);
+  IREE_ASSERT_ARGUMENT(out_span);
+  memset(out_span, 0, sizeof(*out_span));
+  // The subspan cannot widen access beyond what the mapping allows.
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
+      buffer_mapping->impl.allowed_access, memory_access));
+  // Resolve the requested range against the mapped contents; byte_offset is
+  // updated in-place to the adjusted offset within the mapping.
+  iree_device_size_t span_length = 0;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_calculate_range(
+      0, buffer_mapping->contents.data_length, byte_offset, byte_length,
+      &byte_offset, &span_length));
+  out_span->data = buffer_mapping->contents.data + byte_offset;
+  out_span->data_length = span_length;
+  return iree_ok_status();
+}
diff --git a/runtime/src/iree/hal/buffer.h b/runtime/src/iree/hal/buffer.h
new file mode 100644
index 0000000..2f2a16f
--- /dev/null
+++ b/runtime/src/iree/hal/buffer.h
@@ -0,0 +1,607 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_BUFFER_H_
+#define IREE_HAL_BUFFER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_allocator_t iree_hal_allocator_t;
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// Whole length of the underlying buffer.
+#define IREE_WHOLE_BUFFER ((iree_device_size_t)(-1))
+
+// A bitfield specifying properties for a memory type.
+enum iree_hal_memory_type_bits_t {
+  IREE_HAL_MEMORY_TYPE_NONE = 0u,
+
+  // Memory is lazily allocated by the device and only exists transiently.
+  // This is the optimal mode for memory used only within a single command
+  // buffer. Transient buffers, even if they have
+  // IREE_HAL_MEMORY_TYPE_HOST_VISIBLE set, should be treated as device-local
+  // and opaque as they may have no memory attached to them outside of the time
+  // they are being evaluated on devices.
+  //
+  // This flag can be treated as a hint in most cases; allocating a buffer with
+  // it set _may_ return the same as if it had not been set. Certain allocation
+  // routines may use the hint to more tightly control reuse or defer wiring
+  // the memory.
+  IREE_HAL_MEMORY_TYPE_TRANSIENT = 1u << 0,
+
+  // Memory allocated with this type can be mapped for host access using
+  // iree_hal_buffer_map_range.
+  IREE_HAL_MEMORY_TYPE_HOST_VISIBLE = 1u << 1,
+
+  // The host cache management commands MappedMemory::Flush and
+  // MappedMemory::Invalidate are not needed to flush host writes
+  // to the device or make device writes visible to the host, respectively.
+  IREE_HAL_MEMORY_TYPE_HOST_COHERENT = 1u << 2,
+
+  // Memory allocated with this type is cached on the host. Host memory
+  // accesses to uncached memory are slower than to cached memory, however
+  // uncached memory is always host coherent. MappedMemory::Flush must be used
+  // to ensure the device has visibility into any changes made on the host and
+  // Invalidate must be used to ensure the host has visibility into any changes
+  // made on the device.
+  IREE_HAL_MEMORY_TYPE_HOST_CACHED = 1u << 3,
+
+  // Memory is accessible as normal host allocated memory.
+  IREE_HAL_MEMORY_TYPE_HOST_LOCAL =
+      IREE_HAL_MEMORY_TYPE_HOST_VISIBLE | IREE_HAL_MEMORY_TYPE_HOST_COHERENT,
+
+  // Memory allocated with this type is visible to the device for execution.
+  // Being device visible does not mean the same thing as
+  // IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL. Though an allocation may be visible to
+  // the device and therefore usable for execution it may require expensive
+  // mapping or implicit transfers.
+  IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE = 1u << 4,
+
+  // Memory allocated with this type is the most efficient for device access.
+  // Devices may support using memory that is not device local via
+  // IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE but doing so can incur non-trivial
+  // performance penalties. Device local memory, on the other hand, is
+  // guaranteed to be fast for all operations.
+  IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL =
+      IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE | (1u << 5),
+};
+typedef uint32_t iree_hal_memory_type_t;
+
+// A bitfield specifying how memory will be accessed in a mapped memory region.
+enum iree_hal_memory_access_bits_t {
+ // Memory is not mapped.
+ IREE_HAL_MEMORY_ACCESS_NONE = 0u,
+ // Memory will be read.
+ // If a buffer is only mapped for reading it may still be possible to write to
+ // it but the results will be undefined (as it may present coherency issues).
+ IREE_HAL_MEMORY_ACCESS_READ = 1u << 0,
+ // Memory will be written.
+ // If a buffer is only mapped for writing it may still be possible to read
+ // from it but the results will be undefined or incredibly slow (as it may
+ // be mapped by the driver as uncached).
+ IREE_HAL_MEMORY_ACCESS_WRITE = 1u << 1,
+ // Memory will be discarded prior to mapping.
+ // The existing contents will be undefined after mapping and must be written
+ // to ensure validity.
+ IREE_HAL_MEMORY_ACCESS_DISCARD = 1u << 2,
+ // Memory will be discarded and completely overwritten in a single operation.
+ IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE =
+ IREE_HAL_MEMORY_ACCESS_WRITE | IREE_HAL_MEMORY_ACCESS_DISCARD,
+ // A flag that can be applied to any access type to indicate that the buffer
+ // storage being accessed may alias with other accesses occurring concurrently
+ // within or across operations. The lack of the flag indicates that the access
+ // is guaranteed not to alias (ala C's `restrict` keyword).
+ IREE_HAL_MEMORY_ACCESS_MAY_ALIAS = 1u << 3,
+ // Memory access may perform any operation and should not be validated.
+ // Used upon access to bypass access verification at the API boundary and
+ // effectively provides a `void*`.
+ // This should only be used by device-side code where it is known-safe to
+ // bypass the access verification.
+ IREE_HAL_MEMORY_ACCESS_ANY = 1u << 4,
+ // Memory may have any operation performed on it.
+ IREE_HAL_MEMORY_ACCESS_ALL = IREE_HAL_MEMORY_ACCESS_READ |
+ IREE_HAL_MEMORY_ACCESS_WRITE |
+ IREE_HAL_MEMORY_ACCESS_DISCARD,
+};
+typedef uint16_t iree_hal_memory_access_t;
+
+// Bitfield that defines how a buffer is intended to be used.
+// Usage allows the driver to appropriately place the buffer for more
+// efficient operations of the specified types.
+enum iree_hal_buffer_usage_bits_t {
+ IREE_HAL_BUFFER_USAGE_NONE = 0u,
+
+ // The buffer, once defined, will not be mapped or updated again.
+ // This should be used for uniform parameter values such as runtime
+ // constants for executables. Doing so may allow drivers to inline values or
+ // represent them in command buffers more efficiently (avoiding memory reads
+ // or swapping, etc).
+ IREE_HAL_BUFFER_USAGE_CONSTANT = 1u << 0,
+
+ // The buffer can be used as the source or target of a transfer command
+ // (CopyBuffer, UpdateBuffer, etc).
+ //
+ // If |IREE_HAL_BUFFER_USAGE_MAPPING| is not specified drivers may safely
+ // assume that the host may never need visibility of this buffer as all
+ // accesses will happen via command buffers.
+ IREE_HAL_BUFFER_USAGE_TRANSFER = 1u << 1,
+
+ // The buffer can be mapped by the host application for reading and writing
+ // without a copy.
+ //
+ // As mapping may require placement in special address ranges or system
+ // calls to enable visibility the driver can use the presence (or lack of)
+ // this flag to perform allocation-type setup and avoid initial mapping
+ // overhead.
+ IREE_HAL_BUFFER_USAGE_MAPPING = 1u << 2,
+
+ // The buffer can be provided as an input or output to an executable.
+ // Buffers of this type may be directly used by drivers during dispatch.
+ IREE_HAL_BUFFER_USAGE_DISPATCH = 1u << 3,
+};
+typedef uint32_t iree_hal_buffer_usage_t;
+
+// Buffer overlap testing results.
+typedef enum iree_hal_buffer_overlap_e {
+ // No overlap between the two buffers.
+ IREE_HAL_BUFFER_OVERLAP_DISJOINT = 0,
+ // Partial overlap between the two buffers.
+ IREE_HAL_BUFFER_OVERLAP_PARTIAL,
+ // Complete overlap between the two buffers (they are the same).
+ IREE_HAL_BUFFER_OVERLAP_COMPLETE,
+} iree_hal_buffer_overlap_t;
+
+// A bitfield specifying buffer transfer behavior.
+enum iree_hal_transfer_buffer_flag_bits_t {
+ // TODO(benvanik): flags controlling blocking, flushing, invalidation, and
+ // persistence. We may also want to set a bit that causes failure on emulated
+ // transfers that would otherwise be really expensive.
+ IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT = 0,
+};
+typedef uint32_t iree_hal_transfer_buffer_flags_t;
+
+// Determines buffer mapping behavior.
+enum iree_hal_mapping_mode_bits_t {
+  // Buffers are mapped as part of a scoped map-access-unmap sequence.
+  // If there are any in-flight operations using the buffer, the contents are
+  // undefined though they may deceivingly still seem correct under certain
+  // implementations.
+  IREE_HAL_MAPPING_MODE_SCOPED = 1u << 0,
+
+  // Buffers are mapped persistently and concurrently accessible by both the
+  // host and device. Mapping happens once and so long as there are any live
+  // mappings the buffer will remain accessible. Not all implementations or
+  // buffer memory types support this, and even ones that do may not support
+  // coherent cross-device sharing.
+  IREE_HAL_MAPPING_MODE_PERSISTENT = 1u << 1,
+};
+typedef uint32_t iree_hal_mapping_mode_t;
+
+// Implementation-specific mapping data.
+typedef struct iree_hal_buffer_mapping_impl_t {
+ // Byte offset within the buffer where the mapped data begins.
+ iree_device_size_t byte_offset;
+ // Used for validation only.
+ iree_hal_memory_access_t allowed_access;
+ // Tracking flags.
+ uint32_t is_persistent : 1;
+ uint32_t reserved_flags : 31;
+ // Backing implementation data.
+ // For backends that require additional tracking (shadow data structures/etc)
+ // this can be used to store references to them for the duration of the
+ // mapping.
+ uint64_t reserved[1];
+} iree_hal_buffer_mapping_impl_t;
+
+// Reference to a buffer's mapped memory.
+typedef struct iree_hal_buffer_mapping_t {
+ // Contents of the buffer. Behavior is undefined if an access is performed
+ // whose type was not specified during mapping.
+ //
+ // The bytes available may be greater than what was requested if platform
+ // alignment rules require it. Only memory defined by the given span may be
+ // accessed.
+ iree_byte_span_t contents;
+
+ // Buffer providing the backing storage for the mapping.
+ // When mapped with IREE_HAL_MAPPING_MODE_SCOPED the buffer will be retained
+ // until it is unmapped. When mapped with IREE_HAL_MAPPING_MODE_PERSISTENT the
+ // caller is responsible for retaining the buffer.
+ struct iree_hal_buffer_t* buffer;
+
+ // Used internally - do not modify.
+ // Implementations are allowed to use the reserved fields for their own
+ // storage but should otherwise ignore the remaining parts.
+ iree_hal_buffer_mapping_impl_t impl;
+} iree_hal_buffer_mapping_t;
+
+// Formats a memory type bitfield as a string.
+// See iree_bitfield_format for usage.
+IREE_API_EXPORT iree_string_view_t iree_hal_memory_type_format(
+ iree_hal_memory_type_t value, iree_bitfield_string_temp_t* out_temp);
+
+// Formats a memory access bitfield as a string.
+// See iree_bitfield_format for usage.
+IREE_API_EXPORT iree_string_view_t iree_hal_memory_access_format(
+ iree_hal_memory_access_t value, iree_bitfield_string_temp_t* out_temp);
+
+// Formats a buffer usage bitfield as a string.
+// See iree_bitfield_format for usage.
+IREE_API_EXPORT iree_string_view_t iree_hal_buffer_usage_format(
+ iree_hal_buffer_usage_t value, iree_bitfield_string_temp_t* out_temp);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_t
+//===----------------------------------------------------------------------===//
+
+// Allocated memory buffer wrapper type and utilities.
+//
+// Buffers are the basic unit of memory used by the inference system. They may
+// be allocated such that they are accessible from the host (normal C++ code
+// running on the main CPU), a particular device (such as an accelerator) or
+// family of devices, or from some mix of all of those.
+//
+// The type of memory a buffer is allocated within has implications on its
+// performance and lifetime. For example if an application attempts to use a
+// host-allocated buffer (IREE_HAL_MEMORY_TYPE_HOST_LOCAL) on an accelerator
+// with discrete memory the accelerator may either be unable to access the
+// memory or take a non-trivial performance hit when attempting to do so
+// (involving setting up kernel mappings, doing DMA transfers, etc). Likewise,
+// trying to access a device-allocated buffer
+// (IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL) may incur similar overhead or not be
+// possible at all. This may be due to restrictions in the memory visibility,
+// address spaces, mixed endianness or pointer widths, and other weirdness.
+//
+// The memory types (defined by a bitfield of iree_hal_memory_type_t values)
+// that a particular context (host or device) may use vary from device to device
+// and must be queried by the application when allocating buffers. It's strongly
+// recommended that the most specific memory type be set as possible. For
+// example allocating a buffer with IREE_HAL_MEMORY_TYPE_HOST_COHERENT even when
+// it will never be used in a way that requires coherency may occupy address
+// space reservations or memory mapping that would otherwise not be needed.
+//
+// As buffers may sometimes not be accessible from the host the base buffer type
+// does not allow for direct void* access and instead buffers must be either
+// manipulated using utility functions (such as ReadData or WriteData) or by
+// mapping them into a host-accessible address space via MapMemory. Buffers must
+// be unmapped before any command may use them.
+//
+// Buffers may equate (roughly) 1:1 with an allocation either from the host heap
+// or a device. iree_hal_buffer_subspan can be used to reference subspans of
+// buffers like std::span - though unlike std::span the returned buffer holds
+// a reference to the parent buffer.
+typedef struct iree_hal_buffer_t iree_hal_buffer_t;
+
+// Returns success iff the buffer was allocated with the given memory type.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_memory_type(
+ iree_hal_memory_type_t actual_memory_type,
+ iree_hal_memory_type_t expected_memory_type);
+
+// Returns success iff the buffer allows the requested access.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_access(
+ iree_hal_memory_access_t allowed_memory_access,
+ iree_hal_memory_access_t required_memory_access);
+
+// Returns success iff the buffer usage allows the given usage type.
+IREE_API_EXPORT iree_status_t
+iree_hal_buffer_validate_usage(iree_hal_buffer_usage_t allowed_usage,
+ iree_hal_buffer_usage_t required_usage);
+
+// Returns success iff the given byte range falls within the valid buffer.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_range(
+ iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length);
+
+// Tests whether the given buffers overlap, including support for subspans.
+// IREE_WHOLE_BUFFER may be used for |lhs_length| and/or |rhs_length| to use the
+// lengths of those buffers, respectively.
+IREE_API_EXPORT iree_hal_buffer_overlap_t iree_hal_buffer_test_overlap(
+ iree_hal_buffer_t* lhs_buffer, iree_device_size_t lhs_offset,
+ iree_device_size_t lhs_length, iree_hal_buffer_t* rhs_buffer,
+ iree_device_size_t rhs_offset, iree_device_size_t rhs_length);
+
+// Returns a reference to a subspan of the |buffer|.
+// If |byte_length| is IREE_WHOLE_BUFFER the remaining bytes in the buffer after
+// |byte_offset| (possibly 0) will be selected.
+//
+// The parent buffer will remain alive for the lifetime of the subspan
+// returned. If the subspan is a small portion this may cause additional
+// memory to remain allocated longer than required.
+//
+// Returns the given |buffer| if the requested span covers the entire range.
+// |out_buffer| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_subspan(
+ iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length, iree_hal_buffer_t** out_buffer);
+
+// Retains the given |buffer| for the caller.
+IREE_API_EXPORT void iree_hal_buffer_retain(iree_hal_buffer_t* buffer);
+
+// Releases the given |buffer| from the caller.
+IREE_API_EXPORT void iree_hal_buffer_release(iree_hal_buffer_t* buffer);
+
+// Returns a pointer to the buffer containing the actual allocation.
+// The buffer represents a span of the allocated bytes defined by byte_offset
+// and byte_length. If the provided buffer *is* the allocated buffer then the
+// returned value will be the provided buffer pointer.
+IREE_API_EXPORT iree_hal_buffer_t* iree_hal_buffer_allocated_buffer(
+ const iree_hal_buffer_t* buffer);
+
+// Returns the size of the resource memory allocation in bytes.
+// This may be rounded up from the originally requested size or the ideal
+// size for the resource based on device restrictions.
+IREE_API_EXPORT iree_device_size_t
+iree_hal_buffer_allocation_size(const iree_hal_buffer_t* buffer);
+
+// Returns the offset in bytes of the buffer within its allocated_buffer.
+IREE_API_EXPORT iree_device_size_t
+iree_hal_buffer_byte_offset(const iree_hal_buffer_t* buffer);
+
+// Returns the size in bytes of the buffer.
+IREE_API_EXPORT iree_device_size_t
+iree_hal_buffer_byte_length(const iree_hal_buffer_t* buffer);
+
+// Returns the memory type the buffer was allocated with.
+IREE_API_EXPORT
+iree_hal_memory_type_t iree_hal_buffer_memory_type(
+ const iree_hal_buffer_t* buffer);
+
+// Returns the allowed memory access modes.
+// These may be more strict than the underlying allocation, for example when the
+// buffer is exposing read-only memory that may be in mutable pages.
+IREE_API_EXPORT
+iree_hal_memory_access_t iree_hal_buffer_allowed_access(
+ const iree_hal_buffer_t* buffer);
+
+// Returns the allowed buffer usage modes.
+IREE_API_EXPORT
+iree_hal_buffer_usage_t iree_hal_buffer_allowed_usage(
+ const iree_hal_buffer_t* buffer);
+
+// Sets a range of the buffer to binary zero.
+//
+// Requires that the buffer has the IREE_HAL_BUFFER_USAGE_MAPPING bit set.
+// The byte range in |buffer| will be flushed if needed.
+//
+// It is strongly recommended that buffer operations are performed on transfer
+// queues; using this synchronous function may incur additional cache flushes
+// and synchronous blocking behavior and is not supported on all buffer types.
+// See iree_hal_command_buffer_fill_buffer.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_zero(
+ iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length);
+
+// Sets a range of the buffer to the given value.
+// Only |pattern_length| values with 1, 2, or 4 bytes are supported.
+//
+// Requires that the buffer has the IREE_HAL_BUFFER_USAGE_MAPPING bit set.
+// The byte range in |buffer| will be flushed if needed.
+//
+// It is strongly recommended that buffer operations are performed on transfer
+// queues; using this synchronous function may incur additional cache flushes
+// and synchronous blocking behavior and is not supported on all buffer types.
+// See iree_hal_command_buffer_fill_buffer.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_fill(
+ iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length, const void* pattern,
+ iree_host_size_t pattern_length);
+
+// Reads a block of data from the buffer at the given offset.
+//
+// Requires that the buffer has the IREE_HAL_BUFFER_USAGE_MAPPING bit set.
+//
+// It is strongly recommended that buffer operations are performed on transfer
+// queues; using this synchronous function may incur additional cache flushes
+// and synchronous blocking behavior and is not supported on all buffer types.
+// See iree_hal_command_buffer_copy_buffer.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_read(
+ iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+ void* target_buffer, iree_device_size_t data_length);
+
+// Writes a block of byte data into the buffer at the given offset.
+//
+// Requires that the buffer has the IREE_HAL_BUFFER_USAGE_MAPPING bit set.
+// The byte range in |target_buffer| will be flushed if needed.
+//
+// It is strongly recommended that buffer operations are performed on transfer
+// queues; using this synchronous function may incur additional cache flushes
+// and synchronous blocking behavior and is not supported on all buffer types.
+// See iree_hal_command_buffer_update_buffer and
+// iree_hal_command_buffer_copy_buffer.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_write(
+ iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+ const void* source_buffer, iree_device_size_t data_length);
+
+// Copies data from the provided |source_buffer| into the |target_buffer|.
+//
+// Requires that both buffers have the IREE_HAL_BUFFER_USAGE_MAPPING bit set.
+// The byte range in |target_buffer| will be flushed if needed. Both buffers
+// need not come from the same device.
+//
+// It is strongly recommended that buffer operations are performed on transfer
+// queues; using this synchronous function may incur additional cache flushes
+// and synchronous blocking behavior and is not supported on all buffer types.
+// See iree_hal_command_buffer_copy_buffer.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_copy(
+ iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+ iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+ iree_device_size_t data_length);
+
+// Maps the buffer to be accessed as a host pointer into |out_buffer_mapping|.
+// The byte offset and byte length may be adjusted for device alignment.
+// The output data pointer will be properly aligned to the start of the data.
+// Fails if the memory could not be mapped (invalid access type, invalid
+// range, or unsupported memory type).
+//
+// Requires that the buffer has the IREE_HAL_BUFFER_USAGE_MAPPING bit set.
+// If the buffer is not IREE_HAL_MEMORY_TYPE_HOST_COHERENT then the caller must
+// invalidate the byte range they want to access to update the visibility of the
+// mapped memory.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_map_range(
+ iree_hal_buffer_t* buffer, iree_hal_mapping_mode_t mapping_mode,
+ iree_hal_memory_access_t memory_access, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length,
+ iree_hal_buffer_mapping_t* out_buffer_mapping);
+
+// Unmaps the buffer as was previously mapped to |buffer_mapping|.
+//
+// If the buffer is not IREE_HAL_MEMORY_TYPE_HOST_COHERENT then the caller must
+// flush the byte range they want to make available to other threads/devices.
+//
+// May fail, though unlikely to do so for read-only mapping and the result can
+// be safely ignored using iree_status_ignore. If writing then users must check
+// the status to ensure their writes succeeded.
+IREE_API_EXPORT iree_status_t
+iree_hal_buffer_unmap_range(iree_hal_buffer_mapping_t* buffer_mapping);
+
+// Invalidates ranges of non-coherent memory from the host caches.
+// This guarantees that device writes to the memory ranges provided are
+// visible on the host. Use before reading from non-coherent memory.
+//
+// Only required for memory types without IREE_HAL_MEMORY_TYPE_HOST_COHERENT.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_invalidate_range(
+ iree_hal_buffer_mapping_t* buffer_mapping, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length);
+
+// Flushes ranges of non-coherent memory from the host caches.
+// This guarantees that host writes to the memory ranges provided are available
+// for device access. Use after writing to non-coherent memory.
+//
+// Only required for memory types without IREE_HAL_MEMORY_TYPE_HOST_COHERENT.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_flush_range(
+ iree_hal_buffer_mapping_t* buffer_mapping, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length);
+
+// Calculates and returns a byte subspan range within a buffer mapping.
+// The byte range provided is local to the mapping. May return a 0-length span.
+// IREE_WHOLE_BUFFER can be used for |byte_length|.
+//
+// Note that the access requirements of the mapping still hold: if the memory is
+// not host coherent and writeable then the caller must use the
+// iree_hal_buffer_invalidate_range and iree_hal_buffer_flush_range methods to
+// ensure memory is in the expected state.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_mapping_subspan(
+ iree_hal_buffer_mapping_t* buffer_mapping,
+ iree_hal_memory_access_t memory_access, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length, iree_byte_span_t* out_span);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_subspan_buffer_t
+//===----------------------------------------------------------------------===//
+
+// Initializes in-place a subspan buffer stored in |out_buffer|.
+// The reference count of the buffer will be set to 1.
+//
+// This is intended to be used for provably on-stack transient subspans or
+// buffer wrapping where ownership is controlled externally. If the lifetime of
+// the subspan may extend beyond the lifetime of the |out_buffer| storage then
+// iree_hal_subspan_buffer_create must be used instead.
+//
+// iree_hal_subspan_buffer_deinitialize must be used to deinitialize the buffer.
+IREE_API_EXPORT void iree_hal_subspan_buffer_initialize(
+ iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length, iree_hal_allocator_t* device_allocator,
+ iree_allocator_t host_allocator, iree_hal_buffer_t* out_buffer);
+
+// Deinitializes a subspan buffer that was initialized with
+// iree_hal_subspan_buffer_initialize.
+IREE_API_EXPORT void iree_hal_subspan_buffer_deinitialize(
+ iree_hal_buffer_t* buffer);
+
+// Creates a buffer referencing a subspan of some base allocation.
+// Optionally |device_allocator| can be provided if this subspan references
+// managed buffers that need deallocation callbacks.
+IREE_API_EXPORT iree_status_t iree_hal_subspan_buffer_create(
+ iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset,
+ iree_device_size_t byte_length, iree_hal_allocator_t* device_allocator,
+ iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_t implementation details
+//===----------------------------------------------------------------------===//
+
+// Virtual function table each iree_hal_buffer_t implementation provides.
+// Offsets/lengths passed to the range methods are local to the buffer
+// (already adjusted for any subspan by the public wrappers).
+typedef struct iree_hal_buffer_vtable_t {
+  // Must be iree_hal_buffer_recycle.
+  // Occupies the slot iree_hal_resource_vtable_t reserves for destroy (see
+  // the static_assert below) so that releasing the last reference routes
+  // through recycling, which may return the buffer to its allocator/pool
+  // instead of destroying it.
+  void(IREE_API_PTR* recycle)(iree_hal_buffer_t* buffer);
+  // Frees the buffer and its memory; see iree_hal_buffer_destroy.
+  void(IREE_API_PTR* destroy)(iree_hal_buffer_t* buffer);
+
+  // Maps a byte range of the buffer for host access; see
+  // iree_hal_buffer_map_range.
+  iree_status_t(IREE_API_PTR* map_range)(iree_hal_buffer_t* buffer,
+                                         iree_hal_mapping_mode_t mapping_mode,
+                                         iree_hal_memory_access_t memory_access,
+                                         iree_device_size_t local_byte_offset,
+                                         iree_device_size_t local_byte_length,
+                                         iree_hal_buffer_mapping_t* mapping);
+
+  // Unmaps a range previously mapped with map_range.
+  iree_status_t(IREE_API_PTR* unmap_range)(iree_hal_buffer_t* buffer,
+                                           iree_device_size_t local_byte_offset,
+                                           iree_device_size_t local_byte_length,
+                                           iree_hal_buffer_mapping_t* mapping);
+
+  // Makes device writes to the range visible to the host; see
+  // iree_hal_buffer_invalidate_range.
+  iree_status_t(IREE_API_PTR* invalidate_range)(
+      iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset,
+      iree_device_size_t local_byte_length);
+
+  // Makes host writes to the range available for device access; see
+  // iree_hal_buffer_flush_range.
+  iree_status_t(IREE_API_PTR* flush_range)(
+      iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset,
+      iree_device_size_t local_byte_length);
+} iree_hal_buffer_vtable_t;
+// Compile-time guarantee that recycle occupies the offset where generic
+// resource code expects the destroy entry.
+static_assert(offsetof(iree_hal_buffer_vtable_t, recycle) == 0,
+              "iree_hal_resource_vtable_t expects destroy at offset 0, we want "
+              "to recycle instead");
+
+// Base storage shared by all buffer implementations.
+// Fields are ordered by access frequency for cache friendliness.
+struct iree_hal_buffer_t {
+  // Frequently accessed:
+  iree_hal_resource_t resource;  // must be at 0
+  // Buffer backing the allocation this buffer is a (sub)span of.
+  iree_hal_buffer_t* allocated_buffer;
+  // Total size of the underlying allocation in bytes.
+  iree_device_size_t allocation_size;
+  // Offset of this buffer's span into |allocated_buffer|, in bytes.
+  iree_device_size_t byte_offset;
+  // Length of this buffer's span, in bytes.
+  iree_device_size_t byte_length;
+
+  // Rarely accessed:
+  // Allocator used for the iree_hal_buffer_t metadata itself.
+  iree_allocator_t host_allocator;
+  // Device allocator the buffer came from; may be NULL (see
+  // iree_hal_subspan_buffer_create).
+  iree_hal_allocator_t* device_allocator;
+  // TODO(benvanik): bit pack these; could be ~4 bytes vs 12.
+  iree_hal_memory_type_t memory_type;
+  iree_hal_buffer_usage_t allowed_usage;
+  iree_hal_memory_access_t allowed_access;
+
+  // Implementation-defined flags.
+  uint16_t flags;
+};
+
+IREE_API_EXPORT void iree_hal_buffer_initialize(
+ iree_allocator_t host_allocator, iree_hal_allocator_t* device_allocator,
+ iree_hal_buffer_t* allocated_buffer, iree_device_size_t allocation_size,
+ iree_device_size_t byte_offset, iree_device_size_t byte_length,
+ iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
+ iree_hal_buffer_usage_t allowed_usage,
+ const iree_hal_buffer_vtable_t* vtable, iree_hal_buffer_t* buffer);
+
+// Recycles |buffer| by returning it to its allocator (or destroying it).
+// The |buffer| pointer may remain valid if it is returned to a pool but callers
+// must assume its contents are undefined.
+IREE_API_EXPORT void iree_hal_buffer_recycle(iree_hal_buffer_t* buffer);
+
+// Destroys |buffer| and frees its memory.
+// Implementations should use iree_hal_buffer_recycle in their vtables.
+IREE_API_EXPORT void iree_hal_buffer_destroy(iree_hal_buffer_t* buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_BUFFER_H_
diff --git a/runtime/src/iree/hal/buffer_heap.c b/runtime/src/iree/hal/buffer_heap.c
new file mode 100644
index 0000000..47ec037
--- /dev/null
+++ b/runtime/src/iree/hal/buffer_heap.c
@@ -0,0 +1,311 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/buffer_heap_impl.h"
+#include "iree/hal/resource.h"
+
+// How a heap buffer's metadata and data storage were allocated.
+// Recorded in base.flags at creation time and consulted by
+// iree_hal_heap_buffer_destroy to free each part correctly.
+typedef enum iree_hal_heap_buffer_storage_mode_e {
+  // Allocated as a [metadata, data] slab.
+  // The base metadata pointer must be freed with iree_allocator_free_aligned.
+  // The data storage is not freed (it lives inside the slab).
+  IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SLAB = 0u,
+  // Allocated as split [metadata] and [data].
+  // The base metadata pointer must be freed with iree_allocator_free.
+  // The data storage must be freed with iree_allocator_free_aligned.
+  IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT = 1u,
+  // Allocated as split [metadata] and an externally-owned [data].
+  // The base metadata pointer must be freed with iree_allocator_free.
+  // A user-provided buffer release callback is notified that the buffer is no
+  // longer referencing the data.
+  IREE_HAL_HEAP_BUFFER_STORAGE_MODE_EXTERNAL = 2u,
+} iree_hal_heap_buffer_storage_mode_t;
+
+// Heap buffer implementation backed by host memory.
+typedef struct iree_hal_heap_buffer_t {
+  // base.flags has the iree_hal_heap_buffer_storage_mode_t.
+  iree_hal_buffer_t base;
+
+  // Host pointer and length of the buffer storage.
+  iree_byte_span_t data;
+  // Exactly one union member is meaningful, selected by base.flags.
+  union {
+    // Used for IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT.
+    iree_allocator_t data_allocator;
+    // Used for IREE_HAL_HEAP_BUFFER_STORAGE_MODE_EXTERNAL.
+    iree_hal_buffer_release_callback_t release_callback;
+  };
+
+  // Optional statistics shared with the allocator.
+  IREE_STATISTICS(iree_hal_heap_allocator_statistics_t* statistics;)
+} iree_hal_heap_buffer_t;
+// In slab mode the metadata is a prefix of the data allocation; keeping it
+// within the minimum buffer alignment avoids padding waste per buffer.
+static_assert(sizeof(iree_hal_heap_buffer_t) <= 128,
+              "header should be <= the minimum buffer alignment so that we "
+              "don't introduce internal waste");
+
+static const iree_hal_buffer_vtable_t iree_hal_heap_buffer_vtable;
+
+// Allocates a buffer with the metadata and storage split.
+// This results in an additional host allocation but allows for user-overridden
+// data storage allocations.
+static iree_status_t iree_hal_heap_buffer_allocate_split(
+    iree_device_size_t allocation_size, iree_allocator_t data_allocator,
+    iree_allocator_t host_allocator, iree_hal_heap_buffer_t** out_buffer,
+    iree_byte_span_t* out_data) {
+  // Grab the data storage first: it's the larger of the two allocations and
+  // the most likely to fail under memory pressure. It must satisfy the
+  // minimum buffer alignment.
+  out_data->data_length = allocation_size;
+  uint8_t* storage = NULL;
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc_aligned(
+      data_allocator, allocation_size, IREE_HAL_HEAP_BUFFER_ALIGNMENT,
+      /*offset=*/0, (void**)&storage));
+  IREE_ASSERT_TRUE(iree_host_size_has_alignment(
+      (iree_host_size_t)storage, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
+  out_data->data = storage;
+
+  // The metadata wrapper only needs natural alignment.
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(**out_buffer), (void**)out_buffer);
+  if (!iree_status_is_ok(status)) {
+    // Roll back the storage allocation so nothing leaks on failure.
+    iree_allocator_free_aligned(data_allocator, out_data->data);
+  }
+  return status;
+}
+
+// Allocates a buffer with the metadata as a prefix to the storage.
+// This results in a single allocation per buffer but requires that both the
+// metadata and storage live together.
+static iree_status_t iree_hal_heap_buffer_allocate_slab(
+    iree_device_size_t allocation_size, iree_allocator_t host_allocator,
+    iree_hal_heap_buffer_t** out_buffer, iree_byte_span_t* out_data) {
+  // The metadata header is always aligned and we want to ensure it's padded
+  // out to the max alignment.
+  iree_hal_heap_buffer_t* buffer = NULL;
+  iree_host_size_t header_size =
+      iree_host_align(iree_sizeof_struct(*buffer), iree_max_align_t);
+  iree_host_size_t total_size = header_size + allocation_size;
+
+  // Allocate with the data starting at offset header_size aligned to the
+  // minimum required buffer alignment. The header itself will still be aligned
+  // to the natural alignment but our buffer alignment is often much larger.
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc_aligned(
+      host_allocator, total_size, IREE_HAL_HEAP_BUFFER_ALIGNMENT, header_size,
+      (void**)&buffer));
+  *out_buffer = buffer;
+
+  // The data begins immediately after the padded header. The caller tags the
+  // buffer as IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SLAB so that destroy frees
+  // the whole block with iree_allocator_free_aligned.
+  uint8_t* data_ptr = (uint8_t*)buffer + header_size;
+  IREE_ASSERT_TRUE(iree_host_size_has_alignment(
+      (iree_host_size_t)data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
+  *out_data = iree_make_byte_span(data_ptr, allocation_size);
+
+  return iree_ok_status();
+}
+
+// See the declaration docs in buffer_heap_impl.h.
+iree_status_t iree_hal_heap_buffer_create(
+    iree_hal_allocator_t* allocator,
+    iree_hal_heap_allocator_statistics_t* statistics,
+    const iree_hal_buffer_params_t* params, iree_device_size_t allocation_size,
+    iree_const_byte_span_t initial_data, iree_allocator_t data_allocator,
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(params);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // If the data and host allocators are the same we can allocate more
+  // efficiently as a large slab. Otherwise we need to allocate both the
+  // metadata and the storage independently.
+  // NOTE: the allocator structs are compared bytewise; allocators that are
+  // semantically identical but not bit-identical take the split path.
+  const bool same_allocator =
+      memcmp(&data_allocator, &host_allocator, sizeof(data_allocator)) == 0;
+
+  iree_hal_heap_buffer_t* buffer = NULL;
+  iree_byte_span_t data = iree_make_byte_span(NULL, 0);
+  iree_status_t status =
+      same_allocator
+          ? iree_hal_heap_buffer_allocate_slab(allocation_size, host_allocator,
+                                               &buffer, &data)
+          : iree_hal_heap_buffer_allocate_split(allocation_size, data_allocator,
+                                                host_allocator, &buffer, &data);
+
+  if (iree_status_is_ok(status)) {
+    // The buffer spans the entire allocation (offset 0, full length).
+    iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
+                               allocation_size, 0, allocation_size,
+                               params->type, params->access, params->usage,
+                               &iree_hal_heap_buffer_vtable, &buffer->base);
+    buffer->data = data;
+
+    // Record how the storage was allocated so destroy can free it correctly.
+    if (same_allocator) {
+      buffer->base.flags = IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SLAB;
+      buffer->data_allocator = iree_allocator_null();
+    } else {
+      buffer->base.flags = IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT;
+      buffer->data_allocator = data_allocator;
+    }
+
+    // Optionally record the allocation in the allocator-shared statistics.
+    IREE_STATISTICS({
+      if (statistics != NULL) {
+        buffer->statistics = statistics;
+        iree_slim_mutex_lock(&statistics->mutex);
+        iree_hal_allocator_statistics_record_alloc(
+            &statistics->base, params->type, allocation_size);
+        iree_slim_mutex_unlock(&statistics->mutex);
+      }
+    });
+
+    // Copy in any initial contents, clamped to the allocation size.
+    if (!iree_const_byte_span_is_empty(initial_data)) {
+      const iree_device_size_t initial_length =
+          iree_min(initial_data.data_length, allocation_size);
+      memcpy(buffer->data.data, initial_data.data, initial_length);
+    }
+
+    *out_buffer = &buffer->base;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// See the declaration docs in buffer_heap_impl.h.
+iree_status_t iree_hal_heap_buffer_wrap(
+    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+    iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Imported storage must already satisfy the minimum buffer alignment; we
+  // cannot fix it up after the fact since we don't own the allocation.
+  if (!iree_host_size_has_alignment((uintptr_t)data.data,
+                                    IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "imported heap buffer data must be aligned to %d; got %p",
+        (int)IREE_HAL_HEAP_BUFFER_ALIGNMENT, data.data);
+  }
+
+  iree_allocator_t host_allocator =
+      iree_hal_allocator_host_allocator(allocator);
+  iree_hal_heap_buffer_t* buffer = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
+  if (iree_status_is_ok(status)) {
+    // NOTE: the buffer's span covers data.data_length bytes of the reported
+    // |allocation_size| (the two may differ for padded imports).
+    iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
+                               allocation_size, 0, data.data_length,
+                               memory_type, allowed_access, allowed_usage,
+                               &iree_hal_heap_buffer_vtable, &buffer->base);
+    buffer->data = data;
+
+    // Notify the provided callback when the external data is no longer needed.
+    buffer->base.flags = IREE_HAL_HEAP_BUFFER_STORAGE_MODE_EXTERNAL;
+    buffer->release_callback = release_callback;
+
+    *out_buffer = &buffer->base;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees the buffer's metadata and (when owned) its data storage according to
+// the iree_hal_heap_buffer_storage_mode_t recorded in base.flags, after
+// mirroring the allocation in the shared statistics (if any).
+static void iree_hal_heap_buffer_destroy(iree_hal_buffer_t* base_buffer) {
+  iree_hal_heap_buffer_t* buffer = (iree_hal_heap_buffer_t*)base_buffer;
+  // Capture the host allocator before the metadata holding it is freed.
+  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  IREE_STATISTICS({
+    if (buffer->statistics != NULL) {
+      iree_slim_mutex_lock(&buffer->statistics->mutex);
+      iree_hal_allocator_statistics_record_free(&buffer->statistics->base,
+                                                base_buffer->memory_type,
+                                                base_buffer->allocation_size);
+      iree_slim_mutex_unlock(&buffer->statistics->mutex);
+    }
+  });
+
+  switch (buffer->base.flags) {
+    case IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SLAB: {
+      // Metadata and data are one aligned allocation.
+      iree_allocator_free_aligned(host_allocator, buffer);
+      break;
+    }
+    case IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT: {
+      // FIX: the data storage was allocated with iree_allocator_malloc_aligned
+      // (see iree_hal_heap_buffer_allocate_split and its error path) and must
+      // be freed with iree_allocator_free_aligned, not iree_allocator_free.
+      iree_allocator_free_aligned(buffer->data_allocator, buffer->data.data);
+      iree_allocator_free(host_allocator, buffer);
+      break;
+    }
+    case IREE_HAL_HEAP_BUFFER_STORAGE_MODE_EXTERNAL: {
+      // Externally-owned data: notify the owner instead of freeing.
+      if (buffer->release_callback.fn) {
+        buffer->release_callback.fn(buffer->release_callback.user_data,
+                                    base_buffer);
+      }
+      iree_allocator_free(host_allocator, buffer);
+      break;
+    }
+    default:
+      IREE_ASSERT_UNREACHABLE("unhandled buffer storage mode");
+      break;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Maps a range by pointing directly into the existing host storage.
+// NOTE(review): assumes the offset/length were validated by the shared
+// iree_hal_buffer_map_range wrapper — no bounds check is performed here.
+static iree_status_t iree_hal_heap_buffer_map_range(
+    iree_hal_buffer_t* base_buffer, iree_hal_mapping_mode_t mapping_mode,
+    iree_hal_memory_access_t memory_access,
+    iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+    iree_hal_buffer_mapping_t* mapping) {
+  iree_hal_heap_buffer_t* buffer = (iree_hal_heap_buffer_t*)base_buffer;
+  mapping->contents = iree_make_byte_span(buffer->data.data + local_byte_offset,
+                                          local_byte_length);
+
+  // If we mapped for discard scribble over the bytes. This is not a mandated
+  // behavior but it will make debugging issues easier. Alternatively for
+  // heap buffers we could reallocate them such that ASAN yells, but that
+  // would only work if the entire buffer was discarded.
+#ifndef NDEBUG
+  if (iree_any_bit_set(memory_access, IREE_HAL_MEMORY_ACCESS_DISCARD)) {
+    memset(mapping->contents.data, 0xCD, local_byte_length);
+  }
+#endif  // !NDEBUG
+
+  return iree_ok_status();
+}
+
+static iree_status_t iree_hal_heap_buffer_unmap_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) {
+  // No-op here as we always have the pointer: heap buffers are persistently
+  // host-visible so there is no per-mapping state to tear down.
+  return iree_ok_status();
+}
+
+static iree_status_t iree_hal_heap_buffer_invalidate_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  // Host memory needs no explicit cache maintenance; an acquire fence is
+  // enough to observe writes published with the release fence in
+  // iree_hal_heap_buffer_flush_range.
+  iree_atomic_thread_fence(iree_memory_order_acquire);
+  return iree_ok_status();
+}
+
+static iree_status_t iree_hal_heap_buffer_flush_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  // Publish prior writes; pairs with the acquire fence in
+  // iree_hal_heap_buffer_invalidate_range.
+  iree_atomic_thread_fence(iree_memory_order_release);
+  return iree_ok_status();
+}
+
+// Dispatch table for heap buffers. recycle must be the shared
+// iree_hal_buffer_recycle thunk (see the static_assert on
+// iree_hal_buffer_vtable_t requiring it at offset 0).
+static const iree_hal_buffer_vtable_t iree_hal_heap_buffer_vtable = {
+    .recycle = iree_hal_buffer_recycle,
+    .destroy = iree_hal_heap_buffer_destroy,
+    .map_range = iree_hal_heap_buffer_map_range,
+    .unmap_range = iree_hal_heap_buffer_unmap_range,
+    .invalidate_range = iree_hal_heap_buffer_invalidate_range,
+    .flush_range = iree_hal_heap_buffer_flush_range,
+};
diff --git a/runtime/src/iree/hal/buffer_heap_impl.h b/runtime/src/iree/hal/buffer_heap_impl.h
new file mode 100644
index 0000000..9481a3d
--- /dev/null
+++ b/runtime/src/iree/hal/buffer_heap_impl.h
@@ -0,0 +1,59 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_BUFFER_HEAP_IMPL_H_
+#define IREE_HAL_BUFFER_HEAP_IMPL_H_
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/hal/buffer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Private utilities for working with heap buffers
+//===----------------------------------------------------------------------===//
+
+// Shared heap allocator statistics; owned by a heap allocator.
+// Access to the base statistics must be guarded by |mutex|; heap buffers hold
+// a pointer to this and record their alloc/free under that lock.
+typedef struct iree_hal_heap_allocator_statistics_t {
+  iree_slim_mutex_t mutex;
+  iree_hal_allocator_statistics_t base;
+} iree_hal_heap_allocator_statistics_t;
+
+// Allocates a new heap buffer from the specified |data_allocator|.
+// |host_allocator| is used for the iree_hal_buffer_t metadata. If both
+// |data_allocator| and |host_allocator| are the same the buffer will be created
+// as a flat slab. |out_buffer| must be released by the caller.
+iree_status_t iree_hal_heap_buffer_create(
+ iree_hal_allocator_t* allocator,
+ iree_hal_heap_allocator_statistics_t* statistics,
+ const iree_hal_buffer_params_t* params, iree_device_size_t allocation_size,
+ iree_const_byte_span_t initial_data, iree_allocator_t data_allocator,
+ iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
+
+// Wraps an existing host allocation in a buffer.
+// When the buffer is destroyed the provided |release_callback| will be called.
+//
+// The buffer must be aligned to at least IREE_HAL_HEAP_BUFFER_ALIGNMENT and if
+// it is not the call will fail with IREE_STATUS_OUT_OF_RANGE.
+//
+// |out_buffer| must be released by the caller. |data| must be kept live for the
+// lifetime of the wrapping buffer.
+iree_status_t iree_hal_heap_buffer_wrap(
+ iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+ iree_hal_memory_access_t allowed_access,
+ iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+ iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback,
+ iree_hal_buffer_t** out_buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_BUFFER_HEAP_IMPL_H_
diff --git a/runtime/src/iree/hal/buffer_view.c b/runtime/src/iree/hal/buffer_view.c
new file mode 100644
index 0000000..c338235
--- /dev/null
+++ b/runtime/src/iree/hal/buffer_view.c
@@ -0,0 +1,235 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/buffer_view.h"
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/buffer_view_util.h"
+#include "iree/hal/resource.h"
+
+// Shape-and-type metadata wrapper around a retained iree_hal_buffer_t.
+struct iree_hal_buffer_view_t {
+  iree_atomic_ref_count_t ref_count;
+  // Allocator used for this struct; captured for use in destroy.
+  iree_allocator_t host_allocator;
+  // Retained backing buffer; released in iree_hal_buffer_view_destroy.
+  iree_hal_buffer_t* buffer;
+  iree_hal_element_type_t element_type;
+  iree_hal_encoding_type_t encoding_type;
+  // Total dense byte length, computed in iree_hal_buffer_view_create.
+  iree_device_size_t byte_length;
+  iree_host_size_t shape_rank;
+  // Flexible array of shape_rank dimensions allocated inline with the struct.
+  iree_hal_dim_t shape[];
+};
+
+// Creates a buffer view wrapping |buffer| with the given shape/type metadata.
+// Retains |buffer| for the lifetime of the view.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_create(
+    iree_hal_buffer_t* buffer, const iree_hal_dim_t* shape,
+    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+    iree_hal_encoding_type_t encoding_type, iree_allocator_t host_allocator,
+    iree_hal_buffer_view_t** out_buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  IREE_ASSERT_ARGUMENT(out_buffer_view);
+
+  *out_buffer_view = NULL;
+  if (IREE_UNLIKELY(shape_rank > 0 && !shape)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "no shape dimensions specified");
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Allocate and initialize the iree_hal_buffer_view_t struct.
+  // Note that we have the dynamically-sized shape dimensions on the end.
+  iree_hal_buffer_view_t* buffer_view = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator,
+      sizeof(*buffer_view) + sizeof(iree_hal_dim_t) * shape_rank,
+      (void**)&buffer_view);
+  if (iree_status_is_ok(status)) {
+    iree_atomic_ref_count_init(&buffer_view->ref_count);
+    buffer_view->host_allocator = host_allocator;
+    buffer_view->buffer = buffer;
+    iree_hal_buffer_retain(buffer_view->buffer);
+    buffer_view->element_type = element_type;
+    buffer_view->encoding_type = encoding_type;
+    // byte_length = dense element byte count × product of all dims.
+    // NOTE(review): the encoding_type is stored but not consulted here —
+    // the size assumes a dense layout; the product is not overflow-checked.
+    buffer_view->byte_length =
+        iree_hal_element_dense_byte_count(buffer_view->element_type);
+    buffer_view->shape_rank = shape_rank;
+    for (iree_host_size_t i = 0; i < shape_rank; ++i) {
+      buffer_view->shape[i] = shape[i];
+      buffer_view->byte_length *= shape[i];
+    }
+    *out_buffer_view = buffer_view;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+IREE_API_EXPORT void iree_hal_buffer_view_retain(
+    iree_hal_buffer_view_t* buffer_view) {
+  // NULL is tolerated so optional views can be retained unconditionally.
+  if (IREE_UNLIKELY(!buffer_view)) return;
+  iree_atomic_ref_count_inc(&buffer_view->ref_count);
+}
+
+IREE_API_EXPORT void iree_hal_buffer_view_release(
+    iree_hal_buffer_view_t* buffer_view) {
+  // NULL is a no-op; destroy fires when the last reference is dropped.
+  if (IREE_UNLIKELY(!buffer_view)) return;
+  if (iree_atomic_ref_count_dec(&buffer_view->ref_count) == 1) {
+    iree_hal_buffer_view_destroy(buffer_view);
+  }
+}
+
+// Releases the backing buffer and frees the view metadata.
+IREE_API_EXPORT void iree_hal_buffer_view_destroy(
+    iree_hal_buffer_view_t* buffer_view) {
+  // Capture the allocator before freeing the struct that stores it.
+  iree_allocator_t host_allocator = buffer_view->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_hal_buffer_release(buffer_view->buffer);
+  iree_allocator_free(host_allocator, buffer_view);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the backing buffer without transferring ownership.
+IREE_API_EXPORT iree_hal_buffer_t* iree_hal_buffer_view_buffer(
+    const iree_hal_buffer_view_t* buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return buffer_view->buffer;
+}
+
+// Returns the number of dimensions in the view's shape.
+IREE_API_EXPORT iree_host_size_t
+iree_hal_buffer_view_shape_rank(const iree_hal_buffer_view_t* buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return buffer_view->shape_rank;
+}
+
+// Returns a pointer to the inline shape dimensions (shape_rank entries),
+// valid for the lifetime of the view.
+IREE_API_EXPORT const iree_hal_dim_t* iree_hal_buffer_view_shape_dims(
+    const iree_hal_buffer_view_t* buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return buffer_view->shape;
+}
+
+// Returns the dimension at |index|, or 0 if |index| is out of bounds.
+IREE_API_EXPORT iree_hal_dim_t iree_hal_buffer_view_shape_dim(
+    const iree_hal_buffer_view_t* buffer_view, iree_host_size_t index) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  // FIX: valid indices are [0, shape_rank). The previous `>` comparison let
+  // index == shape_rank read one element past the end of the inline shape
+  // array.
+  if (IREE_UNLIKELY(index >= buffer_view->shape_rank)) {
+    return 0;
+  }
+  return buffer_view->shape[index];
+}
+
+// Returns the total number of elements (product of all dims).
+// A rank-0 view yields 1. The product is not overflow-checked.
+IREE_API_EXPORT iree_host_size_t
+iree_hal_buffer_view_element_count(const iree_hal_buffer_view_t* buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  iree_host_size_t element_count = 1;
+  for (iree_host_size_t i = 0; i < buffer_view->shape_rank; ++i) {
+    element_count *= buffer_view->shape[i];
+  }
+  return element_count;
+}
+
+// Copies the view's shape into |out_shape| (up to |rank_capacity| dims).
+// Always stores the actual rank into |out_shape_rank| (if provided) so a
+// too-small capacity doubles as a size query, signaled by OUT_OF_RANGE.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_shape(
+    const iree_hal_buffer_view_t* buffer_view, iree_host_size_t rank_capacity,
+    iree_hal_dim_t* out_shape, iree_host_size_t* out_shape_rank) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  IREE_ASSERT_ARGUMENT(out_shape);
+
+  // FIX: the previous version zeroed |out_shape_rank| and then immediately
+  // overwrote it — a single assignment is equivalent.
+  if (out_shape_rank) {
+    *out_shape_rank = buffer_view->shape_rank;
+  }
+  if (rank_capacity < buffer_view->shape_rank) {
+    // Not an error; just a size query.
+    return iree_status_from_code(IREE_STATUS_OUT_OF_RANGE);
+  }
+
+  for (iree_host_size_t i = 0; i < buffer_view->shape_rank; ++i) {
+    out_shape[i] = buffer_view->shape[i];
+  }
+
+  return iree_ok_status();
+}
+
+// Replaces the view's shape in place. The new shape must have the same rank
+// and the same total element count (the dims are stored inline, so the
+// struct cannot grow, and the byte_length must stay valid).
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_reshape(
+    iree_hal_buffer_view_t* buffer_view, const iree_hal_dim_t* shape,
+    iree_host_size_t shape_rank) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  IREE_ASSERT_ARGUMENT(shape);
+
+  if (shape_rank != buffer_view->shape_rank) {
+    // Rank changes require reallocation of the structure as we inline the
+    // shape dimensions. We could lighten this restriction to allow for rank
+    // reduction but knowing that rank changes aren't allowed is easier than
+    // remembering all the conditions in which they may be.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "buffer view reshapes must have the same rank; "
+                            "target=%zu, existing=%zu",
+                            shape_rank, buffer_view->shape_rank);
+  }
+
+  // Element counts must match so the existing byte_length stays correct.
+  iree_device_size_t new_element_count = 1;
+  for (iree_host_size_t i = 0; i < shape_rank; ++i) {
+    new_element_count *= shape[i];
+  }
+  iree_device_size_t old_element_count =
+      iree_hal_buffer_view_element_count(buffer_view);
+  if (new_element_count != old_element_count) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "buffer view reshapes must have the same element "
+                            "count; target=%" PRIdsz ", existing=%" PRIdsz,
+                            new_element_count, old_element_count);
+  }
+
+  // Validation passed: commit the new dims.
+  for (iree_host_size_t i = 0; i < shape_rank; ++i) {
+    buffer_view->shape[i] = shape[i];
+  }
+
+  return iree_ok_status();
+}
+
+// Returns the element type the view was created with.
+IREE_API_EXPORT iree_hal_element_type_t
+iree_hal_buffer_view_element_type(const iree_hal_buffer_view_t* buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return buffer_view->element_type;
+}
+
+// Returns the dense byte count of a single element of the view's type.
+IREE_API_EXPORT iree_host_size_t
+iree_hal_buffer_view_element_size(const iree_hal_buffer_view_t* buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return iree_hal_element_dense_byte_count(buffer_view->element_type);
+}
+
+// Returns the encoding type the view was created with.
+IREE_API_EXPORT iree_hal_encoding_type_t
+iree_hal_buffer_view_encoding_type(const iree_hal_buffer_view_t* buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return buffer_view->encoding_type;
+}
+
+// Returns the total byte length computed at creation time.
+IREE_API_EXPORT iree_device_size_t
+iree_hal_buffer_view_byte_length(const iree_hal_buffer_view_t* buffer_view) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return buffer_view->byte_length;
+}
+
+// Computes the byte offset of the element at |indices|; delegates to the
+// shared helper using this view's shape/element/encoding metadata.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_compute_offset(
+    const iree_hal_buffer_view_t* buffer_view, const iree_hal_dim_t* indices,
+    iree_host_size_t indices_count, iree_device_size_t* out_offset) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return iree_hal_buffer_compute_view_offset(
+      buffer_view->shape, buffer_view->shape_rank, buffer_view->element_type,
+      buffer_view->encoding_type, indices, indices_count, out_offset);
+}
+
+// Computes the byte range covering |lengths| elements starting at
+// |start_indices|; delegates to the shared helper using this view's
+// shape/element/encoding metadata.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_compute_range(
+    const iree_hal_buffer_view_t* buffer_view,
+    const iree_hal_dim_t* start_indices, iree_host_size_t indices_count,
+    const iree_hal_dim_t* lengths, iree_host_size_t lengths_count,
+    iree_device_size_t* out_start_offset, iree_device_size_t* out_length) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  return iree_hal_buffer_compute_view_range(
+      buffer_view->shape, buffer_view->shape_rank, buffer_view->element_type,
+      buffer_view->encoding_type, start_indices, indices_count, lengths,
+      lengths_count, out_start_offset, out_length);
+}
diff --git a/runtime/src/iree/hal/buffer_view.h b/runtime/src/iree/hal/buffer_view.h
new file mode 100644
index 0000000..5a483e1
--- /dev/null
+++ b/runtime/src/iree/hal/buffer_view.h
@@ -0,0 +1,272 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_BUFFER_VIEW_H_
+#define IREE_HAL_BUFFER_VIEW_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// NOTE: these values must be in sync with
+// iree/compiler/Dialect/HAL/IR/HALTypes.cpp
+
+// Bitfield layout: the high nibble selects the numerical class (unknown,
+// integer, float) and the low nibble selects the variant within that class.
+enum iree_hal_numerical_type_bits_t {
+ // Opaque or unknown - bytes cannot be interpreted. Indexing is still allowed
+ // so long as the bit width of the elements is known.
+ IREE_HAL_NUMERICAL_TYPE_UNKNOWN = 0x00u,
+
+ // Signless integer-like.
+ IREE_HAL_NUMERICAL_TYPE_INTEGER = 0x10u,
+ // Signed integer.
+ IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED =
+ IREE_HAL_NUMERICAL_TYPE_INTEGER | 0x01u,
+ // Unsigned integer.
+ IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED =
+ IREE_HAL_NUMERICAL_TYPE_INTEGER | 0x02u,
+
+ // Float-like.
+ // NOTE(review): written as 0x20 without the 'u' suffix while the sibling
+ // constants use 0x00u/0x10u; harmless but inconsistent.
+ IREE_HAL_NUMERICAL_TYPE_FLOAT = 0x20,
+ // IEEE754-compatible floating point semantics.
+ IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE = IREE_HAL_NUMERICAL_TYPE_FLOAT | 0x01u,
+ // 'Brain' floating point semantics (currently only bf16).
+ IREE_HAL_NUMERICAL_TYPE_FLOAT_BRAIN = IREE_HAL_NUMERICAL_TYPE_FLOAT | 0x02u,
+};
+typedef uint8_t iree_hal_numerical_type_t;
+
+// Packs a numerical type (top 8 bits) and a bit count (low bits) into a single
+// 32-bit element type value.
+#define IREE_HAL_ELEMENT_TYPE_VALUE(numerical_type, bit_count) \
+ (((uint32_t)(numerical_type) << 24) | (uint32_t)(bit_count))
+
+// Composes an iree_hal_element_type_t value with the given attributes.
+#define iree_hal_make_element_type(numerical_type, bit_count) \
+ (iree_hal_element_type_t)( \
+ IREE_HAL_ELEMENT_TYPE_VALUE(numerical_type, bit_count))
+
+// Returns the numerical type of the element, if known and not opaque.
+#define iree_hal_element_numerical_type(element_type) \
+ (iree_hal_numerical_type_t)((uint32_t)(element_type) >> 24)
+
+// Returns true if |element_type| is opaque and cannot be interpreted.
+#define iree_hal_element_numerical_type_is_opaque(element_type) \
+ (iree_hal_element_numerical_type(element_type) == \
+ IREE_HAL_NUMERICAL_TYPE_UNKNOWN)
+
+// Returns true if |element_type| is an integer of some width and semantics.
+#define iree_hal_element_numerical_type_is_integer(element_type) \
+ iree_all_bits_set(iree_hal_element_numerical_type(element_type), \
+ IREE_HAL_NUMERICAL_TYPE_INTEGER)
+
+// Returns true if |element_type| is a float of some width and semantics.
+#define iree_hal_element_numerical_type_is_float(element_type) \
+ iree_all_bits_set(iree_hal_element_numerical_type(element_type), \
+ IREE_HAL_NUMERICAL_TYPE_FLOAT)
+
+// TODO(#8193): split out logical and physical bit widths.
+// Returns the bit width of each element.
+// Only the low 8 bits are consulted, so widths above 255 are unrepresentable.
+#define iree_hal_element_bit_count(element_type) (size_t)((element_type)&0xFF)
+
+// Returns true if the element is byte-aligned.
+// Sub-byte aligned types such as i4 require user handling of the packing.
+#define iree_hal_element_is_byte_aligned(element_type) \
+ (iree_hal_element_bit_count(element_type) % 8 == 0)
+
+// Returns the number of bytes each |element_type| consumes in memory.
+// This is only valid when the encoding type is dense as sub-byte bit widths
+// may be packed in various forms (for example, i4 may be stored as nibbles
+// where each byte in memory contains two elements).
+// Rounds the bit width up to the next whole byte.
+#define iree_hal_element_dense_byte_count(element_type) \
+ ((iree_hal_element_bit_count(element_type) + 8 - 1) / 8)
+
+// Returns true if the given |element_type| represents an integer of exactly
+// |bit_width|. This ignores the signedness of the integer type.
+#define iree_hal_element_type_is_integer(element_type, bit_width) \
+ (iree_hal_element_numerical_type_is_integer(element_type) && \
+ iree_hal_element_bit_count(element_type) == (bit_width))
+
+// Defines the element type of a buffer in a standard format.
+//
+// Composed as a 32-bit bitfield to allow for opaque data types. Use
+// iree_hal_make_element_type to make a bitfield with the appropriate ordering.
+//
+// MSB ----------------------------------------------- LSB
+// [numerical type] [reserved] [reserved] [number of bits]
+//
+// Note that the signless INT_n values are distinct from SINT_n/UINT_n of the
+// same width; comparisons on the full value see them as different types.
+//
+// clang-format off
+enum iree_hal_element_types_t {
+ IREE_HAL_ELEMENT_TYPE_NONE = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 0), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_OPAQUE_8 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 8), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_OPAQUE_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 16), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_OPAQUE_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 32), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_OPAQUE_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 64), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_INT_4 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER, 4), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_SINT_4 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 4), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_UINT_4 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 4), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_INT_8 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER, 8), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_SINT_8 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 8), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_UINT_8 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 8), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_INT_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER, 16), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_SINT_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 16), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_UINT_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 16), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_INT_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER, 32), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_SINT_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 32), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_UINT_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 32), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_INT_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER, 64), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_SINT_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 64), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_UINT_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 64), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_FLOAT_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE, 16), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_FLOAT_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE, 32), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_FLOAT_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE, 64), // NOLINT
+ IREE_HAL_ELEMENT_TYPE_BFLOAT_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_FLOAT_BRAIN, 16), // NOLINT
+};
+typedef uint32_t iree_hal_element_type_t;
+// clang-format on
+
+// Defines the encoding type of a buffer when known.
+enum iree_hal_encoding_types_t {
+ // Encoding is unknown or unspecified. Generic interpretation of the buffer
+ // contents is not possible.
+ IREE_HAL_ENCODING_TYPE_OPAQUE = 0,
+ // Encoding is a densely-packed numpy/C-style row-major format.
+ // All elements are contiguous in memory.
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR = 1,
+ // TODO(#6762): sparse encodings we care about (_SPARSE_CSR)
+ // We will likely want to make this a bitfield like the element type is that
+ // we can more easily distinguish between encoding types that we can use for
+ // certain operations; for example, size calculations on a DENSE_ROW_MAJOR
+ // and DENSE_COLUMN_MAJOR would be easier to perform if we had a bit to test
+ // for whether it's dense.
+};
+typedef uint32_t iree_hal_encoding_type_t;
+
+// A dimension within a shape.
+// Signed 32-bit; negative dimensions are not meaningful for shapes.
+typedef int32_t iree_hal_dim_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_view_t
+//===----------------------------------------------------------------------===//
+
+// A shaped and typed view into a storage buffer.
+// This is the closest thing to a "tensor" we have, and it's purely used to ease
+// application code and not treated special internally by IREE. They are
+// effectively just `tuple(shape, type, buffer)`, and if the application is
+// already tracking this information in its own structures this entire type can
+// be ignored.
+typedef struct iree_hal_buffer_view_t iree_hal_buffer_view_t;
+
+// Creates a buffer view with the given |buffer|.
+// |out_buffer_view| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_create(
+ iree_hal_buffer_t* buffer, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type, iree_allocator_t host_allocator,
+ iree_hal_buffer_view_t** out_buffer_view);
+
+// Retains the given |buffer_view| for the caller.
+IREE_API_EXPORT void iree_hal_buffer_view_retain(
+ iree_hal_buffer_view_t* buffer_view);
+
+// Releases the given |buffer_view| from the caller.
+IREE_API_EXPORT void iree_hal_buffer_view_release(
+ iree_hal_buffer_view_t* buffer_view);
+
+// Returns the buffer underlying the buffer view.
+// The caller must retain the returned buffer if they want to continue using it.
+//
+// NOTE: the returned buffer length will almost always be larger than the valid
+// bytes representing this buffer view due to padding. Always query the actual
+// valid length with iree_hal_buffer_view_byte_length instead of assuming the
+// buffer is already clamped.
+IREE_API_EXPORT iree_hal_buffer_t* iree_hal_buffer_view_buffer(
+ const iree_hal_buffer_view_t* buffer_view);
+
+// Returns the rank of the shape associated with the buffer view.
+IREE_API_EXPORT iree_host_size_t
+iree_hal_buffer_view_shape_rank(const iree_hal_buffer_view_t* buffer_view);
+
+// Returns a pointer to the shape dimensions; the array limit is defined by
+// iree_hal_buffer_view_shape_rank.
+IREE_API_EXPORT const iree_hal_dim_t* iree_hal_buffer_view_shape_dims(
+ const iree_hal_buffer_view_t* buffer_view);
+
+// Returns the value of the given dimension.
+IREE_API_EXPORT iree_hal_dim_t iree_hal_buffer_view_shape_dim(
+ const iree_hal_buffer_view_t* buffer_view, iree_host_size_t index);
+
+// Returns the dimensions of the shape in |out_shape| and its rank in
+// |out_shape_rank|. |rank_capacity| indicates the number of dimensions
+// available in the |out_shape| buffer. If there is not enough capacity to store
+// all of the dimensions IREE_STATUS_OUT_OF_RANGE is returned.
+// |out_shape_rank| can be omitted if the rank is already known.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_shape(
+ const iree_hal_buffer_view_t* buffer_view, iree_host_size_t rank_capacity,
+ iree_hal_dim_t* out_shape, iree_host_size_t* out_shape_rank);
+
+// Performs a **metadata update-only** reshape.
+// The new rank and element count must match the existing values. The buffer
+// contents are left untouched; if the buffer is not dense this may make the
+// contents undefined.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_reshape(
+ iree_hal_buffer_view_t* buffer_view, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank);
+
+// Returns the total number of elements stored in the view.
+IREE_API_EXPORT iree_host_size_t
+iree_hal_buffer_view_element_count(const iree_hal_buffer_view_t* buffer_view);
+
+// Returns the element type of the buffer.
+IREE_API_EXPORT iree_hal_element_type_t
+iree_hal_buffer_view_element_type(const iree_hal_buffer_view_t* buffer_view);
+
+// Returns the size of each element in the buffer view in bytes.
+// Note that not all buffers are contiguous or densely packed.
+IREE_API_EXPORT iree_host_size_t
+iree_hal_buffer_view_element_size(const iree_hal_buffer_view_t* buffer_view);
+
+// Returns the encoding type of the buffer.
+IREE_API_EXPORT iree_hal_encoding_type_t
+iree_hal_buffer_view_encoding_type(const iree_hal_buffer_view_t* buffer_view);
+
+// Returns the total size of the specified view in bytes.
+// Note that not all buffers are contiguous or densely packed.
+IREE_API_EXPORT iree_device_size_t
+iree_hal_buffer_view_byte_length(const iree_hal_buffer_view_t* buffer_view);
+
+// Calculates a byte offset into the |buffer_view| at the given indices.
+// Requires that the encoding and element type support indexing.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_compute_offset(
+ const iree_hal_buffer_view_t* buffer_view, const iree_hal_dim_t* indices,
+ iree_host_size_t indices_count, iree_device_size_t* out_offset);
+
+// Calculates a byte range into the |buffer_view| of the given contiguous range.
+// Requires that the encoding and element type support indexing.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_compute_range(
+ const iree_hal_buffer_view_t* buffer_view,
+ const iree_hal_dim_t* start_indices, iree_host_size_t indices_count,
+ const iree_hal_dim_t* lengths, iree_host_size_t lengths_count,
+ iree_device_size_t* out_start_offset, iree_device_size_t* out_length);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_view_t implementation details
+//===----------------------------------------------------------------------===//
+
+// NOTE(review): presumably invoked when the last reference is dropped via
+// iree_hal_buffer_view_release; applications should use release instead of
+// calling this directly — confirm against the .c implementation.
+IREE_API_EXPORT void iree_hal_buffer_view_destroy(
+ iree_hal_buffer_view_t* buffer_view);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_BUFFER_VIEW_H_
diff --git a/runtime/src/iree/hal/buffer_view_util.c b/runtime/src/iree/hal/buffer_view_util.c
new file mode 100644
index 0000000..a791c0e
--- /dev/null
+++ b/runtime/src/iree/hal/buffer_view_util.c
@@ -0,0 +1,573 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/buffer_view_util.h"
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/resource.h"
+#include "iree/hal/string_util.h"
+
+//===----------------------------------------------------------------------===//
+// Buffer view math
+//===----------------------------------------------------------------------===//
+
+// Computes the total allocation size in bytes required to store a view with
+// the given shape and element type under |encoding_type|. Only dense
+// row-major encodings with byte-aligned, non-opaque element types are
+// currently supported; everything else returns an error status.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_compute_view_size(
+ const iree_hal_dim_t* shape, iree_host_size_t shape_rank,
+ iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type,
+ iree_device_size_t* out_allocation_size) {
+ IREE_ASSERT_ARGUMENT(!shape_rank || shape);
+ IREE_ASSERT_ARGUMENT(out_allocation_size);
+ *out_allocation_size = 0;
+
+ iree_device_size_t byte_length = 0;
+
+ switch (encoding_type) {
+ case IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR: {
+ if (IREE_UNLIKELY(iree_hal_element_bit_count(element_type) == 0) ||
+ IREE_UNLIKELY(!iree_hal_element_is_byte_aligned(element_type))) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "opaque and sub-byte aligned element types cannot be indexed");
+ }
+ // Dense size = element byte size * product of all dimensions.
+ // NOTE(review): the multiply chain is not overflow-checked; a
+ // sufficiently large shape could silently wrap — confirm callers bound
+ // shapes before trusting the result.
+ byte_length = iree_hal_element_dense_byte_count(element_type);
+ for (iree_host_size_t i = 0; i < shape_rank; ++i) {
+ byte_length *= shape[i];
+ }
+ break;
+ }
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unimplemented encoding type size calculation");
+ }
+
+ *out_allocation_size = byte_length;
+ return iree_ok_status();
+}
+
+// Computes the byte offset of the element at |indices| within a dense
+// row-major view described by |shape|/|element_type|. Validates that the
+// encoding is dense, the element type is byte-aligned and non-opaque, the
+// index count matches the rank, and every index is in bounds.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_compute_view_offset(
+ const iree_hal_dim_t* shape, iree_host_size_t shape_rank,
+ iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type, const iree_hal_dim_t* indices,
+ iree_host_size_t indices_count, iree_device_size_t* out_offset) {
+ IREE_ASSERT_ARGUMENT(shape);
+ IREE_ASSERT_ARGUMENT(indices);
+ IREE_ASSERT_ARGUMENT(out_offset);
+ *out_offset = 0;
+ if (IREE_UNLIKELY(encoding_type != IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR)) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "only dense encodings support view range computation");
+ } else if (IREE_UNLIKELY(iree_hal_element_bit_count(element_type) == 0) ||
+ IREE_UNLIKELY(!iree_hal_element_is_byte_aligned(element_type))) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "opaque and sub-byte aligned element types cannot be indexed");
+ } else if (IREE_UNLIKELY(shape_rank != indices_count)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "shape rank/indices mismatch: %zu != %zu",
+ shape_rank, indices_count);
+ }
+
+ // Row-major linearization: the stride of dimension i is the product of all
+ // later dimensions; the sum is an element count until scaled below.
+ iree_device_size_t offset = 0;
+ for (iree_host_size_t i = 0; i < indices_count; ++i) {
+ if (IREE_UNLIKELY(indices[i] >= shape[i])) {
+ return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+ "index[%zu] out of bounds: %d >= %d", i,
+ indices[i], shape[i]);
+ }
+ iree_device_size_t axis_offset = indices[i];
+ for (iree_host_size_t j = i + 1; j < shape_rank; ++j) {
+ axis_offset *= shape[j];
+ }
+ offset += axis_offset;
+ }
+ // Scale from element count to bytes.
+ offset *= iree_hal_element_dense_byte_count(element_type);
+
+ *out_offset = offset;
+ return iree_ok_status();
+}
+
+// Computes the byte range [start, start+length) covering the sub-region of a
+// dense row-major view defined by |start_indices| and |lengths|. Only
+// contiguous regions are supported: the byte span between the first and last
+// element must equal the product of the lengths, otherwise UNIMPLEMENTED.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_compute_view_range(
+ const iree_hal_dim_t* shape, iree_host_size_t shape_rank,
+ iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type, const iree_hal_dim_t* start_indices,
+ iree_host_size_t indices_count, const iree_hal_dim_t* lengths,
+ iree_host_size_t lengths_count, iree_device_size_t* out_start_offset,
+ iree_device_size_t* out_length) {
+ IREE_ASSERT_ARGUMENT(shape);
+ IREE_ASSERT_ARGUMENT(start_indices);
+ IREE_ASSERT_ARGUMENT(lengths);
+ IREE_ASSERT_ARGUMENT(out_start_offset);
+ IREE_ASSERT_ARGUMENT(out_length);
+ *out_start_offset = 0;
+ *out_length = 0;
+ if (IREE_UNLIKELY(encoding_type != IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR)) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "only dense encodings support view range computation");
+ } else if (IREE_UNLIKELY(iree_hal_element_bit_count(element_type) == 0) ||
+ IREE_UNLIKELY(!iree_hal_element_is_byte_aligned(element_type))) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "opaque and sub-byte aligned element types cannot be indexed");
+ } else if (IREE_UNLIKELY(indices_count != lengths_count)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "indices/lengths mismatch: %zu != %zu",
+ indices_count, lengths_count);
+ } else if (IREE_UNLIKELY(shape_rank != indices_count)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "shape rank/indices mismatch: %zu != %zu",
+ shape_rank, indices_count);
+ }
+
+ // Inclusive end index of the region per dimension (stack allocated; rank is
+ // validated equal to lengths_count above).
+ // NOTE(review): a zero length makes end_indices[i] = start - 1, which for
+ // start == 0 goes negative; zero-length ranges are not handled explicitly
+ // and will fail one of the checks below — confirm callers never pass 0.
+ iree_hal_dim_t* end_indices =
+ iree_alloca(shape_rank * sizeof(iree_hal_dim_t));
+ iree_device_size_t element_size =
+ iree_hal_element_dense_byte_count(element_type);
+ iree_device_size_t subspan_length = element_size;
+ for (iree_host_size_t i = 0; i < lengths_count; ++i) {
+ subspan_length *= lengths[i];
+ end_indices[i] = start_indices[i] + lengths[i] - 1;
+ }
+
+ iree_device_size_t start_byte_offset = 0;
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_compute_view_offset(
+ shape, shape_rank, element_type, encoding_type, start_indices,
+ indices_count, &start_byte_offset));
+ iree_device_size_t end_byte_offset = 0;
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_compute_view_offset(
+ shape, shape_rank, element_type, encoding_type, end_indices, shape_rank,
+ &end_byte_offset));
+
+ // Non-contiguous regions not yet implemented. Will be easier to detect when
+ // we have strides.
+ iree_device_size_t offset_length =
+ end_byte_offset - start_byte_offset + element_size;
+ if (subspan_length != offset_length) {
+ return iree_make_status(
+ IREE_STATUS_UNIMPLEMENTED,
+ "non-contiguous range region computation not implemented");
+ }
+
+ *out_start_offset = start_byte_offset;
+ *out_length = subspan_length;
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// Buffer view allocation and generation
+//===----------------------------------------------------------------------===//
+
+// Allocates a buffer sized for the given shape/type from |allocator|
+// (optionally initialized with |initial_data|) and wraps it in a new buffer
+// view. On success the caller owns a reference to |out_buffer_view|.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_allocate_buffer(
+ iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type,
+ iree_hal_buffer_params_t buffer_params, iree_const_byte_span_t initial_data,
+ iree_hal_buffer_view_t** out_buffer_view) {
+ IREE_ASSERT_ARGUMENT(allocator);
+ IREE_ASSERT_ARGUMENT(out_buffer_view);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_hal_buffer_params_canonicalize(&buffer_params);
+
+ iree_device_size_t allocation_size = 0;
+ iree_status_t status = iree_hal_buffer_compute_view_size(
+ shape, shape_rank, element_type, encoding_type, &allocation_size);
+
+ iree_hal_buffer_t* buffer = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_allocator_allocate_buffer(
+ allocator, buffer_params, allocation_size, initial_data, &buffer);
+ }
+
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_buffer_view_create(
+ buffer, shape, shape_rank, element_type, encoding_type,
+ iree_hal_allocator_host_allocator(allocator), out_buffer_view);
+ }
+
+ // Drop the local reference unconditionally: on success the view holds the
+ // buffer; on failure buffer is either NULL or no longer needed (release is
+ // presumably NULL-tolerant — matches its use here before any allocation).
+ iree_hal_buffer_release(buffer);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Generates buffer contents directly in device-allocated memory: allocates a
+// mappable buffer, maps it host-visible, and runs |callback| on the mapping.
+// Used when the allocator reports the mappable parameters are allocatable.
+static iree_status_t iree_hal_buffer_view_generate_buffer_in_situ(
+ iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type,
+ iree_hal_buffer_params_t buffer_params,
+ iree_hal_buffer_view_generator_callback_t callback, void* user_data,
+ iree_hal_buffer_view_t** out_buffer_view) {
+ // Allocate the buffer view and entire buffer contents with the target memory
+ // type and the mapping bits.
+ iree_hal_buffer_view_t* buffer_view = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_view_allocate_buffer(
+ allocator, shape, shape_rank, element_type, encoding_type,
+ iree_hal_buffer_params_with_usage(buffer_params,
+ IREE_HAL_BUFFER_USAGE_MAPPING),
+ iree_const_byte_span_empty(), &buffer_view));
+
+ // Map the buffer into host-visible memory.
+ // DISCARD_WRITE: the existing (uninitialized) contents need not be read.
+ iree_hal_buffer_mapping_t buffer_mapping = {{0}};
+ iree_status_t status = iree_hal_buffer_map_range(
+ iree_hal_buffer_view_buffer(buffer_view), IREE_HAL_MAPPING_MODE_SCOPED,
+ IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE, 0, IREE_WHOLE_BUFFER,
+ &buffer_mapping);
+
+ // Generate using the callback directly into the buffer.
+ if (iree_status_is_ok(status)) {
+ status = callback(&buffer_mapping, user_data);
+ }
+
+ // Always unmap, joining any unmap failure with the callback status.
+ status =
+ iree_status_join(status, iree_hal_buffer_unmap_range(&buffer_mapping));
+ if (iree_status_is_ok(status)) {
+ *out_buffer_view = buffer_view;
+ } else {
+ iree_hal_buffer_view_release(buffer_view);
+ }
+ return status;
+}
+
+// Fallback generation path: runs |callback| into a temporary host allocation
+// and then allocates the device buffer initialized from that staging memory.
+// Used when the allocator cannot provide a mappable buffer.
+static iree_status_t iree_hal_buffer_view_generate_buffer_on_host(
+ iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type,
+ iree_hal_buffer_params_t buffer_params, iree_device_size_t allocation_size,
+ iree_hal_buffer_view_generator_callback_t callback, void* user_data,
+ iree_hal_buffer_view_t** out_buffer_view) {
+ // Allocate the host memory and generate the contents.
+ iree_allocator_t host_allocator =
+ iree_hal_allocator_host_allocator(allocator);
+ void* host_ptr = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_allocator_malloc(host_allocator, allocation_size, &host_ptr));
+ // Synthesize a mapping over the host memory so the callback sees the same
+ // interface as the in-situ path.
+ iree_hal_buffer_mapping_t mapping = {
+ .contents = iree_make_byte_span(host_ptr, allocation_size),
+ };
+ iree_status_t status = callback(&mapping, user_data);
+ if (!iree_status_is_ok(status)) {
+ iree_allocator_free(host_allocator, host_ptr);
+ return status;
+ }
+
+ // Allocate the buffer with the data we just generated.
+ // We could try importing but that may create buffers that are slower to
+ // access and we want users to opt in to that instead.
+ status = iree_hal_buffer_view_allocate_buffer(
+ allocator, shape, shape_rank, element_type, encoding_type, buffer_params,
+ iree_make_const_byte_span(host_ptr, allocation_size), out_buffer_view);
+
+ // Staging memory is no longer needed regardless of allocation outcome.
+ iree_allocator_free(host_allocator, host_ptr);
+ return status;
+}
+
+// Allocates a buffer view and fills its contents via |callback|, choosing
+// between generating in place (mappable memory) and staging through a host
+// allocation based on what the allocator reports as compatible.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_generate_buffer(
+ iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type,
+ iree_hal_buffer_params_t buffer_params,
+ iree_hal_buffer_view_generator_callback_t callback, void* user_data,
+ iree_hal_buffer_view_t** out_buffer_view) {
+ IREE_ASSERT_ARGUMENT(allocator);
+ IREE_ASSERT_ARGUMENT(callback);
+ IREE_ASSERT_ARGUMENT(out_buffer_view);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_hal_buffer_params_canonicalize(&buffer_params);
+
+ // Compute how large of an allocation we need to hold the whole view.
+ // (The macro ends the trace zone before returning on error.)
+ iree_device_size_t allocation_size = 0;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_buffer_compute_view_size(shape, shape_rank, element_type,
+ encoding_type, &allocation_size));
+
+ // If we can create the requested memory type with mapping then we'll do that
+ // and avoid needing to allocate the staging memory. If we can't get that
+ // memory type (or the allocator doesn't want us using it) then we'll fall
+ // back to allocation -> generation -> copy.
+ iree_hal_buffer_params_t mappable_params = buffer_params;
+ mappable_params.type |= IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+ mappable_params.usage |= IREE_HAL_BUFFER_USAGE_MAPPING;
+ iree_hal_buffer_compatibility_t compatibility =
+ iree_hal_allocator_query_compatibility(allocator, mappable_params,
+ allocation_size);
+ bool is_mappable = iree_all_bits_set(
+ compatibility, IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE);
+
+ iree_status_t status = iree_ok_status();
+ if (is_mappable) {
+ // Compatible with allocate -> map -> generate.
+ status = iree_hal_buffer_view_generate_buffer_in_situ(
+ allocator, shape, shape_rank, element_type, encoding_type,
+ mappable_params, callback, user_data, out_buffer_view);
+ } else {
+ // Allocate host-local memory first and generate into that.
+ status = iree_hal_buffer_view_generate_buffer_on_host(
+ allocator, shape, shape_rank, element_type, encoding_type,
+ buffer_params, allocation_size, callback, user_data, out_buffer_view);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+//===----------------------------------------------------------------------===//
+// Buffer view parsing and printing
+//===----------------------------------------------------------------------===//
+
+// Parameters threaded through iree_hal_buffer_view_generate_buffer to the
+// parse callback below.
+typedef struct iree_hal_buffer_view_parse_params_t {
+ iree_string_view_t data_str;
+ iree_hal_element_type_t element_type;
+} iree_hal_buffer_view_parse_params_t;
+// Generator callback: parses the textual element data directly into the
+// mapped destination buffer.
+static iree_status_t iree_hal_buffer_view_parse_into(
+ iree_hal_buffer_mapping_t* mapping, void* user_data) {
+ iree_hal_buffer_view_parse_params_t* params =
+ (iree_hal_buffer_view_parse_params_t*)user_data;
+ return iree_hal_parse_buffer_elements(params->data_str, params->element_type,
+ mapping->contents);
+}
+
+// Parses a buffer view from its textual form `[AxBx...]type[=data]`
+// (e.g. "2x2xf32=1 2 3 4"; the shape part is absent for scalars), allocating
+// the backing buffer from |buffer_allocator| and parsing elements into it.
+static iree_status_t iree_hal_buffer_view_parse_impl(
+ iree_string_view_t value, iree_hal_allocator_t* buffer_allocator,
+ iree_hal_buffer_view_t** out_buffer_view) {
+ // Strip whitespace that may come along (linefeeds/etc).
+ value = iree_string_view_trim(value);
+ value = iree_string_view_strip_prefix(value, IREE_SV("\""));
+ value = iree_string_view_strip_suffix(value, IREE_SV("\""));
+ if (iree_string_view_is_empty(value)) {
+ // Empty lines are invalid; need at least the shape/type information.
+ *out_buffer_view = NULL;
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "empty string input");
+ }
+
+ // The part of the string corresponding to the shape, e.g. 1x2x3.
+ iree_string_view_t shape_str = iree_string_view_empty();
+ // The part of the string corresponding to the type, e.g. f32
+ iree_string_view_t type_str = iree_string_view_empty();
+ // The part of the string corresponding to the buffer data, e.g. 1 2 3 4 5 6
+ iree_string_view_t data_str = iree_string_view_empty();
+
+ // Split off the data at '=' and then split shape from type at the last 'x'.
+ iree_string_view_t shape_and_type_str = value;
+ iree_string_view_split(value, '=', &shape_and_type_str, &data_str);
+ iree_host_size_t last_x_index = iree_string_view_find_last_of(
+ shape_and_type_str, IREE_SV("x"), IREE_STRING_VIEW_NPOS);
+ if (last_x_index == IREE_STRING_VIEW_NPOS) {
+ // Scalar.
+ type_str = shape_and_type_str;
+ } else {
+ // Has a shape.
+ shape_str = iree_string_view_substr(shape_and_type_str, 0, last_x_index);
+ type_str = iree_string_view_substr(shape_and_type_str, last_x_index + 1,
+ IREE_STRING_VIEW_NPOS);
+ }
+
+ // AxBxC...
+ // First query the rank only (NULL dims), then parse into a stack array.
+ iree_host_size_t shape_rank = 0;
+ iree_status_t shape_result =
+ iree_hal_parse_shape(shape_str, 0, NULL, &shape_rank);
+ if (!iree_status_is_ok(shape_result) &&
+ !iree_status_is_out_of_range(shape_result)) {
+ return shape_result;
+ } else if (shape_rank > 128) {
+ // Arbitrary sanity cap that also bounds the alloca below.
+ return iree_make_status(
+ IREE_STATUS_RESOURCE_EXHAUSTED,
+ "a shape rank of %zu is just a little bit excessive, eh?", shape_rank);
+ }
+ shape_result = iree_status_ignore(shape_result);
+ iree_hal_dim_t* shape =
+ (iree_hal_dim_t*)iree_alloca(shape_rank * sizeof(iree_hal_dim_t));
+ IREE_RETURN_IF_ERROR(
+ iree_hal_parse_shape(shape_str, shape_rank, shape, &shape_rank));
+
+ // f32, i32, etc
+ iree_hal_element_type_t element_type = IREE_HAL_ELEMENT_TYPE_NONE;
+ IREE_RETURN_IF_ERROR(iree_hal_parse_element_type(type_str, &element_type));
+
+ // TODO(benvanik): allow specifying the encoding.
+ iree_hal_encoding_type_t encoding_type =
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR;
+
+ // Allocate the buffer from the provided allocator and parse directly into it.
+ const iree_hal_buffer_params_t buffer_params = {
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ .usage = IREE_HAL_BUFFER_USAGE_DISPATCH | IREE_HAL_BUFFER_USAGE_TRANSFER,
+ };
+ iree_hal_buffer_view_parse_params_t parse_params = {
+ .data_str = data_str,
+ .element_type = element_type,
+ };
+ return iree_hal_buffer_view_generate_buffer(
+ buffer_allocator, shape, shape_rank, element_type, encoding_type,
+ buffer_params, iree_hal_buffer_view_parse_into, &parse_params,
+ out_buffer_view);
+}
+
+// Public entry point for parsing: validates arguments and wraps the
+// implementation in a trace zone. |out_buffer_view| is NULL on failure.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_parse(
+ iree_string_view_t value, iree_hal_allocator_t* buffer_allocator,
+ iree_hal_buffer_view_t** out_buffer_view) {
+ IREE_ASSERT_ARGUMENT(buffer_allocator);
+ IREE_ASSERT_ARGUMENT(out_buffer_view);
+ *out_buffer_view = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status =
+ iree_hal_buffer_view_parse_impl(value, buffer_allocator, out_buffer_view);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Appends one character to |buffer| (when non-NULL, keeping it NUL-terminated)
+// and always advances |buffer_length| so the caller can measure required
+// capacity. On overflow |buffer| is cleared so later appends only count.
+// NOTE(review): when buffer != NULL and buffer_capacity == 0 the unsigned
+// expression `buffer_capacity - 1` wraps to SIZE_MAX and buffer[0]/buffer[1]
+// are written out of bounds — confirm no caller passes a non-NULL buffer with
+// zero capacity.
+#define APPEND_CHAR(c) \
+ { \
+ if (buffer) { \
+ if (buffer_length < buffer_capacity - 1) { \
+ buffer[buffer_length] = c; \
+ buffer[buffer_length + 1] = '\0'; \
+ } else { \
+ buffer = NULL; \
+ } \
+ } \
+ ++buffer_length; \
+ }
+
+// Formats |buffer_view| as `[shape x]type=elements` into |buffer|. When
+// |buffer| is NULL (or a sub-formatter reports OUT_OF_RANGE, which clears
+// |buffer|) the function keeps counting so |out_buffer_length| always receives
+// the total length required; OUT_OF_RANGE is then returned at the end.
+static iree_status_t iree_hal_buffer_view_format_impl(
+ const iree_hal_buffer_view_t* buffer_view,
+ iree_host_size_t max_element_count, iree_host_size_t buffer_capacity,
+ char* buffer, iree_host_size_t* out_buffer_length) {
+ if (out_buffer_length) {
+ *out_buffer_length = 0;
+ }
+ if (buffer && buffer_capacity) {
+ buffer[0] = 0;
+ }
+
+ iree_host_size_t buffer_length = 0;
+ if (iree_hal_buffer_view_shape_rank(buffer_view) > 0) {
+ // Shape: 1x2x3
+ iree_host_size_t shape_length = 0;
+ iree_status_t status = iree_hal_format_shape(
+ iree_hal_buffer_view_shape_dims(buffer_view),
+ iree_hal_buffer_view_shape_rank(buffer_view),
+ buffer ? buffer_capacity - buffer_length : 0,
+ buffer ? buffer + buffer_length : NULL, &shape_length);
+ buffer_length += shape_length;
+ // OUT_OF_RANGE switches to counting-only mode rather than failing.
+ if (iree_status_is_out_of_range(status)) {
+ status = iree_status_ignore(status);
+ buffer = NULL;
+ } else if (!iree_status_is_ok(status)) {
+ return status;
+ }
+
+ // Separator: <shape>x<format>
+ APPEND_CHAR('x');
+ }
+
+ // Element type: f32
+ iree_host_size_t element_type_length = 0;
+ iree_status_t status = iree_hal_format_element_type(
+ iree_hal_buffer_view_element_type(buffer_view),
+ buffer ? buffer_capacity - buffer_length : 0,
+ buffer ? buffer + buffer_length : NULL, &element_type_length);
+ buffer_length += element_type_length;
+ if (iree_status_is_out_of_range(status)) {
+ status = iree_status_ignore(status);
+ buffer = NULL;
+ } else if (!iree_status_is_ok(status)) {
+ return status;
+ }
+
+ // TODO(benvanik): allow printing the encoding.
+
+ // Separator: <meta>=<value>
+ APPEND_CHAR('=');
+
+ // Buffer contents: 0 1 2 3 ...
+ // Requires a readable host mapping of the whole underlying buffer.
+ iree_hal_buffer_mapping_t buffer_mapping = {{0}};
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+ iree_hal_buffer_view_buffer(buffer_view), IREE_HAL_MAPPING_MODE_SCOPED,
+ IREE_HAL_MEMORY_ACCESS_READ, 0, IREE_WHOLE_BUFFER, &buffer_mapping));
+ iree_host_size_t elements_length = 0;
+ status = iree_hal_format_buffer_elements(
+ iree_make_const_byte_span(buffer_mapping.contents.data,
+ buffer_mapping.contents.data_length),
+ iree_hal_buffer_view_shape_dims(buffer_view),
+ iree_hal_buffer_view_shape_rank(buffer_view),
+ iree_hal_buffer_view_element_type(buffer_view), max_element_count,
+ buffer ? buffer_capacity - buffer_length : 0,
+ buffer ? buffer + buffer_length : NULL, &elements_length);
+ buffer_length += elements_length;
+ // Unmap regardless of formatting outcome, joining any unmap failure.
+ status =
+ iree_status_join(status, iree_hal_buffer_unmap_range(&buffer_mapping));
+ if (iree_status_is_out_of_range(status)) {
+ status = iree_status_ignore(status);
+ buffer = NULL;
+ } else if (!iree_status_is_ok(status)) {
+ return status;
+ }
+
+ if (out_buffer_length) {
+ *out_buffer_length = buffer_length;
+ }
+ // buffer == NULL here means some portion was truncated (or caller passed
+ // NULL to measure): report OUT_OF_RANGE with the required length set above.
+ return buffer ? iree_ok_status()
+ : iree_status_from_code(IREE_STATUS_OUT_OF_RANGE);
+}
+
+// Public entry point for buffer view formatting; adds argument checking and a
+// trace zone around the untraced implementation.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_format(
+    const iree_hal_buffer_view_t* buffer_view,
+    iree_host_size_t max_element_count, iree_host_size_t buffer_capacity,
+    char* buffer, iree_host_size_t* out_buffer_length) {
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t format_status = iree_hal_buffer_view_format_impl(
+      buffer_view, max_element_count, buffer_capacity, buffer,
+      out_buffer_length);
+  IREE_TRACE_ZONE_END(z0);
+  return format_status;
+}
+
+// TODO(benvanik): streaming all the way down (needs string_util updates).
+// Prints |buffer_view| to |file| using a two-pass format: first a sizing pass
+// (NULL buffer, capacity 0), then a real formatting pass into a transient
+// allocation from |host_allocator|.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_fprint(
+    FILE* file, const iree_hal_buffer_view_t* buffer_view,
+    iree_host_size_t max_element_count, iree_allocator_t host_allocator) {
+  IREE_ASSERT_ARGUMENT(file);
+  IREE_ASSERT_ARGUMENT(buffer_view);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Query the string length (in characters).
+  // The sizing pass is *expected* to fail with OUT_OF_RANGE (capacity 0); any
+  // other status — including OK — is surfaced to the caller as-is.
+  iree_host_size_t buffer_length = 0;
+  iree_status_t status = iree_hal_buffer_view_format(
+      buffer_view, max_element_count, 0, NULL, &buffer_length);
+  if (!iree_status_is_out_of_range(status)) {
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+
+  // Allocate scratch space to format in to.
+  // We should be streaming.
+  iree_host_size_t buffer_capacity = buffer_length + 1;  // NUL
+  char* buffer = NULL;
+  status =
+      iree_allocator_malloc(host_allocator, buffer_capacity, (void**)&buffer);
+
+  // Format the buffer into the string storage.
+  if (iree_status_is_ok(status)) {
+    status =
+        iree_hal_buffer_view_format(buffer_view, max_element_count,
+                                    buffer_capacity, buffer, &buffer_length);
+  }
+
+  // Dump to the file.
+  if (iree_status_is_ok(status)) {
+    fprintf(file, "%.*s", (int)buffer_length, buffer);
+  }
+
+  // Free unconditionally; iree_allocator_free on the NULL malloc-failure path
+  // is benign.
+  iree_allocator_free(host_allocator, buffer);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/hal/buffer_view_util.h b/runtime/src/iree/hal/buffer_view_util.h
new file mode 100644
index 0000000..a7d7f61
--- /dev/null
+++ b/runtime/src/iree/hal/buffer_view_util.h
@@ -0,0 +1,148 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_BUFFER_VIEW_UTIL_H_
+#define IREE_HAL_BUFFER_VIEW_UTIL_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/buffer_view.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Buffer view math
+//===----------------------------------------------------------------------===//
+
+// Calculates the allocation size of a buffer view.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_compute_view_size(
+ const iree_hal_dim_t* shape, iree_host_size_t shape_rank,
+ iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type,
+ iree_device_size_t* out_allocation_size);
+
+// Calculates a byte offset into a buffer at the given indices.
+// Only works with densely-packed representations.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_compute_view_offset(
+ const iree_hal_dim_t* shape, iree_host_size_t shape_rank,
+ iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type, const iree_hal_dim_t* indices,
+ size_t indices_count, iree_device_size_t* out_offset);
+
+// Calculates a byte range into a buffer of the given contiguous range.
+// Only works with densely-packed representations.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_compute_view_range(
+ const iree_hal_dim_t* shape, iree_host_size_t shape_rank,
+ iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type, const iree_hal_dim_t* start_indices,
+ iree_host_size_t indices_count, const iree_hal_dim_t* lengths,
+ iree_host_size_t lengths_count, iree_device_size_t* out_start_offset,
+ iree_device_size_t* out_length);
+
+//===----------------------------------------------------------------------===//
+// Buffer view allocation and generation
+//===----------------------------------------------------------------------===//
+
+// Allocates a buffer from |allocator| and wraps it in a buffer view.
+//
+// This is equivalent to:
+// 1. iree_hal_buffer_compute_view_size
+// 2. iree_hal_allocator_allocate_buffer
+// 3. iree_hal_buffer_view_create
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_allocate_buffer(
+ iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type,
+ iree_hal_buffer_params_t buffer_params, iree_const_byte_span_t initial_data,
+ iree_hal_buffer_view_t** out_buffer_view);
+
+typedef iree_status_t(IREE_API_PTR* iree_hal_buffer_view_generator_callback_t)(
+ iree_hal_buffer_mapping_t* mapping, void* user_data);
+
+// Generates a buffer view with its initial contents produced by a callback.
+// When host and device memory are shared this allows direct generation into the
+// target device buffer. If not shared this can avoid expensive transfer mapping
+// operations at the cost of a transient host memory allocation. The mapped host
+// pointer passed to the callback is only valid within the callback.
+//
+// Buffers allocated like this do not need the IREE_HAL_BUFFER_USAGE_MAPPING bit
+// set; it will be added automatically if the allocator needs it and otherwise
+// the memory can remain unmappable (and thus fully device isolated).
+//
+// As this _may_ require allocation of the entire buffer content in host memory
+// it is always preferable to stage and issue copy commands via the device
+// queue. Even better is to do all generation on-device via dispatches without
+// the need to ever transfer. Usage of this method should be limited to times
+// where device-side generation isn't possible or memory consumption is not a
+// concern.
+//
+// This is equivalent to:
+// 1. iree_hal_buffer_compute_view_size
+// 2. iree_hal_allocator_allocate_buffer
+// 3. iree_hal_buffer_map_range + callback + iree_hal_buffer_unmap_range
+// 4. iree_hal_buffer_view_create
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_generate_buffer(
+ iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+ iree_hal_encoding_type_t encoding_type,
+ iree_hal_buffer_params_t buffer_params,
+ iree_hal_buffer_view_generator_callback_t callback, void* user_data,
+ iree_hal_buffer_view_t** out_buffer_view);
+
+//===----------------------------------------------------------------------===//
+// Buffer view parsing and printing
+//===----------------------------------------------------------------------===//
+
+// Parses a serialized set of buffer elements in the canonical tensor format
+// (the same as produced by iree_hal_buffer_view_format). The underlying buffer
+// will be allocated with |buffer_allocator| as a host-local/device-visible
+// buffer.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_parse(
+ iree_string_view_t value, iree_hal_allocator_t* buffer_allocator,
+ iree_hal_buffer_view_t** out_buffer_view);
+
+// TODO(#5413): enum for printing mode (include shape, precision).
+
+// Converts buffer view elements into a fully-specified string-form format like
+// `2x4xi16=[[1 2][3 4]]`.
+//
+// |max_element_count| can be used to limit the total number of elements printed
+// when the count may be large. Elided elements will be replaced with `...`.
+//
+// |buffer_capacity| defines the size of |buffer| in bytes and
+// |out_buffer_length| will return the string length in characters. Returns
+// IREE_STATUS_OUT_OF_RANGE if the buffer capacity is insufficient to hold the
+// formatted elements and |out_buffer_length| will contain the required size.
+//
+// Follows the standard API string formatting rules. See iree/base/api.h.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_format(
+ const iree_hal_buffer_view_t* buffer_view,
+ iree_host_size_t max_element_count, iree_host_size_t buffer_capacity,
+ char* buffer, iree_host_size_t* out_buffer_length);
+
+// Prints buffer view elements into a fully-specified string-form format like
+// `2x4xi16=[[1 2][3 4]]`.
+//
+// |max_element_count| can be used to limit the total number of elements printed
+// when the count may be large. Elided elements will be replaced with `...`.
+//
+// |host_allocator| will be used for any transient allocations required while
+// printing.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_view_fprint(
+ FILE* file, const iree_hal_buffer_view_t* buffer_view,
+ iree_host_size_t max_element_count, iree_allocator_t host_allocator);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_BUFFER_VIEW_UTIL_H_
diff --git a/runtime/src/iree/hal/command_buffer.c b/runtime/src/iree/hal/command_buffer.c
new file mode 100644
index 0000000..e4c7fdf
--- /dev/null
+++ b/runtime/src/iree/hal/command_buffer.c
@@ -0,0 +1,523 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/command_buffer.h"
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/command_buffer_validation.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/device.h"
+#include "iree/hal/resource.h"
+
+// Conditionally executes an expression based on whether command buffer
+// validation was enabled in the build and the command buffer wants validation.
+#if IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE
+#define IF_VALIDATING(command_buffer, expr) \
+ if (((command_buffer)->mode & IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED) == \
+ 0) { \
+ expr; \
+ }
+#else
+#define IF_VALIDATING(command_buffer, expr)
+#endif // IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE
+
+#define _VTABLE_DISPATCH(command_buffer, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(command_buffer, iree_hal_command_buffer, method_name)
+
+//===----------------------------------------------------------------------===//
+// String utils
+//===----------------------------------------------------------------------===//
+
+// Formats command buffer mode bits into a readable string such as
+// `ONE_SHOT|UNVALIDATED` using the shared bitfield formatter.
+IREE_API_EXPORT iree_string_view_t
+iree_hal_command_buffer_mode_format(iree_hal_command_buffer_mode_t value,
+                                    iree_bitfield_string_temp_t* out_temp) {
+  // Static mode-bit -> display-name table.
+  static const iree_bitfield_string_mapping_t mode_mappings[] = {
+      {IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT, IREE_SVL("ONE_SHOT")},
+      {IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION,
+       IREE_SVL("ALLOW_INLINE_EXECUTION")},
+      {IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED, IREE_SVL("UNVALIDATED")},
+  };
+  return iree_bitfield_format_inline(value, mode_mappings,
+                                     IREE_ARRAYSIZE(mode_mappings), out_temp);
+}
+
+// Formats command category bits into a readable string; combined values
+// (`ANY`) are listed before their component bits so they match first.
+IREE_API_EXPORT iree_string_view_t iree_hal_command_category_format(
+    iree_hal_command_category_t value, iree_bitfield_string_temp_t* out_temp) {
+  static const iree_bitfield_string_mapping_t category_mappings[] = {
+      // Combined:
+      {IREE_HAL_COMMAND_CATEGORY_ANY, IREE_SVL("ANY")},
+      // Separate:
+      {IREE_HAL_COMMAND_CATEGORY_TRANSFER, IREE_SVL("TRANSFER")},
+      {IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_SVL("DISPATCH")},
+  };
+  return iree_bitfield_format_inline(value, category_mappings,
+                                     IREE_ARRAYSIZE(category_mappings),
+                                     out_temp);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_t
+//===----------------------------------------------------------------------===//
+
+IREE_HAL_API_RETAIN_RELEASE(command_buffer);
+
+// Initializes the base command buffer fields shared by all implementations:
+// resource/vtable wiring, mode bits, allowed categories, and queue affinity.
+// When validation is compiled in and the UNVALIDATED mode bit is not set,
+// also hooks up validation state against |device|.
+IREE_API_EXPORT void iree_hal_command_buffer_initialize(
+    iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_command_buffer_vtable_t* vtable,
+    iree_hal_command_buffer_t* command_buffer) {
+  iree_hal_resource_initialize(vtable, &command_buffer->resource);
+  command_buffer->mode = mode;
+  command_buffer->allowed_categories = command_categories;
+  command_buffer->queue_affinity = queue_affinity;
+
+  // Perform initialization validation after we allocate/initialize the concrete
+  // implementation.
+  IF_VALIDATING(command_buffer, {
+    iree_hal_command_buffer_initialize_validation(device, command_buffer);
+  });
+}
+
+// Creates a command buffer by dispatching to the device implementation after
+// rejecting invalid mode combinations.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_create(
+    iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  IREE_ASSERT_ARGUMENT(device);
+  IREE_ASSERT_ARGUMENT(out_command_buffer);
+  *out_command_buffer = NULL;
+
+  // Inline execution is only supported on one-shot command buffers.
+  const bool wants_inline = iree_all_bits_set(
+      mode, IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION);
+  if (wants_inline &&
+      !iree_all_bits_set(mode, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT)) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "inline command buffers must be one-shot and primary");
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status =
+      IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device, create_command_buffer)(
+          device, mode, command_categories, queue_affinity, out_command_buffer);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Attempts to cast |command_buffer| to the type identified by |vtable|,
+// returning NULL when the cast is unsupported.
+IREE_API_EXPORT void* iree_hal_command_buffer_dyn_cast(
+    iree_hal_command_buffer_t* command_buffer, const void* vtable) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  // Fast path: the command buffer itself has the requested vtable.
+  if (iree_hal_resource_is(command_buffer, vtable)) {
+    return command_buffer;
+  }
+  // Otherwise defer to the implementation's own dyn_cast.
+  return _VTABLE_DISPATCH(command_buffer, dyn_cast)(command_buffer, vtable);
+}
+
+// Returns the mode bits the command buffer was initialized with.
+IREE_API_EXPORT iree_hal_command_buffer_mode_t
+iree_hal_command_buffer_mode(const iree_hal_command_buffer_t* command_buffer) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  const iree_hal_command_buffer_mode_t mode = command_buffer->mode;
+  return mode;
+}
+
+// Returns the command categories the command buffer may record.
+IREE_API_EXPORT iree_hal_command_category_t
+iree_hal_command_buffer_allowed_categories(
+    const iree_hal_command_buffer_t* command_buffer) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  const iree_hal_command_category_t categories =
+      command_buffer->allowed_categories;
+  return categories;
+}
+
+// Begins recording. When validation is enabled for this command buffer the
+// validation check runs first; IREE_RETURN_AND_END_ZONE_IF_ERROR early-returns
+// (closing the trace zone) on validation failure.
+IREE_API_EXPORT iree_status_t
+iree_hal_command_buffer_begin(iree_hal_command_buffer_t* command_buffer) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_begin_validation(command_buffer));
+  });
+  iree_status_t status =
+      _VTABLE_DISPATCH(command_buffer, begin)(command_buffer);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Ends recording; validation (when enabled) runs before the implementation.
+IREE_API_EXPORT iree_status_t
+iree_hal_command_buffer_end(iree_hal_command_buffer_t* command_buffer) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_end_validation(command_buffer));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, end)(command_buffer);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Pushes a debug group label; returns void, so validation here cannot fail
+// the call — it is invoked for its side effects only.
+IREE_API_EXPORT void iree_hal_command_buffer_begin_debug_group(
+    iree_hal_command_buffer_t* command_buffer, iree_string_view_t label,
+    iree_hal_label_color_t label_color,
+    const iree_hal_label_location_t* location) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IF_VALIDATING(command_buffer,
+                iree_hal_command_buffer_begin_debug_group_validation(
+                    command_buffer, label, label_color, location));
+  _VTABLE_DISPATCH(command_buffer, begin_debug_group)
+  (command_buffer, label, label_color, location);
+}
+
+// Pops the most recent debug group label (void; validation is side-effect
+// only, matching begin_debug_group).
+IREE_API_EXPORT void iree_hal_command_buffer_end_debug_group(
+    iree_hal_command_buffer_t* command_buffer) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IF_VALIDATING(
+      command_buffer,
+      iree_hal_command_buffer_end_debug_group_validation(command_buffer));
+  _VTABLE_DISPATCH(command_buffer, end_debug_group)
+  (command_buffer);
+}
+
+// Records an execution barrier between |source_stage_mask| and
+// |target_stage_mask| with optional memory/buffer barriers. Standard wrapper
+// shape: assert args, trace zone, optional validation (early-return macro),
+// then vtable dispatch.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_execution_barrier(
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_hal_execution_barrier_flags_t flags,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_execution_barrier_validation(
+                command_buffer, source_stage_mask, target_stage_mask, flags,
+                memory_barrier_count, memory_barriers, buffer_barrier_count,
+                buffer_barriers));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, execution_barrier)(
+      command_buffer, source_stage_mask, target_stage_mask, flags,
+      memory_barrier_count, memory_barriers, buffer_barrier_count,
+      buffer_barriers);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a signal of |event| once |source_stage_mask| completes.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_signal_event(
+    iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(event);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_signal_event_validation(
+                command_buffer, event, source_stage_mask));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, signal_event)(
+      command_buffer, event, source_stage_mask);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a reset of |event|; mirrors signal_event.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_reset_event(
+    iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(event);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_reset_event_validation(
+                command_buffer, event, source_stage_mask));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, reset_event)(
+      command_buffer, event, source_stage_mask);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_wait_events(
+ iree_hal_command_buffer_t* command_buffer, iree_host_size_t event_count,
+ const iree_hal_event_t** events,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers) {
+ IREE_ASSERT_ARGUMENT(command_buffer);
+ IREE_ASSERT_ARGUMENT(!event_count || events);
+ IREE_ASSERT_ARGUMENT(!memory_barrier_count || memory_barriers);
+ IREE_ASSERT_ARGUMENT(!buffer_barrier_count || buffer_barriers);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IF_VALIDATING(command_buffer, {
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_command_buffer_wait_events_validation(
+ command_buffer, event_count, events, source_stage_mask,
+ target_stage_mask, memory_barrier_count, memory_barriers,
+ buffer_barrier_count, buffer_barriers));
+ });
+ iree_status_t status = _VTABLE_DISPATCH(command_buffer, wait_events)(
+ command_buffer, event_count, events, source_stage_mask, target_stage_mask,
+ memory_barrier_count, memory_barriers, buffer_barrier_count,
+ buffer_barriers);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Records a discard of |buffer| contents (a hint; see the header for the
+// contract).
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_discard_buffer(
+    iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_discard_buffer_validation(command_buffer,
+                                                              buffer));
+  });
+  iree_status_t status =
+      _VTABLE_DISPATCH(command_buffer, discard_buffer)(command_buffer, buffer);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a fill of |target_buffer| with a repeating |pattern| of
+// |pattern_length| bytes.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_fill_buffer(
+    iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* target_buffer,
+    iree_device_size_t target_offset, iree_device_size_t length,
+    const void* pattern, iree_host_size_t pattern_length) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_fill_buffer_validation(
+                command_buffer, target_buffer, target_offset, length, pattern,
+                pattern_length));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, fill_buffer)(
+      command_buffer, target_buffer, target_offset, length, pattern,
+      pattern_length);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records an update of |target_buffer| from host memory at |source_buffer|.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_update_buffer(
+    iree_hal_command_buffer_t* command_buffer, const void* source_buffer,
+    iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+    iree_device_size_t target_offset, iree_device_size_t length) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_update_buffer_validation(
+                command_buffer, source_buffer, source_offset, target_buffer,
+                target_offset, length));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, update_buffer)(
+      command_buffer, source_buffer, source_offset, target_buffer,
+      target_offset, length);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a device-side copy of |length| bytes from |source_buffer| to
+// |target_buffer|.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_copy_buffer(
+    iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* source_buffer,
+    iree_device_size_t source_offset, iree_hal_buffer_t* target_buffer,
+    iree_device_size_t target_offset, iree_device_size_t length) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  // Consistency with fill/update/discard: both buffer arguments are required,
+  // so assert them here to fail fast in debug builds rather than deep inside
+  // validation or the implementation.
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_copy_buffer_validation(
+                command_buffer, source_buffer, source_offset, target_buffer,
+                target_offset, length));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, copy_buffer)(
+      command_buffer, source_buffer, source_offset, target_buffer,
+      target_offset, length);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a push-constant update at byte |offset| within |executable_layout|.
+// A zero-length update is a no-op and skipped before tracing/validation.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_push_constants(
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+    const void* values, iree_host_size_t values_length) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(executable_layout);
+  IREE_ASSERT_ARGUMENT(values);
+  if (IREE_UNLIKELY(values_length == 0)) {
+    return iree_ok_status();
+  }
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
+        iree_hal_command_buffer_push_constants_validation(
+            command_buffer, executable_layout, offset, values, values_length));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, push_constants)(
+      command_buffer, executable_layout, offset, values, values_length);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records an inline (push) descriptor set update for |set| on
+// |executable_layout|; |bindings| may be NULL only when |binding_count| is 0.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_push_descriptor_set(
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(executable_layout);
+  IREE_ASSERT_ARGUMENT(!binding_count || bindings);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
+        iree_hal_command_buffer_push_descriptor_set_validation(
+            command_buffer, executable_layout, set, binding_count, bindings));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, push_descriptor_set)(
+      command_buffer, executable_layout, set, binding_count, bindings);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records binding of a pre-allocated |descriptor_set| with optional dynamic
+// offsets; |dynamic_offsets| may be NULL only when |dynamic_offset_count| is 0.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_bind_descriptor_set(
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_hal_descriptor_set_t* descriptor_set,
+    iree_host_size_t dynamic_offset_count,
+    const iree_device_size_t* dynamic_offsets) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(executable_layout);
+  IREE_ASSERT_ARGUMENT(descriptor_set);
+  IREE_ASSERT_ARGUMENT(!dynamic_offset_count || dynamic_offsets);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_bind_descriptor_set_validation(
+                command_buffer, executable_layout, set, descriptor_set,
+                dynamic_offset_count, dynamic_offsets));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, bind_descriptor_set)(
+      command_buffer, executable_layout, set, descriptor_set,
+      dynamic_offset_count, dynamic_offsets);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a dispatch of |executable|'s |entry_point| over the given static
+// workgroup counts.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_dispatch(
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(executable);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_dispatch_validation(
+                command_buffer, executable, entry_point, workgroup_x,
+                workgroup_y, workgroup_z));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, dispatch)(
+      command_buffer, executable, entry_point, workgroup_x, workgroup_y,
+      workgroup_z);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a dispatch whose workgroup counts are read from
+// |workgroups_buffer| at |workgroups_offset| at execution time.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_dispatch_indirect(
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    iree_hal_buffer_t* workgroups_buffer,
+    iree_device_size_t workgroups_offset) {
+  IREE_ASSERT_ARGUMENT(command_buffer);
+  IREE_ASSERT_ARGUMENT(executable);
+  IREE_ASSERT_ARGUMENT(workgroups_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IF_VALIDATING(command_buffer, {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_command_buffer_dispatch_indirect_validation(
+                command_buffer, executable, entry_point, workgroups_buffer,
+                workgroups_offset));
+  });
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, dispatch_indirect)(
+      command_buffer, executable, entry_point, workgroups_buffer,
+      workgroups_offset);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+//===----------------------------------------------------------------------===//
+// Utilities for command buffer creation
+//===----------------------------------------------------------------------===//
+
+// Builds a fully-recorded transfer-only command buffer from a list of
+// fill/copy/update commands. On success ownership of the command buffer
+// transfers to the caller; on any failure the partially-recorded command
+// buffer is released and the first failing status is returned.
+IREE_API_EXPORT iree_status_t iree_hal_create_transfer_command_buffer(
+    iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t transfer_count,
+    const iree_hal_transfer_command_t* transfer_commands,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_command_buffer_create(device, mode,
+                                         IREE_HAL_COMMAND_CATEGORY_TRANSFER,
+                                         queue_affinity, &command_buffer));
+
+  // Record each command in order, stopping at the first failure.
+  iree_status_t status = iree_hal_command_buffer_begin(command_buffer);
+  if (iree_status_is_ok(status)) {
+    for (iree_host_size_t i = 0; i < transfer_count; ++i) {
+      const iree_hal_transfer_command_t* transfer_command =
+          &transfer_commands[i];
+      switch (transfer_command->type) {
+        case IREE_HAL_TRANSFER_COMMAND_TYPE_FILL:
+          status = iree_hal_command_buffer_fill_buffer(
+              command_buffer, transfer_command->fill.target_buffer,
+              transfer_command->fill.target_offset,
+              transfer_command->fill.length, transfer_command->fill.pattern,
+              transfer_command->fill.pattern_length);
+          break;
+        case IREE_HAL_TRANSFER_COMMAND_TYPE_COPY:
+          status = iree_hal_command_buffer_copy_buffer(
+              command_buffer, transfer_command->copy.source_buffer,
+              transfer_command->copy.source_offset,
+              transfer_command->copy.target_buffer,
+              transfer_command->copy.target_offset,
+              transfer_command->copy.length);
+          break;
+        case IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE:
+          status = iree_hal_command_buffer_update_buffer(
+              command_buffer, transfer_command->update.source_buffer,
+              transfer_command->update.source_offset,
+              transfer_command->update.target_buffer,
+              transfer_command->update.target_offset,
+              transfer_command->update.length);
+          break;
+        default:
+          status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                    "unknown transfer_commands[%zu] type %d", i,
+                                    (int)transfer_command->type);
+          break;
+      }
+      if (!iree_status_is_ok(status)) break;
+    }
+  }
+  // end() runs even after a recording failure; join keeps the first error.
+  status =
+      iree_status_join(status, iree_hal_command_buffer_end(command_buffer));
+
+  if (iree_status_is_ok(status)) {
+    *out_command_buffer = command_buffer;
+  } else {
+    iree_hal_command_buffer_release(command_buffer);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/hal/command_buffer.h b/runtime/src/iree/hal/command_buffer.h
new file mode 100644
index 0000000..c06da30
--- /dev/null
+++ b/runtime/src/iree/hal/command_buffer.h
@@ -0,0 +1,694 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_COMMAND_BUFFER_H_
+#define IREE_HAL_COMMAND_BUFFER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/descriptor_set.h"
+#include "iree/hal/descriptor_set_layout.h"
+#include "iree/hal/event.h"
+#include "iree/hal/executable.h"
+#include "iree/hal/executable_layout.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// A bitfield specifying the mode of operation for a command buffer.
+enum iree_hal_command_buffer_mode_bits_t {
+  // Command buffer will be submitted once and never used again.
+  // This may enable in-place patching of command buffers that reduce overhead
+  // when it's known that command buffers will not be reused.
+  IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT = 1u << 0,
+
+  // TODO(benvanik): IREE_HAL_COMMAND_BUFFER_MODE_REUSABLE = 1u << 1,
+  // TODO(benvanik): IREE_HAL_COMMAND_BUFFER_MODE_PRIMARY = 1u << 2,
+  // TODO(benvanik): IREE_HAL_COMMAND_BUFFER_MODE_SECONDARY = 1u << 3,
+
+  // Indicates that the command buffer execution is allowed to execute inline
+  // with recording. The exact execution behavior is unspecified by the API and
+  // intentionally unknowable and must always assume to happen entirely
+  // asynchronously and that it will only have completed after waiting on device
+  // idle or the wait semaphores specified in the submission are signaled.
+  //
+  // Local backends can use this to avoid recording when the calling program can
+  // guarantee that it makes no assumptions about execution being deferred until
+  // a submission. The command buffer must still be submitted for scheduling and
+  // must have no wait semaphores specified. This allows the same program code
+  // to execute work both synchronously and asynchronously as remote backends
+  // are allowed to ignore this.
+  //
+  // Remote backends can use this to flush the command buffer more aggressively
+  // to begin early execution and overlap with continued recording.
+  //
+  // Requires IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT and
+  // IREE_HAL_COMMAND_BUFFER_MODE_PRIMARY. Compatible with
+  // IREE_HAL_COMMAND_BUFFER_MODE_REUSABLE.
+  // NOTE(review): PRIMARY and REUSABLE are only reserved via the TODOs above
+  // and are not yet defined in this enum; this requirement cannot currently be
+  // expressed in code.
+  IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION = 1u << 4,
+
+  // Disables additional command buffer validation (if present).
+  // By default all command buffers will be validated if
+  // `IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=1` - if shimming command buffers
+  // or performing replay this validation can be disabled per-command buffer.
+  IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED = 1u << 5,
+};
+// Bitmask of iree_hal_command_buffer_mode_bits_t values.
+typedef uint32_t iree_hal_command_buffer_mode_t;
+
+// A bitfield specifying the category of commands in a command queue.
+enum iree_hal_command_category_bits_t {
+ // Command is considered a transfer operation (memcpy, etc).
+ IREE_HAL_COMMAND_CATEGORY_TRANSFER = 1u << 0,
+ // Command is considered a dispatch operation (dispatch/execute).
+ IREE_HAL_COMMAND_CATEGORY_DISPATCH = 1u << 1,
+ // Commands may be of any type.
+ // Using this value may prevent optimizations and if possible callers should
+ // always specify the strictest set possible (for example, only transfer
+ // commands to ensure they get placed on a DMA queue).
+ IREE_HAL_COMMAND_CATEGORY_ANY =
+ IREE_HAL_COMMAND_CATEGORY_TRANSFER | IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+};
+typedef uint32_t iree_hal_command_category_t;
+
+// Bitfield specifying which execution stage a barrier should start/end at.
+//
+// Maps to VkPipelineStageFlagBits.
+enum iree_hal_execution_stage_bits_t {
+ // Top of the pipeline when commands are initially issued by the device.
+ IREE_HAL_EXECUTION_STAGE_COMMAND_ISSUE = 1u << 0,
+ // Stage of the pipeline when dispatch parameter data is consumed.
+ IREE_HAL_EXECUTION_STAGE_COMMAND_PROCESS = 1u << 1,
+ // Stage where dispatch commands execute.
+ IREE_HAL_EXECUTION_STAGE_DISPATCH = 1u << 2,
+ // Stage where transfer (copy/clear/fill/etc) commands execute.
+ IREE_HAL_EXECUTION_STAGE_TRANSFER = 1u << 3,
+ // Final stage in the pipeline when commands are retired on the device.
+ IREE_HAL_EXECUTION_STAGE_COMMAND_RETIRE = 1u << 4,
+ // Pseudo-stage for read/writes by the host. Not executed on device.
+ IREE_HAL_EXECUTION_STAGE_HOST = 1u << 5,
+};
+typedef uint32_t iree_hal_execution_stage_t;
+
+// Bitfield specifying flags controlling an execution dependency.
+//
+// Maps to VkDependencyFlags.
+enum iree_hal_execution_barrier_flag_bits_t {
+ IREE_HAL_EXECUTION_BARRIER_FLAG_NONE = 0,
+};
+typedef uint32_t iree_hal_execution_barrier_flags_t;
+
+// Bitfield specifying which scopes will access memory and how.
+//
+// Maps to VkAccessFlagBits.
+enum iree_hal_access_scope_bits_t {
+ // Read access to indirect command data as part of an indirect dispatch.
+ IREE_HAL_ACCESS_SCOPE_INDIRECT_COMMAND_READ = 1u << 0,
+ // Constant uniform buffer reads by the device.
+ IREE_HAL_ACCESS_SCOPE_CONSTANT_READ = 1u << 1,
+ // Storage buffer reads by dispatch commands.
+ IREE_HAL_ACCESS_SCOPE_DISPATCH_READ = 1u << 2,
+ // Storage buffer writes by dispatch commands.
+ IREE_HAL_ACCESS_SCOPE_DISPATCH_WRITE = 1u << 3,
+ // Source of a transfer operation.
+ IREE_HAL_ACCESS_SCOPE_TRANSFER_READ = 1u << 4,
+ // Target of a transfer operation.
+ IREE_HAL_ACCESS_SCOPE_TRANSFER_WRITE = 1u << 5,
+ // Read operation by the host through mapped memory.
+ IREE_HAL_ACCESS_SCOPE_HOST_READ = 1u << 6,
+ // Write operation by the host through mapped memory.
+ IREE_HAL_ACCESS_SCOPE_HOST_WRITE = 1u << 7,
+ // External/non-specific read.
+ IREE_HAL_ACCESS_SCOPE_MEMORY_READ = 1u << 8,
+ // External/non-specific write.
+ IREE_HAL_ACCESS_SCOPE_MEMORY_WRITE = 1u << 9,
+};
+typedef uint32_t iree_hal_access_scope_t;
+
+// Defines a global memory barrier.
+// These are cheaper to encode than buffer-specific barriers but may cause
+// stalls and bubbles in device pipelines if applied too broadly. Prefer them
+// over equivalently large sets of buffer-specific barriers (such as when
+// completely changing execution contexts).
+//
+// Maps to VkMemoryBarrier.
+typedef struct iree_hal_memory_barrier_t {
+ // All access scopes prior-to the barrier (inclusive).
+ iree_hal_access_scope_t source_scope;
+ // All access scopes following the barrier (inclusive).
+ iree_hal_access_scope_t target_scope;
+} iree_hal_memory_barrier_t;
+
+// Defines a memory barrier that applies to a range of a specific buffer.
+// Use of these (vs. global memory barriers) provides fine-grained execution
+// ordering to device command processors and allows for more aggressive
+// reordering.
+//
+// Maps to VkBufferMemoryBarrier.
+typedef struct iree_hal_buffer_barrier_t {
+ // All access scopes prior-to the barrier (inclusive).
+ iree_hal_access_scope_t source_scope;
+ // All access scopes following the barrier (inclusive).
+ iree_hal_access_scope_t target_scope;
+ // Buffer the barrier is restricted to.
+ // The barrier will apply to the entire physical device allocation.
+ iree_hal_buffer_t* buffer;
+ // Relative offset/length within |buffer| (which may itself be mapped into the
+ // device allocation at an offset).
+ iree_device_size_t offset;
+ iree_device_size_t length;
+} iree_hal_buffer_barrier_t;
+
+// An RGBA color.
+typedef struct iree_hal_label_color_t {
+ uint8_t r;
+ uint8_t g;
+ uint8_t b;
+ uint8_t a;
+} iree_hal_label_color_t;
+
+// A source location attached to debug labels.
+typedef struct iree_hal_label_location_t {
+ iree_string_view_t file;
+ int line;
+} iree_hal_label_location_t;
+
+// An unspecified color; debugging tools are to choose their own.
+// The all-zero RGBA value acts as the sentinel for "no color specified".
+static inline iree_hal_label_color_t iree_hal_label_color_unspecified() {
+  iree_hal_label_color_t color = {0, 0, 0, 0};
+  return color;
+}
+
+// Formats a command buffer mode bitfield as a string.
+// See iree_bitfield_format for usage.
+IREE_API_EXPORT iree_string_view_t
+iree_hal_command_buffer_mode_format(iree_hal_command_buffer_mode_t value,
+ iree_bitfield_string_temp_t* out_temp);
+
+// Formats a command category bitfield as a string.
+// See iree_bitfield_format for usage.
+IREE_API_EXPORT iree_string_view_t iree_hal_command_category_format(
+ iree_hal_command_category_t value, iree_bitfield_string_temp_t* out_temp);
+
+// Storage for command buffer validation state.
+// Designed to be embedded in concrete implementations that want validation.
+typedef struct iree_hal_command_buffer_validation_state_t {
+ iree_hal_device_t* device;
+ bool is_recording;
+ int32_t debug_group_depth;
+ // TODO(benvanik): current executable layout/descriptor set layout info.
+ // TODO(benvanik): valid push constant bit ranges.
+} iree_hal_command_buffer_validation_state_t;
+
+// Maximum size of any update in iree_hal_command_buffer_update_buffer.
+// 64KB is the limit on Vulkan and we uniformly use that today across all
+// targets as to not need too much command buffer memory.
+#define IREE_HAL_COMMAND_BUFFER_MAX_UPDATE_SIZE \
+ ((iree_device_size_t)(64 * 1024))
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_t
+//===----------------------------------------------------------------------===//
+
+// Asynchronous command buffer recording interface.
+// Commands are recorded by the implementation for later submission to command
+// queues.
+//
+// Buffers, events, and programs referenced must remain valid and not be
+// modified or read while there are commands in-flight. The usual flow is to
+// populate input buffers, dispatch using those buffers, wait on a semaphore
+// until the buffers are guaranteed to no longer be in use, and then reuse the
+// buffers. Lifetimes are managed by the command buffer and all used resources
+// will be retained for as long as the command buffer is live or until it is
+// reset.
+//
+// Errors that can be recognized when operations are enqueued will be returned
+// immediately, such as invalid argument errors. Errors that can only be
+// determined at execution time will be returned on semaphores. Once a failure
+// occurs the device queue will enter an error state that invalidates all
+// operations on the device queue (as ordering is not strict and any may still
+// be in-flight). In this case the user of the device queue should treat all
+// in-flight operations as cancelled and fully reset themselves. Other device
+// queues that may be waiting on events from the device queue will also enter
+// error states. Only once a user has acknowledged and cleared the error state
+// with a Reset will the queue become usable again; otherwise all operations
+// will return errors.
+//
+// Command buffers are thread-compatible. Use multiple command buffers if trying
+// to record commands from multiple threads. Command buffers must not be mutated
+// between when they are submitted for execution on a queue and when the
+// semaphore fires indicating the completion of their execution.
+typedef struct iree_hal_command_buffer_t iree_hal_command_buffer_t;
+
+// Creates a command buffer ready to begin recording, possibly reusing an
+// existing one from the |device| pool.
+//
+// |queue_affinity| specifies the device queues the command buffer may be
+// submitted to. The queue affinity provided to iree_hal_device_queue_submit
+// must match or be a subset of the |queue_affinity|.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_create(
+ iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity,
+ iree_hal_command_buffer_t** out_command_buffer);
+
+// Retains the given |command_buffer| for the caller.
+IREE_API_EXPORT void iree_hal_command_buffer_retain(
+ iree_hal_command_buffer_t* command_buffer);
+
+// Releases the given |command_buffer| from the caller.
+IREE_API_EXPORT void iree_hal_command_buffer_release(
+ iree_hal_command_buffer_t* command_buffer);
+
+IREE_API_EXPORT void* iree_hal_command_buffer_dyn_cast(
+ iree_hal_command_buffer_t* command_buffer, const void* vtable);
+
+// Returns a bitmask indicating the behavior of the command buffer.
+IREE_API_EXPORT iree_hal_command_buffer_mode_t
+iree_hal_command_buffer_mode(const iree_hal_command_buffer_t* command_buffer);
+
+// Returns a bitmask indicating which command categories this command buffer
+// can record.
+IREE_API_EXPORT iree_hal_command_category_t
+iree_hal_command_buffer_allowed_categories(
+ const iree_hal_command_buffer_t* command_buffer);
+
+// Resets and begins recording into the command buffer, clearing all
+// previously recorded contents.
+// The command buffer must not be in-flight.
+IREE_API_EXPORT iree_status_t
+iree_hal_command_buffer_begin(iree_hal_command_buffer_t* command_buffer);
+
+// Ends recording into the command buffer.
+// This must be called prior to submitting the command buffer for execution.
+IREE_API_EXPORT iree_status_t
+iree_hal_command_buffer_end(iree_hal_command_buffer_t* command_buffer);
+
+// Pushes a new debug group with the given |label|.
+// All commands between this and a mandatory matching call to
+// iree_hal_command_buffer_end_debug_group will be grouped together with the
+// given label. If a source location is available it can be provided via
+// |location| to allow mapping back into the source program that issued the
+// commands.
+//
+// An optional RGBA color to show in the debug UI may be provided via
+// |label_color|; otherwise iree_hal_label_color_unspecified can be used to let
+// the debug tool choose.
+IREE_API_EXPORT void iree_hal_command_buffer_begin_debug_group(
+ iree_hal_command_buffer_t* command_buffer, iree_string_view_t label,
+ iree_hal_label_color_t label_color,
+ const iree_hal_label_location_t* location);
+
+// Pops a debug group from the stack.
+IREE_API_EXPORT void iree_hal_command_buffer_end_debug_group(
+ iree_hal_command_buffer_t* command_buffer);
+
+// Defines a memory dependency between commands recorded before and after the
+// barrier. One or more memory or buffer barriers can be specified to indicate
+// between which stages or buffers the dependencies exist.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_execution_barrier(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_hal_execution_barrier_flags_t flags,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers);
+
+// Sets an event to the signaled state.
+// |source_stage_mask| specifies when the event is signaled.
+//
+// Events are only valid within a single command buffer. Events can only be
+// used on non-transfer queues.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_signal_event(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask);
+
+// Resets an event to the non-signaled state.
+// |source_stage_mask| specifies when the event is unsignaled.
+//
+// Events are only valid within a single command buffer. Events can only be
+// used on non-transfer queues.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_reset_event(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask);
+
+// Waits for one or more events to be signaled and defines a memory dependency
+// between the synchronization scope of the signal operations and the commands
+// following the wait.
+//
+// |source_stage_mask| must include IREE_HAL_EXECUTION_STAGE_HOST for host-side
+// event signals to be visible.
+//
+// Events are only valid within a single command buffer. Events remain
+// signaled even after waiting and must be reset to be reused. Events can only
+// be used on non-transfer queues.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_wait_events(
+    iree_hal_command_buffer_t* command_buffer, iree_host_size_t event_count,
+    const iree_hal_event_t** events,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers);
+
+// Hints to the device queue that the given buffer will not be used again.
+// After encoding a discard the buffer contents will be considered undefined.
+// This is because the discard may be used to elide write backs to host memory
+// or aggressively reuse the allocation for other purposes.
+//
+// For buffers allocated with IREE_HAL_MEMORY_TYPE_TRANSIENT this may allow
+// the device queue to reclaim the memory used by the buffer earlier than
+// otherwise possible.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_discard_buffer(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* buffer);
+
+// Fills the target buffer with the given repeating value.
+// Expects that |pattern_length| is one of 1, 2, or 4 and that the offset and
+// length are aligned to the natural alignment of the value.
+// The target buffer must be compatible with the devices owned by this
+// device queue and be allocated with IREE_HAL_BUFFER_USAGE_TRANSFER.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_fill_buffer(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length,
+ const void* pattern, iree_host_size_t pattern_length);
+
+// Updates a range of the given target buffer from the source host memory.
+// The source host memory is copied immediately into the command buffer and
+// occupies command buffer space. It is strongly recommended that large buffer
+// updates are performed via iree_hal_command_buffer_copy_buffer where there is
+// the possibility of a zero-copy path.
+// The |source_buffer| may be released by the caller immediately after this
+// call returns.
+// The |target_buffer| must be compatible with the devices owned by this
+// device queue and be allocated with IREE_HAL_BUFFER_USAGE_TRANSFER.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_update_buffer(
+ iree_hal_command_buffer_t* command_buffer, const void* source_buffer,
+ iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length);
+
+// Copies a range of one buffer to another.
+// Both buffers must be compatible with the devices owned by this device
+// queue and be allocated with IREE_HAL_BUFFER_USAGE_TRANSFER. Though the source
+// and target buffer may be the same the ranges must not overlap (as with
+// memcpy).
+//
+// This can be used to perform device->host, host->device, and device->device
+// copies.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_copy_buffer(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* source_buffer,
+ iree_device_size_t source_offset, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length);
+
+// Pushes an inline set of constants that can be accessed by subsequent
+// dispatches using a compatible executable layout.
+//
+// Push constants are treated as opaque bytes, meaning that they may be
+// bit-casted floats, bit-packed booleans, etc. |offset| and |values_length| are
+// in bytes.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_push_constants(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+ const void* values, iree_host_size_t values_length);
+
+// Pushes a descriptor set and associates it with |set|.
+// This uses an internal ringbuffer inside of the command buffer to avoid the
+// need for creating and binding descriptor sets and managing their lifetime.
+//
+// The descriptor set will remain bound and valid so long as the executable
+// layouts used by dispatches are compatible (same descriptor layouts and push
+// constant sizes).
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_push_descriptor_set(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_binding_t* bindings);
+
+// Binds a descriptor set to the given |set| matching that used in the
+// executable layout interface.
+//
+// The descriptor set will remain bound and valid so long as the executable
+// layouts used by dispatches are compatible (same descriptor layouts and push
+// constant sizes).
+//
+// If any dynamic descriptor types are defined in the descriptor set layout then
+// the dynamic offsets must be provided. These offsets will be added to the base
+// offset of the descriptor layout binding.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_bind_descriptor_set(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_hal_descriptor_set_t* descriptor_set,
+ iree_host_size_t dynamic_offset_count,
+ const iree_device_size_t* dynamic_offsets);
+
+// Dispatches an execution request.
+// The request may execute overlapped with any other transfer operation or
+// dispatch made within the same barrier-defined sequence.
+//
+// The executable specified must be registered for use with the device driver
+// owning this queue. It must not be unregistered until all requests that use
+// it have completed.
+//
+// Fails if the queue does not support dispatch operations (as indicated by
+// can_dispatch).
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_dispatch(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z);
+
+// Dispatches an execution request with deferred workgroup counts.
+// This is the same as iree_hal_command_buffer_dispatch but the workgroup counts
+// are read from the given |workgroups_buffer| at offset |workgroups_offset| as
+// 3 uint32_t XYZ values before performing the dispatch. This allows prior
+// dispatches within the command sequence to populate the workgroup counts.
+//
+// The buffer must have been allocated with IREE_HAL_BUFFER_USAGE_DISPATCH and
+// be of IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_dispatch_indirect(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ iree_hal_buffer_t* workgroups_buffer, iree_device_size_t workgroups_offset);
+
+//===----------------------------------------------------------------------===//
+// Utilities for command buffer creation
+//===----------------------------------------------------------------------===//
+
+// Defines a transfer command operation.
+// Defines a transfer command operation.
+// Selects which member of the iree_hal_transfer_command_t payload union is
+// active.
+typedef enum iree_hal_transfer_command_type_t {
+  // iree_hal_command_buffer_fill_buffer
+  IREE_HAL_TRANSFER_COMMAND_TYPE_FILL = 0u,
+  // iree_hal_command_buffer_copy_buffer
+  IREE_HAL_TRANSFER_COMMAND_TYPE_COPY = 1u,
+  // iree_hal_command_buffer_update_buffer
+  IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE = 2u,
+} iree_hal_transfer_command_type_t;
+
+// Represents a single transfer command within a batch of commands.
+typedef struct iree_hal_transfer_command_t {
+ // The type of the command selecting which of the payload data is used.
+ iree_hal_transfer_command_type_t type;
+ union {
+ // IREE_HAL_TRANSFER_COMMAND_TYPE_FILL
+ struct {
+ iree_hal_buffer_t* target_buffer;
+ iree_device_size_t target_offset;
+ iree_device_size_t length;
+ const void* pattern;
+ iree_host_size_t pattern_length;
+ } fill;
+ // IREE_HAL_TRANSFER_COMMAND_TYPE_COPY
+ struct {
+ iree_hal_buffer_t* source_buffer;
+ iree_device_size_t source_offset;
+ iree_hal_buffer_t* target_buffer;
+ iree_device_size_t target_offset;
+ iree_device_size_t length;
+ } copy;
+ // IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE
+ struct {
+ const void* source_buffer;
+ iree_host_size_t source_offset;
+ iree_hal_buffer_t* target_buffer;
+ iree_device_size_t target_offset;
+ iree_device_size_t length;
+ } update;
+ };
+} iree_hal_transfer_command_t;
+
+// Builds a command buffer containing a recording of all |transfer_commands|.
+// All buffers must be compatible with |device| and ranges must not overlap
+// (same as with memcpy). All commands are executed concurrently with no
+// barriers. The provided commands and any referenced data needs only remain
+// live during recording, while all referenced buffers must be kept live by
+// the caller until the command buffer has completed execution.
+//
+// This is just a utility to make it easier to quickly construct batches of
+// transfer operations. If more control is required then record the command
+// buffer as normal.
+IREE_API_EXPORT iree_status_t iree_hal_create_transfer_command_buffer(
+ iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+ iree_hal_queue_affinity_t queue_affinity, iree_host_size_t transfer_count,
+ const iree_hal_transfer_command_t* transfer_commands,
+ iree_hal_command_buffer_t** out_command_buffer);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_t validation wrapper
+//===----------------------------------------------------------------------===//
+
+// Wraps |target_command_buffer| with a validation layer that checks the
+// parameters to each call in an attempt to return errors where usage may result
+// in failed or incorrect execution. This layer adds many additional checks to
+// each call but must be used when dealing with untrusted incoming commands.
+//
+// The validation is strictly input argument and permission-based and not a full
+// verification of the correctness of any barriers or memory dependencies. A
+// command buffer recording that has passed validation does not indicate that it
+// is guaranteed to make forward progress or properly observe memory visibility
+// or availability rules. Instead, validation ensures that no command references
+// memory outside of the allowed ranges or accesses memory in violation of the
+// allowed usage or access rights.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_wrap_validation(
+ iree_hal_device_t* device, iree_hal_command_buffer_t* target_command_buffer,
+ iree_hal_command_buffer_t** out_command_buffer);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_t implementation details
+//===----------------------------------------------------------------------===//
+
+// Function table implemented by iree_hal_command_buffer_t backends.
+// Each entry mirrors the public iree_hal_command_buffer_* API declared above;
+// see those declarations for the contract of each method.
+typedef struct iree_hal_command_buffer_vtable_t {
+  void(IREE_API_PTR* destroy)(iree_hal_command_buffer_t* command_buffer);
+
+  void*(IREE_API_PTR* dyn_cast)(iree_hal_command_buffer_t* command_buffer,
+                                const void* vtable);
+
+  iree_status_t(IREE_API_PTR* begin)(iree_hal_command_buffer_t* command_buffer);
+  iree_status_t(IREE_API_PTR* end)(iree_hal_command_buffer_t* command_buffer);
+
+  void(IREE_API_PTR* begin_debug_group)(
+      iree_hal_command_buffer_t* command_buffer, iree_string_view_t label,
+      iree_hal_label_color_t label_color,
+      const iree_hal_label_location_t* location);
+  void(IREE_API_PTR* end_debug_group)(
+      iree_hal_command_buffer_t* command_buffer);
+
+  iree_status_t(IREE_API_PTR* execution_barrier)(
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_execution_stage_t source_stage_mask,
+      iree_hal_execution_stage_t target_stage_mask,
+      iree_hal_execution_barrier_flags_t flags,
+      iree_host_size_t memory_barrier_count,
+      const iree_hal_memory_barrier_t* memory_barriers,
+      iree_host_size_t buffer_barrier_count,
+      const iree_hal_buffer_barrier_t* buffer_barriers);
+
+  iree_status_t(IREE_API_PTR* signal_event)(
+      iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+      iree_hal_execution_stage_t source_stage_mask);
+
+  iree_status_t(IREE_API_PTR* reset_event)(
+      iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+      iree_hal_execution_stage_t source_stage_mask);
+
+  iree_status_t(IREE_API_PTR* wait_events)(
+      iree_hal_command_buffer_t* command_buffer, iree_host_size_t event_count,
+      const iree_hal_event_t** events,
+      iree_hal_execution_stage_t source_stage_mask,
+      iree_hal_execution_stage_t target_stage_mask,
+      iree_host_size_t memory_barrier_count,
+      const iree_hal_memory_barrier_t* memory_barriers,
+      iree_host_size_t buffer_barrier_count,
+      const iree_hal_buffer_barrier_t* buffer_barriers);
+
+  iree_status_t(IREE_API_PTR* discard_buffer)(
+      iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* buffer);
+
+  iree_status_t(IREE_API_PTR* fill_buffer)(
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+      iree_device_size_t length, const void* pattern,
+      iree_host_size_t pattern_length);
+
+  iree_status_t(IREE_API_PTR* update_buffer)(
+      iree_hal_command_buffer_t* command_buffer, const void* source_buffer,
+      iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+      iree_device_size_t target_offset, iree_device_size_t length);
+
+  iree_status_t(IREE_API_PTR* copy_buffer)(
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+      iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+      iree_device_size_t length);
+
+  iree_status_t(IREE_API_PTR* push_constants)(
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+      const void* values, iree_host_size_t values_length);
+
+  iree_status_t(IREE_API_PTR* push_descriptor_set)(
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_executable_layout_t* executable_layout, uint32_t set,
+      iree_host_size_t binding_count,
+      const iree_hal_descriptor_set_binding_t* bindings);
+
+  iree_status_t(IREE_API_PTR* bind_descriptor_set)(
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_executable_layout_t* executable_layout, uint32_t set,
+      iree_hal_descriptor_set_t* descriptor_set,
+      iree_host_size_t dynamic_offset_count,
+      const iree_device_size_t* dynamic_offsets);
+
+  iree_status_t(IREE_API_PTR* dispatch)(
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_executable_t* executable, int32_t entry_point,
+      uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z);
+
+  iree_status_t(IREE_API_PTR* dispatch_indirect)(
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_executable_t* executable, int32_t entry_point,
+      iree_hal_buffer_t* workgroups_buffer,
+      iree_device_size_t workgroups_offset);
+} iree_hal_command_buffer_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_command_buffer_vtable_t);
+
+struct iree_hal_command_buffer_t {
+  // Common resource header.
+  // NOTE(review): appears to be kept as the first member so the struct can be
+  // treated as an iree_hal_resource_t - confirm against iree/hal/resource.h.
+  iree_hal_resource_t resource;
+  // Mode bits the command buffer was created with.
+  iree_hal_command_buffer_mode_t mode;
+  // Categories of commands the command buffer may record.
+  iree_hal_command_category_t allowed_categories;
+  // Device queues the command buffer may be submitted to.
+  iree_hal_queue_affinity_t queue_affinity;
+
+#if IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE
+  // Validation layer state; only present when validation is compiled in.
+  iree_hal_command_buffer_validation_state_t validation;
+#endif  // IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE
+};
+
+IREE_API_EXPORT void iree_hal_command_buffer_initialize(
+ iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity,
+ const iree_hal_command_buffer_vtable_t* vtable,
+ iree_hal_command_buffer_t* command_buffer);
+
+IREE_API_EXPORT void iree_hal_command_buffer_destroy(
+ iree_hal_command_buffer_t* command_buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_COMMAND_BUFFER_H_
diff --git a/runtime/src/iree/hal/command_buffer_validation.c b/runtime/src/iree/hal/command_buffer_validation.c
new file mode 100644
index 0000000..8ccf775
--- /dev/null
+++ b/runtime/src/iree/hal/command_buffer_validation.c
@@ -0,0 +1,441 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/command_buffer_validation.h"
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/descriptor_set.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/device.h"
+#include "iree/hal/event.h"
+#include "iree/hal/executable.h"
+#include "iree/hal/executable_layout.h"
+#include "iree/hal/resource.h"
+
+#if IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE
+#define VALIDATION_STATE(command_buffer) (&(command_buffer)->validation)
+#else
+#define VALIDATION_STATE(command_buffer) \
+ ((iree_hal_command_buffer_validation_state_t*)NULL)
+#endif // IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE
+
+// Returns success iff the queue supports the given command categories.
+static iree_status_t iree_hal_command_buffer_validate_categories(
+ const iree_hal_command_buffer_t* command_buffer,
+ iree_hal_command_category_t required_categories) {
+ if (!iree_all_bits_set(command_buffer->allowed_categories,
+ required_categories)) {
+#if IREE_STATUS_MODE
+ iree_bitfield_string_temp_t temp0, temp1;
+ iree_string_view_t required_categories_str =
+ iree_hal_command_category_format(required_categories, &temp0);
+ iree_string_view_t allowed_categories_str =
+ iree_hal_command_category_format(command_buffer->allowed_categories,
+ &temp1);
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "operation requires categories %.*s but command buffer only supports "
+ "%.*s",
+ (int)required_categories_str.size, required_categories_str.data,
+ (int)allowed_categories_str.size, allowed_categories_str.data);
+#else
+ return iree_status_from_code(IREE_STATUS_FAILED_PRECONDITION);
+#endif // IREE_STATUS_MODE
+ }
+ return iree_ok_status();
+}
+
+// Returns success iff the buffer is compatible with the device.
+static iree_status_t iree_hal_command_buffer_validate_buffer_compatibility(
+ const iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* buffer,
+ iree_hal_buffer_compatibility_t required_compatibility,
+ iree_hal_buffer_usage_t intended_usage) {
+ iree_hal_buffer_compatibility_t allowed_compatibility =
+ iree_hal_allocator_query_compatibility(
+ iree_hal_device_allocator(VALIDATION_STATE(command_buffer)->device),
+ (iree_hal_buffer_params_t){
+ .type = iree_hal_buffer_memory_type(buffer),
+ .usage = iree_hal_buffer_allowed_usage(buffer) & intended_usage,
+ },
+ iree_hal_buffer_allocation_size(buffer));
+ if (!iree_all_bits_set(allowed_compatibility, required_compatibility)) {
+#if IREE_STATUS_MODE
+ // Buffer cannot be used on the queue for the given usage.
+ iree_bitfield_string_temp_t temp0, temp1;
+ iree_string_view_t allowed_usage_str = iree_hal_buffer_usage_format(
+ iree_hal_buffer_allowed_usage(buffer), &temp0);
+ iree_string_view_t intended_usage_str =
+ iree_hal_buffer_usage_format(intended_usage, &temp1);
+ return iree_make_status(
+ IREE_STATUS_PERMISSION_DENIED,
+ "requested buffer usage is not supported for the buffer on this queue; "
+ "buffer allows %.*s, operation requires %.*s (allocator compatibility "
+ "mismatch)",
+ (int)allowed_usage_str.size, allowed_usage_str.data,
+ (int)intended_usage_str.size, intended_usage_str.data);
+#else
+ return iree_status_from_code(IREE_STATUS_PERMISSION_DENIED);
+#endif // IREE_STATUS_MODE
+ }
+ return iree_ok_status();
+}
+
+// Returns success iff the currently bound descriptor sets are valid for the
+// given executable entry point.
+static iree_status_t iree_hal_command_buffer_validate_dispatch_bindings(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point) {
+ // TODO(benvanik): validate buffers referenced have compatible memory types
+ // and access rights.
+ // TODO(benvanik): validate no aliasing between inputs/outputs.
+ return iree_ok_status();
+}
+
+void iree_hal_command_buffer_initialize_validation(
+ iree_hal_device_t* device, iree_hal_command_buffer_t* command_buffer) {
+ VALIDATION_STATE(command_buffer)->device = device;
+ VALIDATION_STATE(command_buffer)->is_recording = false;
+}
+
+iree_status_t iree_hal_command_buffer_begin_validation(
+ iree_hal_command_buffer_t* command_buffer) {
+ if (VALIDATION_STATE(command_buffer)->is_recording) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "command buffer is already in a recording state");
+ }
+ VALIDATION_STATE(command_buffer)->is_recording = true;
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_end_validation(
+ iree_hal_command_buffer_t* command_buffer) {
+ if (VALIDATION_STATE(command_buffer)->debug_group_depth != 0) {
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "unbalanced debug group depth (expected 0, is %d)",
+ VALIDATION_STATE(command_buffer)->debug_group_depth);
+ }
+ if (!VALIDATION_STATE(command_buffer)->is_recording) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "command buffer is not in a recording state");
+ }
+ VALIDATION_STATE(command_buffer)->is_recording = false;
+ return iree_ok_status();
+}
+
+void iree_hal_command_buffer_begin_debug_group_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_string_view_t label,
+ iree_hal_label_color_t label_color,
+ const iree_hal_label_location_t* location) {
+ ++VALIDATION_STATE(command_buffer)->debug_group_depth;
+}
+
+void iree_hal_command_buffer_end_debug_group_validation(
+ iree_hal_command_buffer_t* command_buffer) {
+ --VALIDATION_STATE(command_buffer)->debug_group_depth;
+}
+
+iree_status_t iree_hal_command_buffer_execution_barrier_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_hal_execution_barrier_flags_t flags,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers) {
+ // NOTE: all command buffer types can perform this so no need to check.
+
+ // TODO(benvanik): additional synchronization validation.
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_signal_event_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_DISPATCH));
+
+ // TODO(benvanik): additional synchronization validation.
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_reset_event_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_DISPATCH));
+
+ // TODO(benvanik): additional synchronization validation.
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_wait_events_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_host_size_t event_count,
+ const iree_hal_event_t** events,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_DISPATCH));
+
+ // TODO(benvanik): additional synchronization validation.
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_discard_buffer_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* buffer) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_TRANSFER));
+
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
+ iree_hal_buffer_memory_type(buffer),
+ IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE));
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_fill_buffer_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length,
+ const void* pattern, iree_host_size_t pattern_length) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_TRANSFER));
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_buffer_compatibility(
+ command_buffer, target_buffer,
+ IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER,
+ IREE_HAL_BUFFER_USAGE_TRANSFER));
+
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
+ iree_hal_buffer_memory_type(target_buffer),
+ IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
+ iree_hal_buffer_allowed_access(target_buffer),
+ IREE_HAL_MEMORY_ACCESS_WRITE));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
+ iree_hal_buffer_allowed_usage(target_buffer),
+ IREE_HAL_BUFFER_USAGE_TRANSFER));
+ IREE_RETURN_IF_ERROR(
+ iree_hal_buffer_validate_range(target_buffer, target_offset, length));
+
+ // Ensure the value length is supported.
+ if (pattern_length != 1 && pattern_length != 2 && pattern_length != 4) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "fill value length is not one of the supported "
+ "values (pattern_length=%zu)",
+ pattern_length);
+ }
+
+ // Ensure the offset and length have an alignment matching the value length.
+ if ((target_offset % pattern_length) != 0 || (length % pattern_length) != 0) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "fill offset and/or length do not match the natural alignment of the "
+ "fill value (target_offset=%" PRIdsz ", length=%" PRIdsz
+ ", pattern_length=%zu)",
+ target_offset, length, pattern_length);
+ }
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_update_buffer_validation(
+ iree_hal_command_buffer_t* command_buffer, const void* source_buffer,
+ iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_TRANSFER));
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_buffer_compatibility(
+ command_buffer, target_buffer,
+ IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER,
+ IREE_HAL_BUFFER_USAGE_TRANSFER));
+
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
+ iree_hal_buffer_memory_type(target_buffer),
+ IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
+ iree_hal_buffer_allowed_access(target_buffer),
+ IREE_HAL_MEMORY_ACCESS_WRITE));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
+ iree_hal_buffer_allowed_usage(target_buffer),
+ IREE_HAL_BUFFER_USAGE_TRANSFER));
+ IREE_RETURN_IF_ERROR(
+ iree_hal_buffer_validate_range(target_buffer, target_offset, length));
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_copy_buffer_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* source_buffer,
+ iree_device_size_t source_offset, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_TRANSFER));
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_buffer_compatibility(
+ command_buffer, source_buffer,
+ IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER,
+ IREE_HAL_BUFFER_USAGE_TRANSFER));
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_buffer_compatibility(
+ command_buffer, target_buffer,
+ IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER,
+ IREE_HAL_BUFFER_USAGE_TRANSFER));
+
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
+ iree_hal_buffer_allowed_access(source_buffer),
+ IREE_HAL_MEMORY_ACCESS_READ));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
+ iree_hal_buffer_allowed_usage(source_buffer),
+ IREE_HAL_BUFFER_USAGE_TRANSFER));
+ IREE_RETURN_IF_ERROR(
+ iree_hal_buffer_validate_range(source_buffer, source_offset, length));
+
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
+ iree_hal_buffer_allowed_usage(target_buffer),
+ IREE_HAL_BUFFER_USAGE_TRANSFER));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
+ iree_hal_buffer_allowed_access(target_buffer),
+ IREE_HAL_MEMORY_ACCESS_WRITE));
+ IREE_RETURN_IF_ERROR(
+ iree_hal_buffer_validate_range(target_buffer, target_offset, length));
+
+ // At least source or destination must be device-visible to enable
+ // host->device, device->host, and device->device.
+ // TODO(b/117338171): host->host copies.
+ if (!iree_any_bit_set(iree_hal_buffer_memory_type(source_buffer),
+ IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE) &&
+ !iree_any_bit_set(iree_hal_buffer_memory_type(target_buffer),
+ IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
+#if IREE_STATUS_MODE
+ iree_bitfield_string_temp_t temp0, temp1;
+ iree_string_view_t source_memory_type_str = iree_hal_memory_type_format(
+ iree_hal_buffer_memory_type(source_buffer), &temp0);
+ iree_string_view_t target_memory_type_str = iree_hal_memory_type_format(
+ iree_hal_buffer_memory_type(target_buffer), &temp1);
+ return iree_make_status(
+ IREE_STATUS_PERMISSION_DENIED,
+ "at least one buffer must be device-visible for a copy; "
+ "source_buffer=%.*s, target_buffer=%.*s",
+ (int)source_memory_type_str.size, source_memory_type_str.data,
+ (int)target_memory_type_str.size, target_memory_type_str.data);
+#else
+ return iree_status_from_code(IREE_STATUS_PERMISSION_DENIED);
+#endif // IREE_STATUS_MODE
+ }
+
+ // Check for overlap - just like memcpy we don't handle that.
+ if (iree_hal_buffer_test_overlap(source_buffer, source_offset, length,
+ target_buffer, target_offset, length) !=
+ IREE_HAL_BUFFER_OVERLAP_DISJOINT) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "source and target ranges overlap within the same buffer");
+ }
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_push_constants_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+ const void* values, iree_host_size_t values_length) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_DISPATCH));
+
+ if (IREE_UNLIKELY((values_length % 4) != 0)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "invalid alignment %zu, must be 4-byte aligned",
+ values_length);
+ }
+
+ // TODO(benvanik): validate offset and value count with layout.
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_push_descriptor_set_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_binding_t* bindings) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_DISPATCH));
+
+ // TODO(benvanik): validate set index.
+ // TODO(benvanik): validate binding_offset.
+ // TODO(benvanik): validate bindings.
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_bind_descriptor_set_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_hal_descriptor_set_t* descriptor_set,
+ iree_host_size_t dynamic_offset_count,
+ const iree_device_size_t* dynamic_offsets) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_DISPATCH));
+
+ // TODO(benvanik): validate set index.
+ // TODO(benvanik): validate dynamic offsets (both count and offsets).
+
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_dispatch_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_DISPATCH));
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_dispatch_bindings(
+ command_buffer, executable, entry_point));
+ return iree_ok_status();
+}
+
+iree_status_t iree_hal_command_buffer_dispatch_indirect_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ iree_hal_buffer_t* workgroups_buffer,
+ iree_device_size_t workgroups_offset) {
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
+ command_buffer, IREE_HAL_COMMAND_CATEGORY_DISPATCH));
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_buffer_compatibility(
+ command_buffer, workgroups_buffer,
+ IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH,
+ IREE_HAL_BUFFER_USAGE_DISPATCH));
+
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
+ iree_hal_buffer_memory_type(workgroups_buffer),
+ IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
+ iree_hal_buffer_allowed_access(workgroups_buffer),
+ IREE_HAL_MEMORY_ACCESS_READ));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
+ iree_hal_buffer_allowed_usage(workgroups_buffer),
+ IREE_HAL_BUFFER_USAGE_DISPATCH));
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_range(
+ workgroups_buffer, workgroups_offset, sizeof(uint32_t) * 3));
+
+ IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_dispatch_bindings(
+ command_buffer, executable, entry_point));
+
+ return iree_ok_status();
+}
diff --git a/runtime/src/iree/hal/command_buffer_validation.h b/runtime/src/iree/hal/command_buffer_validation.h
new file mode 100644
index 0000000..42ab881
--- /dev/null
+++ b/runtime/src/iree/hal/command_buffer_validation.h
@@ -0,0 +1,104 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_COMMAND_BUFFER_VALIDATION_H_
+#define IREE_HAL_COMMAND_BUFFER_VALIDATION_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/command_buffer.h"
+
+void iree_hal_command_buffer_initialize_validation(
+ iree_hal_device_t* device, iree_hal_command_buffer_t* command_buffer);
+
+iree_status_t iree_hal_command_buffer_begin_validation(
+ iree_hal_command_buffer_t* command_buffer);
+
+iree_status_t iree_hal_command_buffer_end_validation(
+ iree_hal_command_buffer_t* command_buffer);
+
+void iree_hal_command_buffer_begin_debug_group_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_string_view_t label,
+ iree_hal_label_color_t label_color,
+ const iree_hal_label_location_t* location);
+
+void iree_hal_command_buffer_end_debug_group_validation(
+ iree_hal_command_buffer_t* command_buffer);
+
+iree_status_t iree_hal_command_buffer_execution_barrier_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_hal_execution_barrier_flags_t flags,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers);
+
+iree_status_t iree_hal_command_buffer_signal_event_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask);
+
+iree_status_t iree_hal_command_buffer_reset_event_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask);
+
+iree_status_t iree_hal_command_buffer_wait_events_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_host_size_t event_count,
+ const iree_hal_event_t** events,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers);
+
+iree_status_t iree_hal_command_buffer_discard_buffer_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* buffer);
+
+iree_status_t iree_hal_command_buffer_fill_buffer_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length,
+ const void* pattern, iree_host_size_t pattern_length);
+
+iree_status_t iree_hal_command_buffer_update_buffer_validation(
+ iree_hal_command_buffer_t* command_buffer, const void* source_buffer,
+ iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length);
+
+iree_status_t iree_hal_command_buffer_copy_buffer_validation(
+ iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* source_buffer,
+ iree_device_size_t source_offset, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length);
+
+iree_status_t iree_hal_command_buffer_push_constants_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+ const void* values, iree_host_size_t values_length);
+
+iree_status_t iree_hal_command_buffer_push_descriptor_set_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_binding_t* bindings);
+
+iree_status_t iree_hal_command_buffer_bind_descriptor_set_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_hal_descriptor_set_t* descriptor_set,
+ iree_host_size_t dynamic_offset_count,
+ const iree_device_size_t* dynamic_offsets);
+
+iree_status_t iree_hal_command_buffer_dispatch_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z);
+
+iree_status_t iree_hal_command_buffer_dispatch_indirect_validation(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ iree_hal_buffer_t* workgroups_buffer, iree_device_size_t workgroups_offset);
+
+#endif // IREE_HAL_COMMAND_BUFFER_VALIDATION_H_
diff --git a/runtime/src/iree/hal/cts/CMakeLists.txt b/runtime/src/iree/hal/cts/CMakeLists.txt
new file mode 100644
index 0000000..0216567
--- /dev/null
+++ b/runtime/src/iree/hal/cts/CMakeLists.txt
@@ -0,0 +1,194 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+set(IREE_ALL_CTS_TESTS
+ "allocator"
+ "buffer_mapping"
+ "command_buffer"
+ "command_buffer_dispatch"
+ "descriptor_set"
+ "descriptor_set_layout"
+ "driver"
+ "event"
+ "executable_cache"
+ "executable_layout"
+ "semaphore"
+ "semaphore_submission"
+ PARENT_SCOPE
+)
+
+# These tests use executables produced by the iree-translate compiler tool.
+# If the compiler is disabled or a HAL driver implementation is not yet
+# connected to a functional compiler target, these tests can be skipped.
+set(IREE_EXECUTABLE_CTS_TESTS
+ "command_buffer_dispatch"
+ "executable_cache"
+ PARENT_SCOPE
+)
+
+# List of testdata/{name}.mlir source files.
+set(IREE_ALL_CTS_EXECUTABLE_SOURCES
+ "command_buffer_dispatch_test"
+ "executable_cache_test"
+ PARENT_SCOPE
+)
+
+iree_cc_library(
+ NAME
+ cts_test_base
+ HDRS
+ "cts_test_base.h"
+ DEPS
+ iree::base
+ iree::hal
+ iree::testing::gtest
+ TESTONLY
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ allocator_test_library
+ HDRS
+ "allocator_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ buffer_mapping_test_library
+ HDRS
+ "buffer_mapping_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ command_buffer_test_library
+ HDRS
+ "command_buffer_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ command_buffer_dispatch_test_library
+ HDRS
+ "command_buffer_dispatch_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ descriptor_set_test_library
+ HDRS
+ "descriptor_set_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ descriptor_set_layout_test_library
+ HDRS
+ "descriptor_set_layout_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ driver_test_library
+ HDRS
+ "driver_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ event_test_library
+ HDRS
+ "event_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ executable_layout_test_library
+ HDRS
+ "executable_layout_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ executable_cache_test_library
+ HDRS
+ "executable_cache_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ semaphore_test_library
+ HDRS
+ "semaphore_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
+
+iree_cc_library(
+ NAME
+ semaphore_submission_test_library
+ HDRS
+ "semaphore_submission_test.h"
+ DEPS
+ ::cts_test_base
+ iree::base
+ iree::hal
+ iree::testing::gtest
+)
diff --git a/runtime/src/iree/hal/cts/README.md b/runtime/src/iree/hal/cts/README.md
new file mode 100644
index 0000000..0bd8cb6
--- /dev/null
+++ b/runtime/src/iree/hal/cts/README.md
@@ -0,0 +1,38 @@
+# Conformance Test Suite (CTS) for HAL implementations
+
+These tests exercise IREE's Hardware Abstraction Layer (HAL) in a way that
+checks for conformance across implementations and devices. The tests themselves
+are structured to help with HAL driver development by using individual features
+in isolation, demonstrating typical full-system usage, and pointing out where
+capabilities are optional.
+
+## Usage
+
+Each HAL driver (in-tree or out-of-tree) can use the `iree_hal_cts_test_suite()`
+CMake function to create a set of tests. See the documentation in
+[iree_hal_cts_test_suite.cmake](../../build_tools/cmake/iree_hal_cts_test_suite.cmake)
+and [cts_test_base.h](cts_test_base.h) for concrete details.
+
+## On testing for error conditions
+
+In general, error states are only lightly tested because the low level APIs that
+IREE's HAL is designed to thinly abstract over often assume programmer usage
+will be correct and treat errors as undefined behavior. See the Vulkan spec:
+
+* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap3.html#introduction-conventions
+* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap4.html#fundamentals-errors
+
+While the generic tests in the CTS may not be able to check for error conditions
+exhaustively, individual HAL implementations can implement stricter behavior
+or enable higher level checks like what the
+[Vulkan Validation Layers](https://github.com/KhronosGroup/Vulkan-ValidationLayers)
+provide.
+
+## Tips for adding new HAL implementations
+
+* Driver (`iree_hal_driver_t`) and device (`iree_hal_device_t`) creation, tested
+ in [driver_test](driver_test.h), are both prerequisites for all tests.
+* Tests for individual components (e.g.
+ [descriptor_set_layout_test](descriptor_set_layout_test.h)) are more
+ approachable than tests which use collections of components together (e.g.
+ [command_buffer_test](command_buffer_test.h)).
diff --git a/runtime/src/iree/hal/cts/allocator_test.h b/runtime/src/iree/hal/cts/allocator_test.h
new file mode 100644
index 0000000..4a1103c
--- /dev/null
+++ b/runtime/src/iree/hal/cts/allocator_test.h
@@ -0,0 +1,113 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CTS_ALLOCATOR_TEST_H_
+#define IREE_HAL_CTS_ALLOCATOR_TEST_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+namespace {
+
+constexpr iree_device_size_t kAllocationSize = 1024;
+
+} // namespace
+
+class allocator_test : public CtsTestBase {};
+
+// All allocators must support some baseline capabilities.
+//
+// Certain capabilities or configurations are optional and may vary between
+// driver implementations or target devices, such as:
+// IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL
+// IREE_HAL_BUFFER_USAGE_MAPPING
+TEST_P(allocator_test, BaselineBufferCompatibility) {
+ // Need at least one way to get data between the host and device.
+ iree_hal_buffer_params_t host_local_params = {0};
+ host_local_params.type =
+ IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+ host_local_params.usage = IREE_HAL_BUFFER_USAGE_TRANSFER;
+ iree_hal_buffer_compatibility_t transfer_compatibility_host =
+ iree_hal_allocator_query_compatibility(
+ device_allocator_, host_local_params, kAllocationSize);
+
+ iree_hal_buffer_params_t device_local_params = {0};
+ device_local_params.type =
+ IREE_HAL_MEMORY_TYPE_HOST_VISIBLE | IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL;
+ device_local_params.usage = IREE_HAL_BUFFER_USAGE_TRANSFER;
+ iree_hal_buffer_compatibility_t transfer_compatibility_device =
+ iree_hal_allocator_query_compatibility(
+ device_allocator_, device_local_params, kAllocationSize);
+
+ iree_hal_buffer_compatibility_t required_transfer_compatibility =
+ IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE |
+ IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER;
+ EXPECT_TRUE(iree_all_bits_set(transfer_compatibility_host,
+ required_transfer_compatibility) ||
+ iree_all_bits_set(transfer_compatibility_device,
+ required_transfer_compatibility));
+
+ // Need to be able to use some type of buffer as dispatch inputs or outputs.
+ iree_hal_buffer_params_t dispatch_params = {0};
+ dispatch_params.type =
+ IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+ dispatch_params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH;
+ iree_hal_buffer_compatibility_t dispatch_compatibility =
+ iree_hal_allocator_query_compatibility(device_allocator_, dispatch_params,
+ kAllocationSize);
+ EXPECT_TRUE(
+ iree_all_bits_set(dispatch_compatibility,
+ IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE |
+ IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH));
+}
+
+TEST_P(allocator_test, AllocateBuffer) {
+ iree_hal_buffer_params_t params = {0};
+ params.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL;
+ params.usage = IREE_HAL_BUFFER_USAGE_TRANSFER;
+ iree_hal_buffer_t* buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+ device_allocator_, params, kAllocationSize, iree_const_byte_span_empty(),
+ &buffer));
+
+  // At a minimum, the requested memory type should be respected.
+ // Additional bits may be optionally set depending on the allocator.
+ EXPECT_TRUE(
+ iree_all_bits_set(iree_hal_buffer_memory_type(buffer), params.type));
+ EXPECT_TRUE(
+ iree_all_bits_set(iree_hal_buffer_allowed_usage(buffer), params.usage));
+ EXPECT_GE(iree_hal_buffer_allocation_size(buffer),
+ kAllocationSize); // Larger is okay.
+
+ iree_hal_buffer_release(buffer);
+}
+
+// While empty allocations aren't particularly useful, they can occur in
+// practice so we should at least be able to create them without errors.
+TEST_P(allocator_test, AllocateEmptyBuffer) {
+ iree_hal_buffer_params_t params = {0};
+ params.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL;
+ params.usage = IREE_HAL_BUFFER_USAGE_TRANSFER;
+ iree_hal_buffer_t* buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+ device_allocator_, params, /*allocation_size=*/0,
+ iree_const_byte_span_empty(), &buffer));
+
+ iree_hal_buffer_release(buffer);
+}
+
+} // namespace cts
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_CTS_ALLOCATOR_TEST_H_
diff --git a/runtime/src/iree/hal/cts/buffer_mapping_test.h b/runtime/src/iree/hal/cts/buffer_mapping_test.h
new file mode 100644
index 0000000..2810efa
--- /dev/null
+++ b/runtime/src/iree/hal/cts/buffer_mapping_test.h
@@ -0,0 +1,554 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CTS_BUFFER_MAPPING_TEST_H_
+#define IREE_HAL_CTS_BUFFER_MAPPING_TEST_H_
+
+#include <cstdint>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+using ::testing::ContainerEq;
+
+namespace {
+constexpr iree_device_size_t kDefaultAllocationSize = 1024;
+} // namespace
+
+// Tests for buffer mapping (IREE_HAL_BUFFER_USAGE_MAPPING) support and
+// for `iree_hal_buffer_*` functions which require buffer mapping.
+//
+// Note that most of these tests first write into a buffer using one or more
+// functions then read the (possibly partial) contents of that buffer using
+// `iree_hal_buffer_map_read`. As the buffer read implementation is
+// nontrivial, particularly on implementations with complex host/device splits,
+// test failures may indicate issues in either the code doing the writing or the
+// code doing the reading.
+//
+// Where applicable, tests for each function are organized in increasing order
+// of complexity, such as:
+// * write to full buffer
+// * write with an offset and length
+// * write into a subspan of a buffer
+
+// Fixture for buffer mapping tests. Provides a helper that allocates an
+// uninitialized buffer visible to both the device (DEVICE_LOCAL) and the host
+// (HOST_VISIBLE) with transfer + mapping usage so tests can fill, zero, write,
+// and read it through the map APIs.
+class buffer_mapping_test : public CtsTestBase {
+ protected:
+ // Allocates a |buffer_size|-byte buffer with unspecified initial contents
+ // and returns it in |out_buffer|; the caller owns the returned reference.
+ void AllocateUninitializedBuffer(iree_device_size_t buffer_size,
+ iree_hal_buffer_t** out_buffer) {
+ iree_hal_buffer_params_t params = {0};
+ params.type =
+ IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+ params.usage =
+ IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING;
+ iree_hal_buffer_t* device_buffer = NULL;
+ IREE_CHECK_OK(iree_hal_allocator_allocate_buffer(
+ iree_hal_device_allocator(device_), params, buffer_size,
+ iree_const_byte_span_empty(), &device_buffer));
+ *out_buffer = device_buffer;
+ }
+};
+
+// Verifies the allocator reports host-visible mapping buffers as allocatable
+// and that an allocated buffer reflects the requested type/usage bits.
+TEST_P(buffer_mapping_test, AllocatorSupportsBufferMapping) {
+ iree_hal_buffer_params_t params = {0};
+ params.type = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+ params.usage = IREE_HAL_BUFFER_USAGE_MAPPING;
+ iree_hal_buffer_compatibility_t compatibility =
+ iree_hal_allocator_query_compatibility(device_allocator_, params,
+ kDefaultAllocationSize);
+ EXPECT_TRUE(iree_all_bits_set(compatibility,
+ IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE));
+
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(kDefaultAllocationSize, &buffer);
+
+ // Requested bits must be present; allocators may add more.
+ EXPECT_TRUE(
+ iree_all_bits_set(iree_hal_buffer_memory_type(buffer), params.type));
+ EXPECT_TRUE(
+ iree_all_bits_set(iree_hal_buffer_allowed_usage(buffer), params.usage));
+ EXPECT_GE(iree_hal_buffer_allocation_size(buffer), kDefaultAllocationSize);
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Zeroes an entire buffer with iree_hal_buffer_map_zero and reads it back to
+// confirm every byte is 0.
+TEST_P(buffer_mapping_test, ZeroWholeBuffer) {
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(kDefaultAllocationSize, &buffer);
+
+ // Zero the entire buffer.
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_zero(buffer, /*byte_offset=*/0, IREE_WHOLE_BUFFER));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(kDefaultAllocationSize);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer(kDefaultAllocationSize);
+ std::memset(reference_buffer.data(), 0, kDefaultAllocationSize);
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Zeroes only bytes [4, 12) of a 0xFF-filled buffer and verifies the bytes
+// outside that range are untouched.
+TEST_P(buffer_mapping_test, ZeroWithOffset) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Fill the entire buffer then zero only a segment of it.
+ uint8_t fill_value = 0xFF;
+ IREE_ASSERT_OK(iree_hal_buffer_map_fill(buffer, /*byte_offset=*/0,
+ IREE_WHOLE_BUFFER, &fill_value,
+ sizeof(fill_value)));
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_zero(buffer, /*byte_offset=*/4, /*byte_length=*/8));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer{0xFF, 0xFF, 0xFF, 0xFF, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0xFF, 0xFF, 0xFF, 0xFF};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Zeroes a range through a subspan view and verifies the write lands at the
+// correct parent-buffer offset (subspan offset 4 + parent offset 4 = bytes
+// [8, 12) of the parent), both via the parent and via the subspan.
+TEST_P(buffer_mapping_test, ZeroSubspan) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Fill the entire buffer.
+ uint8_t fill_value = 0xFF;
+ IREE_ASSERT_OK(iree_hal_buffer_map_fill(buffer, /*byte_offset=*/0,
+ IREE_WHOLE_BUFFER, &fill_value,
+ sizeof(fill_value)));
+
+ // Create a subspan.
+ iree_device_size_t subspan_length = 8;
+ iree_hal_buffer_t* buffer_subspan = NULL;
+ IREE_ASSERT_OK(iree_hal_buffer_subspan(buffer, /*byte_offset=*/4,
+ subspan_length, &buffer_subspan));
+
+ // Zero part of the subspan.
+ IREE_ASSERT_OK(iree_hal_buffer_map_zero(buffer_subspan, /*byte_offset=*/4,
+ /*byte_length=*/4));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer{0xFF, 0xFF, 0xFF, 0xFF, //
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0xFF, 0xFF, 0xFF, 0xFF};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+ // Also check the subspan.
+ std::vector<uint8_t> actual_data_subspan(subspan_length);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(buffer_subspan, /*source_offset=*/0,
+ actual_data_subspan.data(),
+ actual_data_subspan.size()));
+ std::vector<uint8_t> reference_buffer_subspan{0xFF, 0xFF, 0xFF, 0xFF, //
+ 0x00, 0x00, 0x00, 0x00};
+ EXPECT_THAT(actual_data_subspan, ContainerEq(reference_buffer_subspan));
+
+ iree_hal_buffer_release(buffer_subspan);
+ iree_hal_buffer_release(buffer);
+}
+
+// A zero-length fill must be a no-op: the buffer remains all zeroes.
+TEST_P(buffer_mapping_test, FillEmpty) {
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(kDefaultAllocationSize, &buffer);
+
+ // Zero the whole buffer then "fill" 0 bytes with a different pattern.
+ IREE_ASSERT_OK(iree_hal_buffer_map_zero(buffer, 0, IREE_WHOLE_BUFFER));
+ uint8_t fill_value = 0xFF;
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_fill(buffer, /*byte_offset=*/0,
+ /*byte_length=*/0, // <---- empty!
+ /*pattern=*/&fill_value,
+ /*pattern_length=*/sizeof(fill_value)));
+
+ // Check that the buffer is still all zeroes.
+ std::vector<uint8_t> actual_data(kDefaultAllocationSize);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer(kDefaultAllocationSize);
+ std::memset(reference_buffer.data(), 0, kDefaultAllocationSize);
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Fills the entire buffer with a 1-byte pattern via IREE_WHOLE_BUFFER and
+// verifies every byte matches.
+TEST_P(buffer_mapping_test, FillWholeBuffer) {
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(kDefaultAllocationSize, &buffer);
+
+ uint8_t fill_value = 0xFF;
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_fill(buffer, /*byte_offset=*/0,
+ /*byte_length=*/IREE_WHOLE_BUFFER,
+ /*pattern=*/&fill_value,
+ /*pattern_length=*/sizeof(fill_value)));
+
+ // Check that the buffer is filled with the pattern.
+ std::vector<uint8_t> actual_data(kDefaultAllocationSize);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer(kDefaultAllocationSize);
+ std::memset(reference_buffer.data(), fill_value, kDefaultAllocationSize);
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Fills only bytes [4, 12) of a zeroed buffer and verifies the surrounding
+// bytes remain zero.
+TEST_P(buffer_mapping_test, FillWithOffset) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Zero the entire buffer then fill only a segment of it.
+ IREE_ASSERT_OK(iree_hal_buffer_map_zero(buffer, 0, IREE_WHOLE_BUFFER));
+ uint8_t fill_value = 0xFF;
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_fill(buffer, /*byte_offset=*/4,
+ /*byte_length=*/8,
+ /*pattern=*/&fill_value,
+ /*pattern_length=*/sizeof(fill_value)));
+
+ // Check that only the segment of the buffer is filled with the pattern.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_offset_buffer{0x00, 0x00, 0x00, 0x00, //
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ 0x00, 0x00, 0x00, 0x00};
+ EXPECT_THAT(actual_data, ContainerEq(reference_offset_buffer));
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Fills a range through a subspan view and verifies the write lands at the
+// correct parent-buffer offset (subspan offset 4 + parent offset 4 = bytes
+// [8, 12) of the parent), checking both the parent and the subspan.
+TEST_P(buffer_mapping_test, FillSubspan) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Zero the entire buffer.
+ IREE_ASSERT_OK(iree_hal_buffer_map_zero(buffer, 0, IREE_WHOLE_BUFFER));
+
+ // Create a subspan.
+ iree_device_size_t subspan_length = 8;
+ iree_hal_buffer_t* buffer_subspan = NULL;
+ IREE_ASSERT_OK(iree_hal_buffer_subspan(buffer, /*byte_offset=*/4,
+ subspan_length, &buffer_subspan));
+
+ // Fill part of the subspan.
+ uint8_t fill_value = 0xFF;
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_fill(buffer_subspan, /*byte_offset=*/4,
+ /*byte_length=*/4,
+ /*pattern=*/&fill_value,
+ /*pattern_length=*/sizeof(fill_value)));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer{0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ 0x00, 0x00, 0x00, 0x00};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+ // Also check the subspan.
+ std::vector<uint8_t> actual_data_subspan(subspan_length);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(buffer_subspan, /*source_offset=*/0,
+ actual_data_subspan.data(),
+ actual_data_subspan.size()));
+ std::vector<uint8_t> reference_buffer_subspan{0x00, 0x00, 0x00, 0x00, //
+ 0xFF, 0xFF, 0xFF, 0xFF};
+ EXPECT_THAT(actual_data_subspan, ContainerEq(reference_buffer_subspan));
+
+ iree_hal_buffer_release(buffer_subspan);
+ iree_hal_buffer_release(buffer);
+}
+
+// Reads back both the whole buffer and an offset segment after writing a
+// known zero/0xFF split, verifying offset reads see the right window.
+TEST_P(buffer_mapping_test, ReadData) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Zero the first half, fill the second half.
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_zero(buffer, /*byte_offset=*/0, /*byte_length=*/8));
+ uint8_t fill_value = 0xFF;
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_fill(buffer, /*byte_offset=*/8,
+ /*byte_length=*/8,
+ /*pattern=*/&fill_value,
+ /*pattern_length=*/sizeof(fill_value)));
+
+ // Read the entire buffer.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer{0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ 0xFF, 0xFF, 0xFF, 0xFF};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ // Read only a segment of the buffer.
+ std::vector<uint8_t> actual_data_offset(8);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(buffer, /*source_offset=*/4,
+ actual_data_offset.data(),
+ actual_data_offset.size()));
+ std::vector<uint8_t> reference_buffer_offset{0x00, 0x00, 0x00, 0x00, //
+ 0xFF, 0xFF, 0xFF, 0xFF};
+ EXPECT_THAT(actual_data_offset, ContainerEq(reference_buffer_offset));
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Reads through a subspan view after tagging each 4-byte quarter of the
+// parent with a distinct value, verifying subspan reads are offset correctly.
+TEST_P(buffer_mapping_test, ReadDataSubspan) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Fill a few segments with distinct values.
+ uint8_t value = 0xAA;
+ IREE_ASSERT_OK(iree_hal_buffer_map_fill(buffer, 0, 4, &value, sizeof(value)));
+ value = 0xBB;
+ IREE_ASSERT_OK(iree_hal_buffer_map_fill(buffer, 4, 4, &value, sizeof(value)));
+ value = 0xCC;
+ IREE_ASSERT_OK(iree_hal_buffer_map_fill(buffer, 8, 4, &value, sizeof(value)));
+ value = 0xDD;
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_fill(buffer, 12, 4, &value, sizeof(value)));
+
+ // Create a subspan.
+ iree_device_size_t subspan_length = 8;
+ iree_hal_buffer_t* buffer_subspan = NULL;
+ IREE_ASSERT_OK(iree_hal_buffer_subspan(buffer, /*byte_offset=*/4,
+ subspan_length, &buffer_subspan));
+
+ // Read the entire buffer subspan.
+ std::vector<uint8_t> actual_data(subspan_length);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(buffer_subspan, /*source_offset=*/0,
+ actual_data.data(),
+ actual_data.size()));
+ std::vector<uint8_t> reference_buffer{0xBB, 0xBB, 0xBB, 0xBB, //
+ 0xCC, 0xCC, 0xCC, 0xCC};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ // Read only a segment of the buffer.
+ std::vector<uint8_t> actual_data_offset(4);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(buffer_subspan, /*source_offset=*/4,
+ actual_data_offset.data(),
+ actual_data_offset.size()));
+ std::vector<uint8_t> reference_buffer_offset{0xCC, 0xCC, 0xCC, 0xCC};
+ EXPECT_THAT(actual_data_offset, ContainerEq(reference_buffer_offset));
+
+ iree_hal_buffer_release(buffer_subspan);
+ iree_hal_buffer_release(buffer);
+}
+
+// Writes host data over the whole buffer with iree_hal_buffer_map_write and
+// reads it back for comparison.
+TEST_P(buffer_mapping_test, WriteDataWholeBuffer) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Write over the whole buffer.
+ uint8_t fill_value = 0xFF;
+ std::vector<uint8_t> reference_buffer(buffer_size);
+ std::memset(reference_buffer.data(), fill_value, buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_write(buffer, /*target_offset=*/0,
+ reference_buffer.data(),
+ reference_buffer.size()));
+
+ // Check that entire buffer was written to.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Writes host data into bytes [4, 12) of a zeroed buffer and verifies the
+// bytes outside that range remain zero.
+TEST_P(buffer_mapping_test, WriteDataWithOffset) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Zero the entire buffer.
+ IREE_ASSERT_OK(iree_hal_buffer_map_zero(buffer, 0, IREE_WHOLE_BUFFER));
+
+ // Write over part of the buffer.
+ std::vector<uint8_t> fill_buffer{0x11, 0x22, 0x33, 0x44, //
+ 0x55, 0x66, 0x77, 0x88};
+ IREE_ASSERT_OK(iree_hal_buffer_map_write(
+ buffer, /*target_offset=*/4, fill_buffer.data(), fill_buffer.size()));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer{0x00, 0x00, 0x00, 0x00, //
+ 0x11, 0x22, 0x33, 0x44, //
+ 0x55, 0x66, 0x77, 0x88, //
+ 0x00, 0x00, 0x00, 0x00};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_release(buffer);
+}
+
+// Writes through a subspan view and verifies the data lands at the correct
+// parent-buffer offset (subspan offset 4 + parent offset 4 = bytes [8, 12)),
+// checking both the parent and the subspan.
+TEST_P(buffer_mapping_test, WriteDataSubspan) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Zero the entire buffer.
+ IREE_ASSERT_OK(iree_hal_buffer_map_zero(buffer, 0, IREE_WHOLE_BUFFER));
+
+ // Create a subspan.
+ iree_device_size_t subspan_length = 8;
+ iree_hal_buffer_t* buffer_subspan = NULL;
+ IREE_ASSERT_OK(iree_hal_buffer_subspan(buffer, /*byte_offset=*/4,
+ subspan_length, &buffer_subspan));
+
+ // Write over part of the subspan.
+ std::vector<uint8_t> fill_buffer{0x11, 0x22, 0x33, 0x44};
+ IREE_ASSERT_OK(iree_hal_buffer_map_write(buffer_subspan, /*target_offset=*/4,
+ fill_buffer.data(),
+ fill_buffer.size()));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer{0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x11, 0x22, 0x33, 0x44, //
+ 0x00, 0x00, 0x00, 0x00};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+ // Also check the subspan.
+ std::vector<uint8_t> actual_data_subspan(subspan_length);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(buffer_subspan, /*source_offset=*/0,
+ actual_data_subspan.data(),
+ actual_data_subspan.size()));
+ std::vector<uint8_t> reference_buffer_subspan{0x00, 0x00, 0x00, 0x00, //
+ 0x11, 0x22, 0x33, 0x44};
+ EXPECT_THAT(actual_data_subspan, ContainerEq(reference_buffer_subspan));
+
+ iree_hal_buffer_release(buffer_subspan);
+ iree_hal_buffer_release(buffer);
+}
+
+// Copies the full contents of one buffer into another with
+// iree_hal_buffer_map_copy and verifies the destination matches the source
+// fill pattern.
+TEST_P(buffer_mapping_test, CopyData) {
+ iree_hal_buffer_t* buffer_a = NULL;
+ iree_hal_buffer_t* buffer_b = NULL;
+ AllocateUninitializedBuffer(kDefaultAllocationSize, &buffer_a);
+ AllocateUninitializedBuffer(kDefaultAllocationSize, &buffer_b);
+
+ uint8_t fill_value = 0x07;
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_fill(buffer_a, /*byte_offset=*/0,
+ /*byte_length=*/kDefaultAllocationSize,
+ /*pattern=*/&fill_value,
+ /*pattern_length=*/sizeof(fill_value)));
+ IREE_ASSERT_OK(iree_hal_buffer_map_copy(
+ /*source_buffer=*/buffer_a,
+ /*source_offset=*/0, /*target_buffer=*/buffer_b, /*target_offset=*/0,
+ /*data_length=*/kDefaultAllocationSize));
+
+ std::vector<uint8_t> reference_buffer(kDefaultAllocationSize);
+ std::memset(reference_buffer.data(), fill_value, kDefaultAllocationSize);
+
+ std::vector<uint8_t> actual_data(kDefaultAllocationSize);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer_b, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_release(buffer_a);
+ iree_hal_buffer_release(buffer_b);
+}
+
+// Maps a buffer range for reading from device -> host.
+// This is roughly what iree_hal_buffer_map_read does internally.
+TEST_P(buffer_mapping_test, MapRangeRead) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ uint8_t fill_value = 0xEF;
+ IREE_ASSERT_OK(iree_hal_buffer_map_fill(buffer, /*byte_offset=*/0,
+ IREE_WHOLE_BUFFER, &fill_value,
+ sizeof(fill_value)));
+
+ // Map the full range with read access and verify the mapping metadata.
+ iree_hal_buffer_mapping_t mapping;
+ IREE_ASSERT_OK(iree_hal_buffer_map_range(
+ buffer, IREE_HAL_MAPPING_MODE_SCOPED, IREE_HAL_MEMORY_ACCESS_READ,
+ /*byte_offset=*/0, /*byte_length=*/buffer_size, &mapping));
+ EXPECT_EQ(buffer, mapping.buffer);
+ EXPECT_GE(mapping.contents.data_length, (iree_host_size_t)buffer_size);
+
+ // The mapped contents must reflect the fill performed above.
+ std::vector<uint8_t> reference_buffer(buffer_size);
+ std::memset(reference_buffer.data(), fill_value, buffer_size);
+ std::vector<uint8_t> mapping_data(
+ mapping.contents.data,
+ mapping.contents.data + mapping.contents.data_length);
+ EXPECT_THAT(mapping_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_unmap_range(&mapping);
+ iree_hal_buffer_release(buffer);
+}
+
+// Maps a buffer range for writing from host -> device.
+// This is roughly what iree_hal_buffer_map_write does internally.
+TEST_P(buffer_mapping_test, MapRangeWrite) {
+ iree_device_size_t buffer_size = 16;
+ iree_hal_buffer_t* buffer = NULL;
+ AllocateUninitializedBuffer(buffer_size, &buffer);
+
+ // Map with DISCARD_WRITE: prior contents need not be preserved.
+ iree_hal_buffer_mapping_t mapping;
+ IREE_ASSERT_OK(iree_hal_buffer_map_range(
+ buffer, IREE_HAL_MAPPING_MODE_SCOPED,
+ IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE,
+ /*byte_offset=*/0, /*byte_length=*/buffer_size, &mapping));
+ EXPECT_EQ(buffer, mapping.buffer);
+ EXPECT_GE(mapping.contents.data_length, (iree_host_size_t)buffer_size);
+
+ // Write into the mapped memory, flush for device access, then read back.
+ uint8_t fill_value = 0x12;
+ std::memset(mapping.contents.data, fill_value, buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_flush_range(&mapping, /*byte_offset=*/0,
+ /*byte_length=*/buffer_size));
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_ASSERT_OK(iree_hal_buffer_map_read(
+ buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+ std::vector<uint8_t> reference_buffer(buffer_size);
+ std::memset(reference_buffer.data(), fill_value, buffer_size);
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_buffer_unmap_range(&mapping);
+ iree_hal_buffer_release(buffer);
+}
+
+} // namespace cts
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_CTS_BUFFER_MAPPING_TEST_H_
diff --git a/runtime/src/iree/hal/cts/command_buffer_dispatch_test.h b/runtime/src/iree/hal/cts/command_buffer_dispatch_test.h
new file mode 100644
index 0000000..d30b5d0
--- /dev/null
+++ b/runtime/src/iree/hal/cts/command_buffer_dispatch_test.h
@@ -0,0 +1,155 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_TEST_H_
+#define IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_TEST_H_
+
+#include "iree/base/api.h"
+#include "iree/base/string_view.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+// Fixture for dispatch tests. Builds (and tears down) the full executable
+// pipeline: executable cache -> descriptor set layout (two storage buffers)
+// -> executable layout -> prepared executable loaded from the per-driver
+// "command_buffer_dispatch_test.bin" test file.
+class command_buffer_dispatch_test : public CtsTestBase {
+ protected:
+ // Creates all executable-related objects used by the dispatch tests.
+ // Must be paired with CleanupExecutable().
+ void PrepareAbsExecutable() {
+ IREE_ASSERT_OK(iree_hal_executable_cache_create(
+ device_, iree_make_cstring_view("default"),
+ iree_loop_inline(&loop_status_), &executable_cache_));
+
+ // Binding 0: input storage buffer; binding 1: output storage buffer.
+ iree_hal_descriptor_set_layout_binding_t descriptor_set_layout_bindings[] =
+ {
+ {0, IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+ {1, IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+ };
+ IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+ device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_PUSH_ONLY,
+ IREE_ARRAYSIZE(descriptor_set_layout_bindings),
+ descriptor_set_layout_bindings, &descriptor_set_layout_));
+ IREE_ASSERT_OK(iree_hal_executable_layout_create(
+ device_, /*push_constants=*/0, /*set_layout_count=*/1,
+ &descriptor_set_layout_, &executable_layout_));
+
+ // The executable data is aliased, not copied; the test file outlives the
+ // executable for the duration of the test.
+ iree_hal_executable_params_t executable_params;
+ iree_hal_executable_params_initialize(&executable_params);
+ executable_params.caching_mode =
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ALIAS_PROVIDED_DATA;
+ executable_params.executable_format =
+ iree_make_cstring_view(get_test_executable_format());
+ executable_params.executable_data = get_test_executable_data(
+ iree_make_cstring_view("command_buffer_dispatch_test.bin"));
+ executable_params.executable_layout_count = 1;
+ executable_params.executable_layouts = &executable_layout_;
+
+ IREE_ASSERT_OK(iree_hal_executable_cache_prepare_executable(
+ executable_cache_, &executable_params, &executable_));
+ }
+
+ // Releases the objects created by PrepareAbsExecutable() in reverse order
+ // and verifies the inline loop completed without error.
+ void CleanupExecutable() {
+ iree_hal_executable_release(executable_);
+ iree_hal_executable_layout_release(executable_layout_);
+ iree_hal_descriptor_set_layout_release(descriptor_set_layout_);
+ iree_hal_executable_cache_release(executable_cache_);
+ IREE_ASSERT_OK(loop_status_);
+ }
+
+ // Status captured by the inline loop used during executable preparation.
+ iree_status_t loop_status_ = iree_ok_status();
+ iree_hal_executable_cache_t* executable_cache_ = NULL;
+ iree_hal_descriptor_set_layout_t* descriptor_set_layout_ = NULL;
+ iree_hal_executable_layout_t* executable_layout_ = NULL;
+ iree_hal_executable_t* executable_ = NULL;
+};
+
+// End-to-end dispatch: records a command buffer that runs the test
+// executable over a single -2.5f input and verifies the device produced 2.5f
+// in the output buffer.
+TEST_P(command_buffer_dispatch_test, DispatchAbs) {
+ PrepareAbsExecutable();
+
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_command_buffer_create(
+ device_,
+ IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
+ IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION,
+ IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+
+ IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+
+ // Create input and output buffers.
+ iree_hal_buffer_params_t input_params = {0};
+ input_params.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL;
+ input_params.usage =
+ IREE_HAL_BUFFER_USAGE_DISPATCH | IREE_HAL_BUFFER_USAGE_TRANSFER;
+ iree_hal_buffer_view_t* input_buffer_view = NULL;
+ // Single rank-0 f32 element; the executable is expected to compute abs().
+ float input_data[1] = {-2.5f};
+ IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+ device_allocator_, /*shape=*/NULL,
+ /*shape_rank=*/0, IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, input_params,
+ iree_make_const_byte_span((void*)input_data, sizeof(input_data)),
+ &input_buffer_view));
+ iree_hal_buffer_params_t output_params = {0};
+ output_params.type =
+ IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+ output_params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+ IREE_HAL_BUFFER_USAGE_TRANSFER |
+ IREE_HAL_BUFFER_USAGE_MAPPING;
+ iree_hal_buffer_t* output_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+ device_allocator_, output_params, sizeof(float),
+ iree_const_byte_span_empty(), &output_buffer));
+
+ // Bind the input (0) and output (1) buffers to match the layout created in
+ // PrepareAbsExecutable().
+ iree_hal_descriptor_set_binding_t descriptor_set_bindings[] = {
+ {/*binding=*/0, iree_hal_buffer_view_buffer(input_buffer_view),
+ /*offset=*/0, iree_hal_buffer_view_byte_length(input_buffer_view)},
+ {/*binding=*/1, output_buffer, iree_hal_buffer_byte_offset(output_buffer),
+ iree_hal_buffer_byte_length(output_buffer)},
+ };
+
+ IREE_ASSERT_OK(iree_hal_command_buffer_push_descriptor_set(
+ command_buffer, executable_layout_, /*set=*/0,
+ IREE_ARRAYSIZE(descriptor_set_bindings), descriptor_set_bindings));
+
+ IREE_ASSERT_OK(iree_hal_command_buffer_dispatch(
+ command_buffer, executable_, /*entry_point=*/0,
+ /*workgroup_x=*/1, /*workgroup_y=*/1, /*workgroup_z=*/1));
+ // Barrier so the dispatch result is visible before readback/transfer.
+ IREE_ASSERT_OK(iree_hal_command_buffer_execution_barrier(
+ command_buffer,
+ /*source_stage_mask=*/IREE_HAL_EXECUTION_STAGE_DISPATCH |
+ IREE_HAL_EXECUTION_STAGE_TRANSFER |
+ IREE_HAL_EXECUTION_STAGE_COMMAND_RETIRE,
+ /*target_stage_mask=*/IREE_HAL_EXECUTION_STAGE_COMMAND_ISSUE |
+ IREE_HAL_EXECUTION_STAGE_DISPATCH | IREE_HAL_EXECUTION_STAGE_TRANSFER,
+ IREE_HAL_EXECUTION_BARRIER_FLAG_NONE, /*memory_barrier_count=*/0,
+ /*memory_barriers=*/NULL,
+ /*buffer_barrier_count=*/0, /*buffer_barriers=*/NULL));
+
+ IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+ IREE_ASSERT_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+ command_buffer));
+
+ // Transfer the result back to the host and check abs(-2.5f) == 2.5f.
+ float output_value = 0.0f;
+ IREE_ASSERT_OK(iree_hal_device_transfer_d2h(
+ device_, output_buffer,
+ /*source_offset=*/0, &output_value, sizeof(output_value),
+ IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()));
+ EXPECT_EQ(2.5f, output_value);
+
+ iree_hal_command_buffer_release(command_buffer);
+ iree_hal_buffer_release(output_buffer);
+ iree_hal_buffer_view_release(input_buffer_view);
+ CleanupExecutable();
+}
+
+} // namespace cts
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_TEST_H_
diff --git a/runtime/src/iree/hal/cts/command_buffer_test.h b/runtime/src/iree/hal/cts/command_buffer_test.h
new file mode 100644
index 0000000..2327197
--- /dev/null
+++ b/runtime/src/iree/hal/cts/command_buffer_test.h
@@ -0,0 +1,574 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CTS_COMMAND_BUFFER_TEST_H_
+#define IREE_HAL_CTS_COMMAND_BUFFER_TEST_H_
+
+#include <cstdint>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+using ::testing::ContainerEq;
+
+namespace {
+constexpr iree_device_size_t kDefaultAllocationSize = 1024;
+} // namespace
+
+class command_buffer_test : public CtsTestBase {
+ protected:
+ void CreateZeroedDeviceBuffer(iree_device_size_t buffer_size,
+ iree_hal_buffer_t** out_buffer) {
+ iree_hal_buffer_params_t params = {0};
+ params.type =
+ IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+ params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+ IREE_HAL_BUFFER_USAGE_TRANSFER |
+ IREE_HAL_BUFFER_USAGE_MAPPING;
+ iree_hal_buffer_t* device_buffer = NULL;
+ IREE_CHECK_OK(iree_hal_allocator_allocate_buffer(
+ iree_hal_device_allocator(device_), params, buffer_size,
+ iree_const_byte_span_empty(), &device_buffer));
+ IREE_ASSERT_OK(
+ iree_hal_buffer_map_zero(device_buffer, 0, IREE_WHOLE_BUFFER));
+ *out_buffer = device_buffer;
+ }
+
+ std::vector<uint8_t> RunFillBufferTest(iree_device_size_t buffer_size,
+ iree_device_size_t target_offset,
+ iree_device_size_t fill_length,
+ const void* pattern,
+ iree_host_size_t pattern_length) {
+ iree_hal_buffer_t* device_buffer = NULL;
+ CreateZeroedDeviceBuffer(buffer_size, &device_buffer);
+
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_CHECK_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_ANY, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+ IREE_CHECK_OK(iree_hal_command_buffer_begin(command_buffer));
+
+ // Fill the pattern.
+ IREE_CHECK_OK(iree_hal_command_buffer_fill_buffer(
+ command_buffer, device_buffer, target_offset, fill_length, pattern,
+ pattern_length));
+ IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
+ IREE_CHECK_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_ANY,
+ command_buffer));
+
+ // Read data for returning.
+ std::vector<uint8_t> actual_data(buffer_size);
+ IREE_CHECK_OK(iree_hal_device_transfer_d2h(
+ device_, device_buffer, /*source_offset=*/0,
+ /*target_buffer=*/actual_data.data(),
+ /*data_length=*/buffer_size, IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+ iree_infinite_timeout()));
+
+ // Cleanup and return.
+ iree_hal_command_buffer_release(command_buffer);
+ iree_hal_buffer_release(device_buffer);
+ return actual_data;
+ }
+};
+
+TEST_P(command_buffer_test, Create) {
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+
+ EXPECT_TRUE((iree_hal_command_buffer_allowed_categories(command_buffer) &
+ IREE_HAL_COMMAND_CATEGORY_DISPATCH) ==
+ IREE_HAL_COMMAND_CATEGORY_DISPATCH);
+
+ iree_hal_command_buffer_release(command_buffer);
+}
+
+TEST_P(command_buffer_test, BeginEnd) {
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+
+ IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+ IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+ iree_hal_command_buffer_release(command_buffer);
+}
+
+TEST_P(command_buffer_test, SubmitEmpty) {
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+
+ IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+ IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+ IREE_ASSERT_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+ command_buffer));
+
+ iree_hal_command_buffer_release(command_buffer);
+}
+
+TEST_P(command_buffer_test, CopyWholeBuffer) {
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_TRANSFER, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+
+ uint8_t i8_val = 0x54;
+ std::vector<uint8_t> reference_buffer(kDefaultAllocationSize);
+ std::memset(reference_buffer.data(), i8_val, kDefaultAllocationSize);
+
+ // Create and fill a host buffer.
+ iree_hal_buffer_params_t host_params = {0};
+ host_params.type =
+ IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+ host_params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+ IREE_HAL_BUFFER_USAGE_TRANSFER |
+ IREE_HAL_BUFFER_USAGE_MAPPING;
+ iree_hal_buffer_t* host_buffer = nullptr;
+ IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+ device_allocator_, host_params, kDefaultAllocationSize,
+ iree_make_const_byte_span(reference_buffer.data(),
+ reference_buffer.size()),
+ &host_buffer));
+
+ // Create a device buffer.
+ iree_hal_buffer_params_t device_params = {0};
+ device_params.type =
+ IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+ device_params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+ IREE_HAL_BUFFER_USAGE_TRANSFER |
+ IREE_HAL_BUFFER_USAGE_MAPPING;
+ iree_hal_buffer_t* device_buffer = nullptr;
+ IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+ device_allocator_, device_params, kDefaultAllocationSize,
+ iree_const_byte_span_empty(), &device_buffer));
+
+ // Copy the host buffer to the device buffer.
+ IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+ IREE_ASSERT_OK(iree_hal_command_buffer_copy_buffer(
+ command_buffer, /*source_buffer=*/host_buffer, /*source_offset=*/0,
+ /*target_buffer=*/device_buffer, /*target_offset=*/0,
+ /*length=*/kDefaultAllocationSize));
+ IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+ IREE_ASSERT_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_TRANSFER,
+ command_buffer));
+
+ // Read the device buffer and compare.
+ std::vector<uint8_t> actual_data(kDefaultAllocationSize);
+ IREE_ASSERT_OK(iree_hal_device_transfer_d2h(
+ device_, device_buffer, /*source_offset=*/0,
+ /*target_buffer=*/actual_data.data(),
+ /*data_length=*/kDefaultAllocationSize,
+ IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()));
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ // Must release the command buffer before resources used by it.
+ iree_hal_command_buffer_release(command_buffer);
+ iree_hal_buffer_release(device_buffer);
+ iree_hal_buffer_release(host_buffer);
+}
+
+TEST_P(command_buffer_test, CopySubBuffer) {
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_TRANSFER, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+
+ iree_hal_buffer_params_t device_params = {0};
+ device_params.type =
+ IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+ device_params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+ IREE_HAL_BUFFER_USAGE_TRANSFER |
+ IREE_HAL_BUFFER_USAGE_MAPPING;
+ iree_hal_buffer_t* device_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+ device_allocator_, device_params, kDefaultAllocationSize,
+ iree_const_byte_span_empty(), &device_buffer));
+
+ uint8_t i8_val = 0x88;
+ std::vector<uint8_t> reference_buffer(kDefaultAllocationSize);
+ std::memset(reference_buffer.data() + 8, i8_val,
+ kDefaultAllocationSize / 2 - 4);
+
+  // Create a host buffer sized at half the device buffer.
+ iree_hal_buffer_params_t host_params = {0};
+ host_params.type =
+ IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+ host_params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+ IREE_HAL_BUFFER_USAGE_TRANSFER |
+ IREE_HAL_BUFFER_USAGE_MAPPING;
+ std::vector<uint8_t> host_buffer_data(kDefaultAllocationSize, i8_val);
+ iree_hal_buffer_t* host_buffer = NULL;
+ IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+ device_allocator_, host_params, host_buffer_data.size() / 2,
+ iree_make_const_byte_span(host_buffer_data.data(),
+ host_buffer_data.size() / 2),
+ &host_buffer));
+
+ // Copy the host buffer to the device buffer; zero fill the untouched bytes.
+ uint8_t zero_val = 0x0;
+ IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+ IREE_ASSERT_OK(iree_hal_command_buffer_fill_buffer(
+ command_buffer, device_buffer, /*target_offset=*/0, /*length=*/8,
+ &zero_val, /*pattern_length=*/sizeof(zero_val)));
+ IREE_ASSERT_OK(iree_hal_command_buffer_copy_buffer(
+ command_buffer, /*source_buffer=*/host_buffer, /*source_offset=*/4,
+ /*target_buffer=*/device_buffer, /*target_offset=*/8,
+ /*length=*/kDefaultAllocationSize / 2 - 4));
+ IREE_ASSERT_OK(iree_hal_command_buffer_fill_buffer(
+ command_buffer, device_buffer,
+ /*target_offset=*/8 + kDefaultAllocationSize / 2 - 4,
+ /*length=*/kDefaultAllocationSize - (8 + kDefaultAllocationSize / 2 - 4),
+ &zero_val,
+ /*pattern_length=*/sizeof(zero_val)));
+ IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+ IREE_ASSERT_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_TRANSFER,
+ command_buffer));
+
+ // Read the device buffer and compare.
+ std::vector<uint8_t> actual_data(kDefaultAllocationSize);
+ IREE_ASSERT_OK(iree_hal_device_transfer_d2h(
+ device_, device_buffer, /*source_offset=*/0,
+ /*target_buffer=*/actual_data.data(),
+ /*data_length=*/kDefaultAllocationSize,
+ IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()));
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ // Must release the command buffer before resources used by it.
+ iree_hal_command_buffer_release(command_buffer);
+ iree_hal_buffer_release(device_buffer);
+ iree_hal_buffer_release(host_buffer);
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern1_size1_offset0_length1) {
+ iree_device_size_t buffer_size = 1;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 1;
+ uint8_t pattern = 0x07;
+ std::vector<uint8_t> reference_buffer{0x07};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern1_size5_offset0_length5) {
+ iree_device_size_t buffer_size = 5;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 5;
+ uint8_t pattern = 0x07;
+ std::vector<uint8_t> reference_buffer{0x07, 0x07, 0x07, 0x07, //
+ 0x07};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern1_size16_offset0_length1) {
+ iree_device_size_t buffer_size = 16;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 1;
+ uint8_t pattern = 0x07;
+ std::vector<uint8_t> reference_buffer{0x07, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern1_size16_offset0_length3) {
+ iree_device_size_t buffer_size = 16;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 3;
+ uint8_t pattern = 0x07;
+ std::vector<uint8_t> reference_buffer{0x07, 0x07, 0x07, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern1_size16_offset0_length8) {
+ iree_device_size_t buffer_size = 16;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 8;
+ uint8_t pattern = 0x07;
+ std::vector<uint8_t> reference_buffer{0x07, 0x07, 0x07, 0x07, //
+ 0x07, 0x07, 0x07, 0x07, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern1_size16_offset2_length8) {
+ iree_device_size_t buffer_size = 16;
+ iree_device_size_t target_offset = 2;
+ iree_device_size_t fill_length = 8;
+ uint8_t pattern = 0x07;
+ std::vector<uint8_t> reference_buffer{0x00, 0x00, 0x07, 0x07, //
+ 0x07, 0x07, 0x07, 0x07, //
+ 0x07, 0x07, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern2_size2_offset0_length2) {
+ iree_device_size_t buffer_size = 2;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 2;
+ uint16_t pattern = 0xAB23;
+ std::vector<uint8_t> reference_buffer{0x23, 0xAB};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern2_size16_offset0_length8) {
+ iree_device_size_t buffer_size = 16;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 8;
+ uint16_t pattern = 0xAB23;
+ std::vector<uint8_t> reference_buffer{0x23, 0xAB, 0x23, 0xAB, //
+ 0x23, 0xAB, 0x23, 0xAB, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern2_size16_offset0_length10) {
+ iree_device_size_t buffer_size = 16;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 10;
+ uint16_t pattern = 0xAB23;
+ std::vector<uint8_t> reference_buffer{0x23, 0xAB, 0x23, 0xAB, //
+ 0x23, 0xAB, 0x23, 0xAB, //
+ 0x23, 0xAB, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern2_size16_offset2_length8) {
+ iree_device_size_t buffer_size = 16;
+ iree_device_size_t target_offset = 2;
+ iree_device_size_t fill_length = 8;
+ uint16_t pattern = 0xAB23;
+ std::vector<uint8_t> reference_buffer{0x00, 0x00, 0x23, 0xAB, //
+ 0x23, 0xAB, 0x23, 0xAB, //
+ 0x23, 0xAB, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern4_size4_offset0_length4) {
+ iree_device_size_t buffer_size = 4;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 4;
+ uint32_t pattern = 0xAB23CD45;
+ std::vector<uint8_t> reference_buffer{0x45, 0xCD, 0x23, 0xAB};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, FillBuffer_pattern4_size16_offset0_length8) {
+ iree_device_size_t buffer_size = 16;
+ iree_device_size_t target_offset = 0;
+ iree_device_size_t fill_length = 8;
+ uint32_t pattern = 0xAB23CD45;
+ std::vector<uint8_t> reference_buffer{0x45, 0xCD, 0x23, 0xAB, //
+ 0x45, 0xCD, 0x23, 0xAB, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00};
+ std::vector<uint8_t> actual_buffer =
+ RunFillBufferTest(buffer_size, target_offset, fill_length,
+ (void*)&pattern, sizeof(pattern));
+ EXPECT_THAT(actual_buffer, ContainerEq(reference_buffer));
+}
+
+TEST_P(command_buffer_test, UpdateBufferWholeBuffer) {
+ iree_device_size_t target_buffer_size = 16;
+ std::vector<uint8_t> source_buffer{0x01, 0x02, 0x03, 0x04, //
+ 0x05, 0x06, 0x07, 0x08, //
+ 0xA1, 0xA2, 0xA3, 0xA4, //
+ 0xA5, 0xA6, 0xA7, 0xA8};
+
+ iree_hal_buffer_t* device_buffer = NULL;
+ CreateZeroedDeviceBuffer(target_buffer_size, &device_buffer);
+
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_CHECK_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_ANY, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+ IREE_CHECK_OK(iree_hal_command_buffer_begin(command_buffer));
+
+ // Issue the update_buffer command.
+ IREE_CHECK_OK(iree_hal_command_buffer_update_buffer(
+ command_buffer, source_buffer.data(), /*source_offset=*/0, device_buffer,
+ /*target_offset=*/0, /*length=*/target_buffer_size));
+ IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
+ IREE_CHECK_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_ANY,
+ command_buffer));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(target_buffer_size);
+ IREE_CHECK_OK(iree_hal_device_transfer_d2h(
+ device_, device_buffer, /*source_offset=*/0, actual_data.data(),
+ actual_data.size(), IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+ iree_infinite_timeout()));
+ EXPECT_THAT(actual_data, ContainerEq(source_buffer));
+
+ iree_hal_command_buffer_release(command_buffer);
+ iree_hal_buffer_release(device_buffer);
+}
+
+TEST_P(command_buffer_test, UpdateBufferWithOffsets) {
+ iree_device_size_t target_buffer_size = 16;
+ std::vector<uint8_t> source_buffer{0x01, 0x02, 0x03, 0x04, //
+ 0x05, 0x06, 0x07, 0x08, //
+ 0xA1, 0xA2, 0xA3, 0xA4, //
+ 0xA5, 0xA6, 0xA7, 0xA8};
+
+ iree_hal_buffer_t* device_buffer = NULL;
+ CreateZeroedDeviceBuffer(target_buffer_size, &device_buffer);
+
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_CHECK_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_ANY, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+ IREE_CHECK_OK(iree_hal_command_buffer_begin(command_buffer));
+
+ // Issue the update_buffer command.
+ IREE_CHECK_OK(iree_hal_command_buffer_update_buffer(
+ command_buffer, source_buffer.data(), /*source_offset=*/4, device_buffer,
+ /*target_offset=*/4, /*length=*/8));
+ IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
+ IREE_CHECK_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_ANY,
+ command_buffer));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(target_buffer_size);
+ IREE_CHECK_OK(iree_hal_device_transfer_d2h(
+ device_, device_buffer, /*source_offset=*/0, actual_data.data(),
+ actual_data.size(), IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+ iree_infinite_timeout()));
+ std::vector<uint8_t> reference_buffer{0x00, 0x00, 0x00, 0x00, //
+ 0x05, 0x06, 0x07, 0x08, //
+ 0xA1, 0xA2, 0xA3, 0xA4, //
+ 0x00, 0x00, 0x00, 0x00};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+ iree_hal_command_buffer_release(command_buffer);
+ iree_hal_buffer_release(device_buffer);
+}
+
+TEST_P(command_buffer_test, UpdateBufferSubspan) {
+ iree_device_size_t target_buffer_size = 16;
+ std::vector<uint8_t> source_buffer{0x01, 0x02, 0x03, 0x04, //
+ 0x05, 0x06, 0x07, 0x08, //
+ 0xA1, 0xA2, 0xA3, 0xA4, //
+ 0xA5, 0xA6, 0xA7, 0xA8};
+
+ iree_hal_buffer_t* device_buffer = NULL;
+ CreateZeroedDeviceBuffer(target_buffer_size, &device_buffer);
+
+ // Create a subspan.
+ iree_device_size_t subspan_length = 8;
+ iree_hal_buffer_t* buffer_subspan;
+ IREE_ASSERT_OK(iree_hal_buffer_subspan(device_buffer, /*byte_offset=*/4,
+ subspan_length, &buffer_subspan));
+
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_CHECK_OK(iree_hal_command_buffer_create(
+ device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+ IREE_HAL_COMMAND_CATEGORY_ANY, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &command_buffer));
+ IREE_CHECK_OK(iree_hal_command_buffer_begin(command_buffer));
+
+ // Issue the update_buffer command.
+ IREE_CHECK_OK(iree_hal_command_buffer_update_buffer(
+ command_buffer, source_buffer.data(), /*source_offset=*/4, buffer_subspan,
+ /*target_offset=*/4, /*length=*/4));
+ IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
+ IREE_CHECK_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_ANY,
+ command_buffer));
+
+ // Check that the contents match what we expect.
+ std::vector<uint8_t> actual_data(target_buffer_size);
+ IREE_ASSERT_OK(iree_hal_device_transfer_d2h(
+ device_, device_buffer, /*source_offset=*/0, actual_data.data(),
+ actual_data.size(), IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+ iree_infinite_timeout()));
+ std::vector<uint8_t> reference_buffer{0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, //
+ 0x05, 0x06, 0x07, 0x08, //
+ 0x00, 0x00, 0x00, 0x00};
+ EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+ // Also check the subspan.
+ std::vector<uint8_t> actual_data_subspan(subspan_length);
+ IREE_ASSERT_OK(iree_hal_device_transfer_d2h(
+ device_, buffer_subspan, /*source_offset=*/0, actual_data_subspan.data(),
+ actual_data_subspan.size(), IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+ iree_infinite_timeout()));
+ std::vector<uint8_t> reference_buffer_subspan{0x00, 0x00, 0x00, 0x00, //
+ 0x05, 0x06, 0x07, 0x08};
+ EXPECT_THAT(actual_data_subspan, ContainerEq(reference_buffer_subspan));
+
+ iree_hal_command_buffer_release(command_buffer);
+ iree_hal_buffer_release(buffer_subspan);
+ iree_hal_buffer_release(device_buffer);
+}
+
+} // namespace cts
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_CTS_COMMAND_BUFFER_TEST_H_
diff --git a/runtime/src/iree/hal/cts/cts_test_base.h b/runtime/src/iree/hal/cts/cts_test_base.h
new file mode 100644
index 0000000..32e4431
--- /dev/null
+++ b/runtime/src/iree/hal/cts/cts_test_base.h
@@ -0,0 +1,179 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CTS_CTS_TEST_BASE_H_
+#define IREE_HAL_CTS_CTS_TEST_BASE_H_
+
+#include <set>
+#include <string>
+
+#include "iree/base/api.h"
+#include "iree/base/string_view.h"
+#include "iree/hal/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+// Registers the driver that will be used with INSTANTIATE_TEST_SUITE_P.
+// Leaf test binaries must implement this function.
+iree_status_t register_test_driver(iree_hal_driver_registry_t* registry);
+
+// Returns the executable format for the driver under test.
+// Leaf test binaries must implement this function.
+const char* get_test_executable_format();
+
+// Returns a file's executable data for the driver under test.
+// Leaf test binaries must implement this function.
+iree_const_byte_span_t get_test_executable_data(iree_string_view_t file_name);
+
+// Common setup for tests parameterized on driver names.
+class CtsTestBase : public ::testing::TestWithParam<std::string> {
+ protected:
+ static void SetUpTestSuite() {
+ IREE_CHECK_OK(register_test_driver(iree_hal_driver_registry_default()));
+ }
+
+ virtual void SetUp() {
+ const std::string& driver_name = GetParam();
+
+ // Get driver with the given name and create its default device.
+ // Skip drivers that are (gracefully) unavailable, fail if creation fails.
+ iree_hal_driver_t* driver = NULL;
+ iree_status_t status = TryGetDriver(driver_name, &driver);
+ if (iree_status_is_unavailable(status)) {
+ iree_status_free(status);
+ IREE_LOG(WARNING) << "Skipping test as '" << driver_name
+ << "' driver is unavailable";
+ GTEST_SKIP();
+ return;
+ }
+ IREE_ASSERT_OK(status);
+ driver_ = driver;
+
+ iree_hal_device_t* device = NULL;
+ status = iree_hal_driver_create_default_device(
+ driver_, iree_allocator_system(), &device);
+ if (iree_status_is_unavailable(status)) {
+ iree_status_free(status);
+ IREE_LOG(WARNING) << "Skipping test as default device for '"
+ << driver_name << "' driver is unavailable";
+ GTEST_SKIP();
+ return;
+ }
+ IREE_ASSERT_OK(status);
+ iree_status_free(status);
+ device_ = device;
+
+ device_allocator_ = iree_hal_device_allocator(device_);
+ iree_hal_allocator_retain(device_allocator_);
+ }
+
+ virtual void TearDown() {
+ if (device_allocator_) {
+ iree_hal_allocator_release(device_allocator_);
+ device_allocator_ = NULL;
+ }
+ if (device_) {
+ iree_hal_device_release(device_);
+ device_ = NULL;
+ }
+ if (driver_) {
+ iree_hal_driver_release(driver_);
+ driver_ = NULL;
+ }
+ }
+
+ // Submits |command_buffer| to the device and waits for it to complete before
+ // returning.
+ iree_status_t SubmitCommandBufferAndWait(
+ iree_hal_command_category_t command_categories,
+ iree_hal_command_buffer_t* command_buffer) {
+ iree_hal_semaphore_t* signal_semaphore = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_hal_semaphore_create(device_, 0ull, &signal_semaphore));
+
+ iree_hal_submission_batch_t submission_batch;
+
+ // No wait semaphores.
+ submission_batch.wait_semaphores.count = 0;
+ submission_batch.wait_semaphores.semaphores = NULL;
+ submission_batch.wait_semaphores.payload_values = NULL;
+
+ iree_hal_command_buffer_t* command_buffer_ptrs[] = {command_buffer};
+ submission_batch.command_buffer_count = IREE_ARRAYSIZE(command_buffer_ptrs);
+ submission_batch.command_buffers = command_buffer_ptrs;
+
+ // One signal semaphore from 0 -> 1.
+ iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
+ uint64_t payload_values[] = {1ull};
+ submission_batch.signal_semaphores.count =
+ IREE_ARRAYSIZE(signal_semaphore_ptrs);
+ submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+ submission_batch.signal_semaphores.payload_values = payload_values;
+
+ iree_status_t status =
+ iree_hal_device_queue_submit(device_, command_categories,
+ /*queue_affinity=*/0,
+ /*batch_count=*/1, &submission_batch);
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_semaphore_wait(signal_semaphore, 1ull,
+ iree_infinite_timeout());
+ }
+
+ iree_hal_semaphore_release(signal_semaphore);
+ return status;
+ }
+
+ iree_hal_driver_t* driver_ = NULL;
+ iree_hal_device_t* device_ = NULL;
+ iree_hal_allocator_t* device_allocator_ = NULL;
+
+ private:
+ // Gets a HAL driver with the provided name, if available.
+ static iree_status_t TryGetDriver(const std::string& driver_name,
+ iree_hal_driver_t** out_driver) {
+ static std::set<std::string> unavailable_driver_names;
+
+ // If creation failed before, don't try again.
+ if (unavailable_driver_names.find(driver_name) !=
+ unavailable_driver_names.end()) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE, "driver unavailable");
+ }
+
+ // No existing driver, attempt to create.
+ iree_hal_driver_t* driver = NULL;
+ iree_status_t status = iree_hal_driver_registry_try_create_by_name(
+ iree_hal_driver_registry_default(),
+ iree_make_string_view(driver_name.data(), driver_name.size()),
+ iree_allocator_system(), &driver);
+ if (iree_status_is_unavailable(status)) {
+ unavailable_driver_names.insert(driver_name);
+ }
+ if (iree_status_is_ok(status)) {
+ *out_driver = driver;
+ }
+ return status;
+ }
+};
+
+struct GenerateTestName {
+ template <class ParamType>
+ std::string operator()(
+ const ::testing::TestParamInfo<ParamType>& info) const {
+ std::string name = info.param;
+ std::replace(name.begin(), name.end(), '-', '_');
+ return name;
+ }
+};
+
+} // namespace cts
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_CTS_CTS_TEST_BASE_H_
diff --git a/runtime/src/iree/hal/cts/cts_test_template.cc.in b/runtime/src/iree/hal/cts/cts_test_template.cc.in
new file mode 100644
index 0000000..7783f5e
--- /dev/null
+++ b/runtime/src/iree/hal/cts/cts_test_template.cc.in
@@ -0,0 +1,63 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// clang-format off
+#cmakedefine IREE_CTS_TEST_FILE_PATH "@IREE_CTS_TEST_FILE_PATH@"
+#cmakedefine IREE_CTS_DRIVER_REGISTRATION_HDR "@IREE_CTS_DRIVER_REGISTRATION_HDR@"
+#cmakedefine IREE_CTS_DRIVER_REGISTRATION_FN @IREE_CTS_DRIVER_REGISTRATION_FN@
+#cmakedefine IREE_CTS_TEST_CLASS_NAME @IREE_CTS_TEST_CLASS_NAME@
+#cmakedefine IREE_CTS_DRIVER_NAME "@IREE_CTS_DRIVER_NAME@"
+#cmakedefine IREE_CTS_EXECUTABLE_FORMAT @IREE_CTS_EXECUTABLE_FORMAT@
+#cmakedefine IREE_CTS_EXECUTABLES_TESTDATA_HDR "@IREE_CTS_EXECUTABLES_TESTDATA_HDR@"
+// clang-format on
+
+#include IREE_CTS_TEST_FILE_PATH
+
+#include IREE_CTS_DRIVER_REGISTRATION_HDR
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+
+#ifdef IREE_CTS_EXECUTABLES_TESTDATA_HDR
+#include IREE_CTS_EXECUTABLES_TESTDATA_HDR
+#endif
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+iree_status_t register_test_driver(iree_hal_driver_registry_t* registry) {
+ return IREE_CTS_DRIVER_REGISTRATION_FN(registry);
+}
+
+const char* get_test_executable_format() {
+#ifdef IREE_CTS_EXECUTABLE_FORMAT
+ return IREE_CTS_EXECUTABLE_FORMAT;
+#else
+ return "UNDEFINED";
+#endif
+}
+
+iree_const_byte_span_t get_test_executable_data(iree_string_view_t file_name) {
+#ifdef IREE_CTS_EXECUTABLES_TESTDATA_HDR
+ const struct iree_file_toc_t* toc = iree_cts_testdata_executables_create();
+ for (size_t i = 0; i < iree_cts_testdata_executables_size(); ++i) {
+ const auto& file = toc[i];
+ if (iree_string_view_equal(file_name, iree_make_cstring_view(file.name))) {
+ return iree_make_const_byte_span(file.data, file.size);
+ }
+ }
+    // TODO(scotttodd): error handling / reporting? This is a sharp edge.
+#endif
+ return iree_const_byte_span_empty();
+}
+
+INSTANTIATE_TEST_SUITE_P(CTS, IREE_CTS_TEST_CLASS_NAME,
+ ::testing::Values(IREE_CTS_DRIVER_NAME),
+ GenerateTestName());
+
+} // namespace cts
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/cts/descriptor_set_layout_test.h b/runtime/src/iree/hal/cts/descriptor_set_layout_test.h
new file mode 100644
index 0000000..2b5515c
--- /dev/null
+++ b/runtime/src/iree/hal/cts/descriptor_set_layout_test.h
@@ -0,0 +1,75 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// CTS conformance tests for iree_hal_descriptor_set_layout_t creation.
+#ifndef IREE_HAL_CTS_DESCRIPTOR_SET_LAYOUT_TEST_H_
+#define IREE_HAL_CTS_DESCRIPTOR_SET_LAYOUT_TEST_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class descriptor_set_layout_test : public CtsTestBase {};
+// CtsTestBase provides the device_ handle used by each test below.
+// Note: bindingCount == 0 is valid in VkDescriptorSetLayoutCreateInfo:
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSetLayoutCreateInfo.html
+TEST_P(descriptor_set_layout_test, CreateWithNoBindings) {
+  iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      /*binding_count=*/0,
+      /*bindings=*/NULL, &descriptor_set_layout));
+  iree_hal_descriptor_set_layout_release(descriptor_set_layout);
+}
+
+TEST_P(descriptor_set_layout_test, CreateWithOneBinding) {
+  iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  iree_hal_descriptor_set_layout_binding_t descriptor_set_layout_bindings[] = {
+      {/*binding=*/0, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+  };
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      IREE_ARRAYSIZE(descriptor_set_layout_bindings),
+      descriptor_set_layout_bindings, &descriptor_set_layout));
+  iree_hal_descriptor_set_layout_release(descriptor_set_layout);
+}
+
+TEST_P(descriptor_set_layout_test, CreateWithTwoBindings) {
+  iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  iree_hal_descriptor_set_layout_binding_t descriptor_set_layout_bindings[] = {
+      {/*binding=*/0, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+      {/*binding=*/1, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+  };
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      IREE_ARRAYSIZE(descriptor_set_layout_bindings),
+      descriptor_set_layout_bindings, &descriptor_set_layout));
+  iree_hal_descriptor_set_layout_release(descriptor_set_layout);
+}
+
+TEST_P(descriptor_set_layout_test, CreateWithPushDescriptorType) {
+  iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  iree_hal_descriptor_set_layout_binding_t descriptor_set_layout_bindings[] = {
+      {/*binding=*/0, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+      {/*binding=*/1, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+  };
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_PUSH_ONLY,
+      IREE_ARRAYSIZE(descriptor_set_layout_bindings),
+      descriptor_set_layout_bindings, &descriptor_set_layout));
+  iree_hal_descriptor_set_layout_release(descriptor_set_layout);
+}
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
+
+#endif  // IREE_HAL_CTS_DESCRIPTOR_SET_LAYOUT_TEST_H_
diff --git a/runtime/src/iree/hal/cts/descriptor_set_test.h b/runtime/src/iree/hal/cts/descriptor_set_test.h
new file mode 100644
index 0000000..241ad95
--- /dev/null
+++ b/runtime/src/iree/hal/cts/descriptor_set_test.h
@@ -0,0 +1,72 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// CTS conformance tests for iree_hal_descriptor_set_t creation.
+#ifndef IREE_HAL_CTS_DESCRIPTOR_SET_TEST_H_
+#define IREE_HAL_CTS_DESCRIPTOR_SET_TEST_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class descriptor_set_test : public CtsTestBase {};
+// CtsTestBase provides the device_ handle used by each test below.
+TEST_P(descriptor_set_test, CreateWithNoBindings) {
+  iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      /*binding_count=*/0,
+      /*bindings=*/NULL, &descriptor_set_layout));
+
+  iree_hal_descriptor_set_t* descriptor_set = NULL;
+  IREE_ASSERT_OK(iree_hal_descriptor_set_create(
+      device_, descriptor_set_layout, /*binding_count=*/0,
+      /*bindings=*/NULL, &descriptor_set));
+
+  // The descriptor set struct is an opaque handle. We can't test for much more
+  // than successful creation.
+
+  iree_hal_descriptor_set_release(descriptor_set);
+  iree_hal_descriptor_set_layout_release(descriptor_set_layout);
+}
+
+TEST_P(descriptor_set_test, CreateWithTwoBindings) {
+  iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  iree_hal_descriptor_set_layout_binding_t descriptor_set_layout_bindings[] = {
+      {/*binding=*/0, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+      {/*binding=*/1, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+  };
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      IREE_ARRAYSIZE(descriptor_set_layout_bindings),
+      descriptor_set_layout_bindings, &descriptor_set_layout));
+
+  iree_hal_descriptor_set_binding_t descriptor_set_bindings[] = {
+      {/*binding=*/0, /*buffer=*/NULL, /*offset=*/0, /*length=*/0},
+      {/*binding=*/1, /*buffer=*/NULL, /*offset=*/0, /*length=*/0},
+  };
+  iree_hal_descriptor_set_t* descriptor_set = NULL;
+  IREE_ASSERT_OK(iree_hal_descriptor_set_create(
+      device_, descriptor_set_layout, IREE_ARRAYSIZE(descriptor_set_bindings),
+      descriptor_set_bindings, &descriptor_set));
+
+  // The descriptor set struct is an opaque handle. We can't test for much more
+  // than successful creation.
+
+  iree_hal_descriptor_set_release(descriptor_set);
+  iree_hal_descriptor_set_layout_release(descriptor_set_layout);
+}
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
+
+#endif  // IREE_HAL_CTS_DESCRIPTOR_SET_TEST_H_
diff --git a/runtime/src/iree/hal/cts/driver_test.h b/runtime/src/iree/hal/cts/driver_test.h
new file mode 100644
index 0000000..0ebb947
--- /dev/null
+++ b/runtime/src/iree/hal/cts/driver_test.h
@@ -0,0 +1,53 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// CTS tests enumerating and creating devices from the driver under test.
+#ifndef IREE_HAL_CTS_DRIVER_TEST_H_
+#define IREE_HAL_CTS_DRIVER_TEST_H_
+
+#include <iostream>
+#include <string>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class driver_test : public CtsTestBase {};
+// CtsTestBase provides the driver_ handle used below.
+TEST_P(driver_test, QueryAndCreateAvailableDevices) {
+  iree_hal_device_info_t* device_infos = NULL;
+  iree_host_size_t device_info_count;
+  IREE_ASSERT_OK(iree_hal_driver_query_available_devices(
+      driver_, iree_allocator_system(), &device_infos, &device_info_count));
+  // NOTE(review): the std::cout messages below omit '\n'; confirm intended.
+  std::cout << "Driver has " << device_info_count << " device(s)";
+  for (iree_host_size_t i = 0; i < device_info_count; ++i) {
+    std::cout << " Creating device '"
+              << std::string(device_infos[i].name.data,
+                             device_infos[i].name.size)
+              << "'";
+    iree_hal_device_t* device = NULL;
+    IREE_ASSERT_OK(iree_hal_driver_create_device(
+        driver_, device_infos[i].device_id, iree_allocator_system(), &device));
+    iree_string_view_t device_id = iree_hal_device_id(device);
+    std::cout << " Created device with id: '"
+              << std::string(device_id.data, device_id.size) << "'";
+    iree_hal_device_release(device);
+  }
+
+  iree_allocator_free(iree_allocator_system(), device_infos);
+}
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
+
+#endif  // IREE_HAL_CTS_DRIVER_TEST_H_
diff --git a/runtime/src/iree/hal/cts/event_test.h b/runtime/src/iree/hal/cts/event_test.h
new file mode 100644
index 0000000..7bc1769
--- /dev/null
+++ b/runtime/src/iree/hal/cts/event_test.h
@@ -0,0 +1,123 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// CTS tests for iree_hal_event_t signal/reset/wait within command buffers.
+#ifndef IREE_HAL_CTS_EVENT_TEST_H_
+#define IREE_HAL_CTS_EVENT_TEST_H_
+
+#include <cstdint>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class event_test : public CtsTestBase {};
+// CtsTestBase provides device_ and SubmitCommandBufferAndWait() used below.
+TEST_P(event_test, Create) {
+  iree_hal_event_t* event = NULL;
+  IREE_ASSERT_OK(iree_hal_event_create(device_, &event));
+  iree_hal_event_release(event);
+}
+
+TEST_P(event_test, SignalAndReset) {
+  iree_hal_event_t* event = NULL;
+  IREE_ASSERT_OK(iree_hal_event_create(device_, &event));
+
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+      &command_buffer));
+
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_signal_event(
+      command_buffer, event, IREE_HAL_EXECUTION_STAGE_COMMAND_PROCESS));
+  IREE_ASSERT_OK(iree_hal_command_buffer_reset_event(
+      command_buffer, event, IREE_HAL_EXECUTION_STAGE_COMMAND_RETIRE));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+  IREE_ASSERT_OK(SubmitCommandBufferAndWait(IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                            command_buffer));
+
+  iree_hal_event_release(event);
+  iree_hal_command_buffer_release(command_buffer);
+}
+
+TEST_P(event_test, SubmitWithChainedCommandBuffers) {
+  iree_hal_event_t* event = NULL;
+  IREE_ASSERT_OK(iree_hal_event_create(device_, &event));
+
+  iree_hal_command_buffer_t* command_buffer_1 = NULL;
+  iree_hal_command_buffer_t* command_buffer_2 = NULL;
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+      &command_buffer_1));
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+      &command_buffer_2));
+
+  // First command buffer signals the event when it completes.
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer_1));
+  IREE_ASSERT_OK(iree_hal_command_buffer_signal_event(
+      command_buffer_1, event, IREE_HAL_EXECUTION_STAGE_COMMAND_RETIRE));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer_1));
+
+  // Second command buffer waits on the event before starting.
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer_2));
+  const iree_hal_event_t* event_pts[] = {event};
+  // TODO(scotttodd): verify execution stage usage (check Vulkan spec)
+  IREE_ASSERT_OK(iree_hal_command_buffer_wait_events(
+      command_buffer_2, IREE_ARRAYSIZE(event_pts), event_pts,
+      /*source_stage_mask=*/IREE_HAL_EXECUTION_STAGE_COMMAND_RETIRE,
+      /*target_stage_mask=*/IREE_HAL_EXECUTION_STAGE_COMMAND_ISSUE,
+      /*memory_barrier_count=*/0,
+      /*memory_barriers=*/NULL, /*buffer_barrier_count=*/0,
+      /*buffer_barriers=*/NULL));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer_2));
+
+  // No wait semaphores, one signal which we immediately wait on after submit.
+  iree_hal_submission_batch_t submission_batch;
+  submission_batch.wait_semaphores.count = 0;
+  submission_batch.wait_semaphores.semaphores = NULL;
+  submission_batch.wait_semaphores.payload_values = NULL;
+  iree_hal_command_buffer_t* command_buffer_ptrs[] = {command_buffer_1,
+                                                      command_buffer_2};
+  submission_batch.command_buffer_count = IREE_ARRAYSIZE(command_buffer_ptrs);
+  submission_batch.command_buffers = command_buffer_ptrs;
+  iree_hal_semaphore_t* signal_semaphore;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore));
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  uint64_t payload_values[] = {1ull};
+  submission_batch.signal_semaphores.payload_values = payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+  IREE_ASSERT_OK(
+      iree_hal_semaphore_wait(signal_semaphore, 1ull, iree_infinite_timeout()));
+
+  iree_hal_command_buffer_release(command_buffer_1);
+  iree_hal_command_buffer_release(command_buffer_2);
+  iree_hal_semaphore_release(signal_semaphore);
+  iree_hal_event_release(event);
+}
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
+
+#endif  // IREE_HAL_CTS_EVENT_TEST_H_
diff --git a/runtime/src/iree/hal/cts/executable_cache_test.h b/runtime/src/iree/hal/cts/executable_cache_test.h
new file mode 100644
index 0000000..f8c23b2
--- /dev/null
+++ b/runtime/src/iree/hal/cts/executable_cache_test.h
@@ -0,0 +1,96 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// CTS tests for iree_hal_executable_cache_t creation and preparation.
+#ifndef IREE_HAL_CTS_EXECUTABLE_CACHE_TEST_H_
+#define IREE_HAL_CTS_EXECUTABLE_CACHE_TEST_H_
+
+#include "iree/base/api.h"
+#include "iree/base/string_view.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class executable_cache_test : public CtsTestBase {};
+// CtsTestBase provides the device_ handle used by each test below.
+TEST_P(executable_cache_test, Create) {
+  iree_status_t loop_status = iree_ok_status();
+  iree_hal_executable_cache_t* executable_cache = NULL;
+  IREE_ASSERT_OK(iree_hal_executable_cache_create(
+      device_, iree_make_cstring_view("default"),
+      iree_loop_inline(&loop_status), &executable_cache));
+
+  iree_hal_executable_cache_release(executable_cache);
+  IREE_ASSERT_OK(loop_status);
+}
+
+TEST_P(executable_cache_test, CantPrepareUnknownFormat) {
+  iree_status_t loop_status = iree_ok_status();
+  iree_hal_executable_cache_t* executable_cache = NULL;
+  IREE_ASSERT_OK(iree_hal_executable_cache_create(
+      device_, iree_make_cstring_view("default"),
+      iree_loop_inline(&loop_status), &executable_cache));
+
+  EXPECT_FALSE(iree_hal_executable_cache_can_prepare_format(
+      executable_cache, /*caching_mode=*/0, iree_make_cstring_view("FOO?")));
+
+  iree_hal_executable_cache_release(executable_cache);
+  IREE_ASSERT_OK(loop_status);
+}
+
+TEST_P(executable_cache_test, PrepareExecutable) {
+  iree_status_t loop_status = iree_ok_status();
+  iree_hal_executable_cache_t* executable_cache = NULL;
+  IREE_ASSERT_OK(iree_hal_executable_cache_create(
+      device_, iree_make_cstring_view("default"),
+      iree_loop_inline(&loop_status), &executable_cache));
+
+  // Note: this layout must match the testdata executable.
+  iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  iree_hal_descriptor_set_layout_binding_t descriptor_set_layout_bindings[] = {
+      {0, IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+      {1, IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+  };
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      IREE_ARRAYSIZE(descriptor_set_layout_bindings),
+      descriptor_set_layout_bindings, &descriptor_set_layout));
+  iree_hal_executable_layout_t* executable_layout;
+  IREE_ASSERT_OK(iree_hal_executable_layout_create(
+      device_, /*push_constants=*/0, /*set_layout_count=*/1,
+      &descriptor_set_layout, &executable_layout));
+
+  iree_hal_executable_params_t executable_params;
+  iree_hal_executable_params_initialize(&executable_params);
+  executable_params.caching_mode =
+      IREE_HAL_EXECUTABLE_CACHING_MODE_ALIAS_PROVIDED_DATA;
+  executable_params.executable_format =
+      iree_make_cstring_view(get_test_executable_format());
+  executable_params.executable_data = get_test_executable_data(
+      iree_make_cstring_view("executable_cache_test.bin"));
+  executable_params.executable_layout_count = 1;
+  executable_params.executable_layouts = &executable_layout;
+
+  iree_hal_executable_t* executable = NULL;
+  IREE_ASSERT_OK(iree_hal_executable_cache_prepare_executable(
+      executable_cache, &executable_params, &executable));
+
+  iree_hal_executable_release(executable);
+  iree_hal_executable_layout_release(executable_layout);
+  iree_hal_descriptor_set_layout_release(descriptor_set_layout);
+  iree_hal_executable_cache_release(executable_cache);
+  IREE_ASSERT_OK(loop_status);
+}
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
+
+#endif  // IREE_HAL_CTS_EXECUTABLE_CACHE_TEST_H_
diff --git a/runtime/src/iree/hal/cts/executable_layout_test.h b/runtime/src/iree/hal/cts/executable_layout_test.h
new file mode 100644
index 0000000..8af3e60
--- /dev/null
+++ b/runtime/src/iree/hal/cts/executable_layout_test.h
@@ -0,0 +1,97 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// CTS tests for iree_hal_executable_layout_t creation.
+#ifndef IREE_HAL_CTS_EXECUTABLE_LAYOUT_TEST_H_
+#define IREE_HAL_CTS_EXECUTABLE_LAYOUT_TEST_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class executable_layout_test : public CtsTestBase {};
+// CtsTestBase provides the device_ handle used by each test below.
+TEST_P(executable_layout_test, CreateWithNoLayouts) {
+  iree_hal_executable_layout_t* executable_layout = NULL;
+  IREE_ASSERT_OK(iree_hal_executable_layout_create(
+      device_, /*push_constants=*/0, /*set_layout_count=*/0, NULL,
+      &executable_layout));
+
+  iree_hal_executable_layout_release(executable_layout);
+}
+
+TEST_P(executable_layout_test, CreateWithPushConstants) {
+  iree_hal_executable_layout_t* executable_layout = NULL;
+  // Note: The Vulkan maxPushConstantsSize limit must be at least 128 bytes:
+  // https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#limits-minmax
+  IREE_ASSERT_OK(iree_hal_executable_layout_create(
+      device_, /*push_constants=*/5, /*set_layout_count=*/0, NULL,
+      &executable_layout));
+
+  iree_hal_executable_layout_release(executable_layout);
+}
+
+TEST_P(executable_layout_test, CreateWithOneLayout) {
+  iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  iree_hal_descriptor_set_layout_binding_t descriptor_set_layout_bindings[] = {
+      {/*binding=*/0, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+      {/*binding=*/1, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+  };
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      IREE_ARRAYSIZE(descriptor_set_layout_bindings),
+      descriptor_set_layout_bindings, &descriptor_set_layout));
+
+  iree_hal_executable_layout_t* executable_layout = NULL;
+  IREE_ASSERT_OK(iree_hal_executable_layout_create(
+      device_, /*push_constants=*/0, /*set_layout_count=*/1,
+      &descriptor_set_layout, &executable_layout));
+
+  iree_hal_executable_layout_release(executable_layout);
+  iree_hal_descriptor_set_layout_release(descriptor_set_layout);
+}
+
+TEST_P(executable_layout_test, CreateWithTwoLayouts) {
+  iree_hal_descriptor_set_layout_t* descriptor_set_layouts[2] = {NULL};
+  iree_hal_descriptor_set_layout_binding_t layout_bindings_0[] = {
+      {/*binding=*/0, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+      {/*binding=*/1, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+  };
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      IREE_ARRAYSIZE(layout_bindings_0), layout_bindings_0,
+      &descriptor_set_layouts[0]));
+
+  iree_hal_descriptor_set_layout_binding_t layout_bindings_1[] = {
+      {/*binding=*/0, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+      {/*binding=*/1, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+      {/*binding=*/2, /*type=*/IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER},
+  };
+  IREE_ASSERT_OK(iree_hal_descriptor_set_layout_create(
+      device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE,
+      IREE_ARRAYSIZE(layout_bindings_1), layout_bindings_1,
+      &descriptor_set_layouts[1]));
+
+  iree_hal_executable_layout_t* executable_layout = NULL;
+  IREE_ASSERT_OK(iree_hal_executable_layout_create(
+      device_, /*push_constants=*/0, IREE_ARRAYSIZE(descriptor_set_layouts),
+      descriptor_set_layouts, &executable_layout));
+
+  iree_hal_executable_layout_release(executable_layout);
+  iree_hal_descriptor_set_layout_release(descriptor_set_layouts[0]);
+  iree_hal_descriptor_set_layout_release(descriptor_set_layouts[1]);
+}
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
+
+#endif  // IREE_HAL_CTS_EXECUTABLE_LAYOUT_TEST_H_
diff --git a/runtime/src/iree/hal/cts/semaphore_submission_test.h b/runtime/src/iree/hal/cts/semaphore_submission_test.h
new file mode 100644
index 0000000..0c41c76
--- /dev/null
+++ b/runtime/src/iree/hal/cts/semaphore_submission_test.h
@@ -0,0 +1,209 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// CTS tests for queue submission with semaphore wait/signal lists.
+#ifndef IREE_HAL_CTS_SEMAPHORE_SUBMISSION_TEST_H_
+#define IREE_HAL_CTS_SEMAPHORE_SUBMISSION_TEST_H_
+
+#include <cstdint>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class semaphore_submission_test : public CtsTestBase {};
+// CtsTestBase provides the device_ handle used by each test below.
+TEST_P(semaphore_submission_test, SubmitWithNoCommandBuffers) {
+  // No waits, one signal which we immediately wait on after submit.
+  iree_hal_submission_batch_t submission_batch;
+  submission_batch.wait_semaphores.count = 0;
+  submission_batch.wait_semaphores.semaphores = NULL;
+  submission_batch.wait_semaphores.payload_values = NULL;
+  submission_batch.command_buffer_count = 0;
+  submission_batch.command_buffers = NULL;
+  iree_hal_semaphore_t* signal_semaphore = NULL;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore));
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  uint64_t payload_values[] = {1ull};
+  submission_batch.signal_semaphores.payload_values = payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+  IREE_ASSERT_OK(
+      iree_hal_semaphore_wait(signal_semaphore, 1ull, iree_infinite_timeout()));
+
+  iree_hal_semaphore_release(signal_semaphore);
+}
+
+TEST_P(semaphore_submission_test, SubmitAndSignal) {
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+      &command_buffer));
+
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+  // No waits, one signal which we immediately wait on after submit.
+  iree_hal_submission_batch_t submission_batch;
+  submission_batch.wait_semaphores.count = 0;
+  submission_batch.wait_semaphores.semaphores = NULL;
+  submission_batch.wait_semaphores.payload_values = NULL;
+  submission_batch.command_buffer_count = 1;
+  submission_batch.command_buffers = &command_buffer;
+  iree_hal_semaphore_t* signal_semaphore = NULL;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore));
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  uint64_t payload_values[] = {1ull};
+  submission_batch.signal_semaphores.payload_values = payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+  IREE_ASSERT_OK(
+      iree_hal_semaphore_wait(signal_semaphore, 1ull, iree_infinite_timeout()));
+
+  iree_hal_command_buffer_release(command_buffer);
+  iree_hal_semaphore_release(signal_semaphore);
+}
+
+TEST_P(semaphore_submission_test, SubmitWithWait) {
+  // Empty command buffer.
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+      &command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+  // One wait and one signal semaphore.
+  iree_hal_submission_batch_t submission_batch;
+  iree_hal_semaphore_t* wait_semaphore = NULL;
+  iree_hal_semaphore_t* signal_semaphore = NULL;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore));
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 100ull, &signal_semaphore));
+  iree_hal_semaphore_t* wait_semaphore_ptrs[] = {wait_semaphore};
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
+  uint64_t wait_payload_values[] = {1ull};
+  uint64_t signal_payload_values[] = {101ull};
+  submission_batch.wait_semaphores.count = IREE_ARRAYSIZE(wait_semaphore_ptrs);
+  submission_batch.wait_semaphores.semaphores = wait_semaphore_ptrs;
+  submission_batch.wait_semaphores.payload_values = wait_payload_values;
+  submission_batch.command_buffer_count = 1;
+  submission_batch.command_buffers = &command_buffer;
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  submission_batch.signal_semaphores.payload_values = signal_payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+
+  // Work shouldn't start until the wait semaphore reaches its payload value.
+  uint64_t value;
+  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore, &value));
+  EXPECT_EQ(100ull, value);
+
+  // Signal the wait semaphore, work should begin and complete.
+  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore, 1ull));
+  IREE_ASSERT_OK(iree_hal_semaphore_wait(signal_semaphore, 101ull,
+                                         iree_infinite_timeout()));
+
+  iree_hal_command_buffer_release(command_buffer);
+  iree_hal_semaphore_release(wait_semaphore);
+  iree_hal_semaphore_release(signal_semaphore);
+}
+
+TEST_P(semaphore_submission_test, SubmitWithMultipleSemaphores) {
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY,
+      &command_buffer));
+
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+  iree_hal_submission_batch_t submission_batch;
+  iree_hal_semaphore_t* wait_semaphore_1 = NULL;
+  iree_hal_semaphore_t* wait_semaphore_2 = NULL;
+  iree_hal_semaphore_t* signal_semaphore_1 = NULL;
+  iree_hal_semaphore_t* signal_semaphore_2 = NULL;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore_1));
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore_2));
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore_1));
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore_2));
+  iree_hal_semaphore_t* wait_semaphore_ptrs[] = {wait_semaphore_1,
+                                                 wait_semaphore_2};
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore_1,
+                                                   signal_semaphore_2};
+  uint64_t wait_payload_values[] = {1ull, 1ull};
+  uint64_t signal_payload_values[] = {1ull, 1ull};
+  submission_batch.wait_semaphores.count = IREE_ARRAYSIZE(wait_semaphore_ptrs);
+  submission_batch.wait_semaphores.semaphores = wait_semaphore_ptrs;
+  submission_batch.wait_semaphores.payload_values = wait_payload_values;
+  submission_batch.command_buffer_count = 1;
+  submission_batch.command_buffers = &command_buffer;
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  submission_batch.signal_semaphores.payload_values = signal_payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+
+  // Work shouldn't start until all wait semaphores reach their payload values.
+  uint64_t value;
+  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore_1, &value));
+  EXPECT_EQ(0ull, value);
+  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore_2, &value));
+  EXPECT_EQ(0ull, value);
+
+  // Signal the wait semaphores, work should begin and complete.
+  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore_1, 1ull));
+  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore_2, 1ull));
+
+  iree_hal_semaphore_list_t signal_semaphore_list;
+  signal_semaphore_list.count = IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  signal_semaphore_list.semaphores = signal_semaphore_ptrs;
+  uint64_t payload_values[] = {1ull, 1ull};
+  signal_semaphore_list.payload_values = payload_values;
+  IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
+      device_, IREE_HAL_WAIT_MODE_ALL, &signal_semaphore_list,
+      iree_infinite_timeout()));
+
+  iree_hal_command_buffer_release(command_buffer);
+  iree_hal_semaphore_release(wait_semaphore_1);
+  iree_hal_semaphore_release(wait_semaphore_2);
+  iree_hal_semaphore_release(signal_semaphore_1);
+  iree_hal_semaphore_release(signal_semaphore_2);
+}
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
+
+#endif  // IREE_HAL_CTS_SEMAPHORE_SUBMISSION_TEST_H_
diff --git a/runtime/src/iree/hal/cts/semaphore_test.h b/runtime/src/iree/hal/cts/semaphore_test.h
new file mode 100644
index 0000000..d44dd13
--- /dev/null
+++ b/runtime/src/iree/hal/cts/semaphore_test.h
@@ -0,0 +1,237 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CTS_SEMAPHORE_TEST_H_
+#define IREE_HAL_CTS_SEMAPHORE_TEST_H_
+
+#include <cstdint>
+#include <thread>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class semaphore_test : public CtsTestBase {};
+
+// Tests that a semaphore that is unused properly cleans itself up.
+TEST_P(semaphore_test, NoOp) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 123ull, &semaphore));
+
+ uint64_t value;
+ IREE_ASSERT_OK(iree_hal_semaphore_query(semaphore, &value));
+ EXPECT_EQ(123ull, value);
+
+ iree_hal_semaphore_release(semaphore);
+}
+
+// Tests that a semaphore will accept new values as it is signaled.
+TEST_P(semaphore_test, NormalSignaling) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 2ull, &semaphore));
+
+ uint64_t value;
+ IREE_ASSERT_OK(iree_hal_semaphore_query(semaphore, &value));
+ EXPECT_EQ(2ull, value);
+ IREE_ASSERT_OK(iree_hal_semaphore_signal(semaphore, 3ull));
+ IREE_ASSERT_OK(iree_hal_semaphore_query(semaphore, &value));
+ EXPECT_EQ(3ull, value);
+ IREE_ASSERT_OK(iree_hal_semaphore_signal(semaphore, 40ull));
+ IREE_ASSERT_OK(iree_hal_semaphore_query(semaphore, &value));
+ EXPECT_EQ(40ull, value);
+
+ iree_hal_semaphore_release(semaphore);
+}
+
+// Note: Behavior is undefined when signaling with decreasing values, so we
+// can't reliably test it across backends. Some backends may return errors,
+// while others may accept the new, decreasing, values.
+
+// Tests semaphore failure handling.
+TEST_P(semaphore_test, Failure) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 2ull, &semaphore));
+
+ IREE_ASSERT_OK(iree_hal_semaphore_signal(semaphore, 3ull));
+ uint64_t value;
+ IREE_ASSERT_OK(iree_hal_semaphore_query(semaphore, &value));
+ EXPECT_EQ(3ull, value);
+
+ iree_hal_semaphore_fail(semaphore,
+ iree_status_from_code(IREE_STATUS_UNKNOWN));
+ EXPECT_TRUE(
+ iree_status_is_unknown(iree_hal_semaphore_query(semaphore, &value)));
+
+ // Signaling again is undefined behavior. Some backends may return a sticky
+ // failure status while others may silently process new signal values.
+
+ iree_hal_semaphore_release(semaphore);
+}
+
+// Tests waiting on no semaphores.
+TEST_P(semaphore_test, EmptyWait) {
+ IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
+ device_, IREE_HAL_WAIT_MODE_ANY, NULL,
+ iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+ IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
+ device_, IREE_HAL_WAIT_MODE_ALL, NULL,
+ iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+
+ IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
+ device_, IREE_HAL_WAIT_MODE_ANY, NULL,
+ iree_make_timeout_ns(IREE_DURATION_INFINITE)));
+ IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
+ device_, IREE_HAL_WAIT_MODE_ALL, NULL,
+ iree_make_timeout_ns(IREE_DURATION_INFINITE)));
+}
+
+// Tests waiting on a semaphore that has already been signaled.
+// **Never completes when using SwiftShader**
+TEST_P(semaphore_test, DISABLED_WaitAlreadySignaled) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 2ull, &semaphore));
+
+ // Test both previous and current values.
+ IREE_ASSERT_OK(iree_hal_semaphore_wait(
+ semaphore, 1ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+ IREE_ASSERT_OK(iree_hal_semaphore_wait(
+ semaphore, 2ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+
+ IREE_ASSERT_OK(iree_hal_semaphore_wait(
+ semaphore, 1ull, iree_make_timeout_ns(IREE_DURATION_INFINITE)));
+ IREE_ASSERT_OK(iree_hal_semaphore_wait(
+ semaphore, 2ull, iree_make_timeout_ns(IREE_DURATION_INFINITE)));
+
+ iree_hal_semaphore_release(semaphore);
+}
+
+// Tests waiting on a semaphore that has not been signaled.
+TEST_P(semaphore_test, WaitUnsignaled) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 2ull, &semaphore));
+
+ // NOTE: we don't actually block here because otherwise we'd lock up.
+ // Result status is undefined - some backends may return DeadlineExceededError
+ // while others may return success.
+ IREE_IGNORE_ERROR(iree_hal_semaphore_wait(
+ semaphore, 3ull, iree_make_deadline(IREE_TIME_INFINITE_PAST)));
+
+ iree_hal_semaphore_release(semaphore);
+}
+
+// Waiting on a failed semaphore is undefined behavior. Some backends may
+// return UnknownError while others may succeed.
+
+// Tests IREE_HAL_WAIT_MODE_ALL when not all are signaled.
+TEST_P(semaphore_test, WaitAllButNotAllSignaled) {
+ iree_hal_semaphore_t* semaphore_a = NULL;
+ iree_hal_semaphore_t* semaphore_b = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &semaphore_a));
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 1ull, &semaphore_b));
+
+ iree_hal_semaphore_list_t semaphore_list;
+ iree_hal_semaphore_t* semaphore_ptrs[] = {semaphore_a, semaphore_b};
+ semaphore_list.count = IREE_ARRAYSIZE(semaphore_ptrs);
+ semaphore_list.semaphores = semaphore_ptrs;
+ uint64_t payload_values[] = {1ull, 1ull};
+ semaphore_list.payload_values = payload_values;
+
+ // NOTE: we don't actually block here because otherwise we'd lock up.
+ // Result status is undefined - some backends may return DeadlineExceededError
+ // while others may return success.
+ IREE_IGNORE_ERROR(iree_hal_device_wait_semaphores(
+ device_, IREE_HAL_WAIT_MODE_ALL, &semaphore_list,
+ iree_make_deadline(IREE_TIME_INFINITE_PAST)));
+
+ iree_hal_semaphore_release(semaphore_a);
+ iree_hal_semaphore_release(semaphore_b);
+}
+
+// Tests IREE_HAL_WAIT_MODE_ALL when all are signaled.
+TEST_P(semaphore_test, WaitAllAndAllSignaled) {
+ iree_hal_semaphore_t* semaphore_a = NULL;
+ iree_hal_semaphore_t* semaphore_b = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 1ull, &semaphore_a));
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 1ull, &semaphore_b));
+
+ iree_hal_semaphore_list_t semaphore_list;
+ iree_hal_semaphore_t* semaphore_ptrs[] = {semaphore_a, semaphore_b};
+ semaphore_list.count = IREE_ARRAYSIZE(semaphore_ptrs);
+ semaphore_list.semaphores = semaphore_ptrs;
+ uint64_t payload_values[] = {1ull, 1ull};
+ semaphore_list.payload_values = payload_values;
+
+ // NOTE: we don't actually block here because otherwise we'd lock up.
+ // Result status is undefined - some backends may return DeadlineExceededError
+ // while others may return success.
+ IREE_IGNORE_ERROR(iree_hal_device_wait_semaphores(
+ device_, IREE_HAL_WAIT_MODE_ALL, &semaphore_list,
+ iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+
+ iree_hal_semaphore_release(semaphore_a);
+ iree_hal_semaphore_release(semaphore_b);
+}
+
+// Tests IREE_HAL_WAIT_MODE_ANY.
+// **Fails using timeline semaphore emulation**
+TEST_P(semaphore_test, DISABLED_WaitAny) {
+ iree_hal_semaphore_t* semaphore_a = NULL;
+ iree_hal_semaphore_t* semaphore_b = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &semaphore_a));
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 1ull, &semaphore_b));
+
+ iree_hal_semaphore_list_t semaphore_list;
+ iree_hal_semaphore_t* semaphore_ptrs[] = {semaphore_a, semaphore_b};
+ semaphore_list.count = IREE_ARRAYSIZE(semaphore_ptrs);
+ semaphore_list.semaphores = semaphore_ptrs;
+ uint64_t payload_values[] = {1ull, 1ull};
+ semaphore_list.payload_values = payload_values;
+
+ IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
+ device_, IREE_HAL_WAIT_MODE_ANY, &semaphore_list,
+ iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+
+ iree_hal_semaphore_release(semaphore_a);
+ iree_hal_semaphore_release(semaphore_b);
+}
+
+// Tests threading behavior by ping-ponging between the test main thread and
+// a little thread.
+TEST_P(semaphore_test, PingPong) {
+ iree_hal_semaphore_t* a2b = NULL;
+ iree_hal_semaphore_t* b2a = NULL;
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &a2b));
+ IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &b2a));
+ std::thread thread([&]() {
+ // Should advance right past this because the value is already set.
+ IREE_ASSERT_OK(iree_hal_semaphore_wait(
+ a2b, 0ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+ IREE_ASSERT_OK(iree_hal_semaphore_signal(b2a, 1ull));
+ // Jump ahead (blocking at first).
+ IREE_ASSERT_OK(iree_hal_semaphore_wait(
+ a2b, 4ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+ });
+ // Block until thread signals.
+ IREE_ASSERT_OK(iree_hal_semaphore_wait(
+ b2a, 1ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
+ IREE_ASSERT_OK(iree_hal_semaphore_signal(a2b, 4ull));
+ thread.join();
+
+ iree_hal_semaphore_release(a2b);
+ iree_hal_semaphore_release(b2a);
+}
+
+} // namespace cts
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_CTS_SEMAPHORE_TEST_H_
diff --git a/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir
new file mode 100644
index 0000000..63aa1ea
--- /dev/null
+++ b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir
@@ -0,0 +1,37 @@
+// Bootstrapped from this source IR:
+//
+// func.func @abs(%input : tensor<f32>) -> (tensor<f32>) {
+// %result = math.abs %input : tensor<f32>
+// return %result : tensor<f32>
+// }
+
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>
+ ]>
+]>
+
+hal.executable.source public @executable {
+ hal.executable.entry_point public @abs layout(#executable_layout)
+
+ builtin.module {
+ func.func @abs() {
+ %c0 = arith.constant 0 : index
+
+ %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(32) : !flow.dispatch.tensor<readonly:f32>
+ %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(32) : !flow.dispatch.tensor<writeonly:f32>
+
+ %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:f32> -> tensor<f32>
+ %3 = linalg.init_tensor [] : tensor<f32>
+ %4 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} ins(%2 : tensor<f32>) outs(%3 : tensor<f32>) {
+ ^bb0(%arg0: f32, %arg1: f32):
+ %5 = math.abs %arg0 : f32
+ linalg.yield %5 : f32
+ } -> tensor<f32>
+ flow.dispatch.tensor.store %4, %1, offsets = [], sizes = [], strides = [] : tensor<f32> -> !flow.dispatch.tensor<writeonly:f32>
+
+ return
+ }
+ }
+}
diff --git a/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir b/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir
new file mode 100644
index 0000000..63aa1ea
--- /dev/null
+++ b/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir
@@ -0,0 +1,37 @@
+// Bootstrapped from this source IR:
+//
+// func.func @abs(%input : tensor<f32>) -> (tensor<f32>) {
+// %result = math.abs %input : tensor<f32>
+// return %result : tensor<f32>
+// }
+
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>
+ ]>
+]>
+
+hal.executable.source public @executable {
+ hal.executable.entry_point public @abs layout(#executable_layout)
+
+ builtin.module {
+ func.func @abs() {
+ %c0 = arith.constant 0 : index
+
+ %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(32) : !flow.dispatch.tensor<readonly:f32>
+ %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(32) : !flow.dispatch.tensor<writeonly:f32>
+
+ %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:f32> -> tensor<f32>
+ %3 = linalg.init_tensor [] : tensor<f32>
+ %4 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} ins(%2 : tensor<f32>) outs(%3 : tensor<f32>) {
+ ^bb0(%arg0: f32, %arg1: f32):
+ %5 = math.abs %arg0 : f32
+ linalg.yield %5 : f32
+ } -> tensor<f32>
+ flow.dispatch.tensor.store %4, %1, offsets = [], sizes = [], strides = [] : tensor<f32> -> !flow.dispatch.tensor<writeonly:f32>
+
+ return
+ }
+ }
+}
diff --git a/runtime/src/iree/hal/cuda/CMakeLists.txt b/runtime/src/iree/hal/cuda/CMakeLists.txt
new file mode 100644
index 0000000..76f3936
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/CMakeLists.txt
@@ -0,0 +1,98 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+if(NOT IREE_HAL_DRIVER_CUDA)
+ return()
+endif()
+
+if(NOT CUDAToolkit_INCLUDE_DIRS)
+ message(FATAL_ERROR "No CUDA SDK includes found: should have been set globally")
+endif()
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ cuda
+ HDRS
+ "api.h"
+ SRCS
+ "api.h"
+ "context_wrapper.h"
+ "cuda_allocator.c"
+ "cuda_allocator.h"
+ "cuda_buffer.c"
+ "cuda_buffer.h"
+ "cuda_device.c"
+ "cuda_device.h"
+ "cuda_driver.c"
+ "cuda_event.c"
+ "cuda_event.h"
+ "descriptor_set_layout.c"
+ "descriptor_set_layout.h"
+ "event_semaphore.c"
+ "event_semaphore.h"
+ "executable_layout.c"
+ "executable_layout.h"
+ "graph_command_buffer.c"
+ "graph_command_buffer.h"
+ "native_executable.c"
+ "native_executable.h"
+ "nop_executable_cache.c"
+ "nop_executable_cache.h"
+ "status_util.c"
+ "status_util.h"
+ "stream_command_buffer.c"
+ "stream_command_buffer.h"
+ DEPS
+ ::dynamic_symbols
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::arena
+ iree::base::internal::flatcc::parsing
+ iree::base::internal::synchronization
+ iree::base::tracing
+ iree::hal
+ iree::hal::utils::buffer_transfer
+ iree::hal::utils::deferred_command_buffer
+ iree::hal::utils::resource_set
+ iree::schemas::cuda_executable_def_c_fbs
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ dynamic_symbols
+ HDRS
+ "dynamic_symbols.h"
+ TEXTUAL_HDRS
+ "dynamic_symbol_tables.h"
+ SRCS
+ "cuda_headers.h"
+ "dynamic_symbols.c"
+ INCLUDES
+ ${CUDAToolkit_INCLUDE_DIRS}
+ DEPS
+ iree::base::core_headers
+ iree::base::internal::dynamic_library
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ dynamic_symbols_test
+ SRCS
+ "dynamic_symbols_test.cc"
+ DEPS
+ ::dynamic_symbols
+ iree::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+ LABELS
+ "driver=cuda"
+)
diff --git a/runtime/src/iree/hal/cuda/api.h b/runtime/src/iree/hal/cuda/api.h
new file mode 100644
index 0000000..b6a3402
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/api.h
@@ -0,0 +1,82 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// See iree/base/api.h for documentation on the API conventions used.
+
+#ifndef IREE_HAL_CUDA_API_H_
+#define IREE_HAL_CUDA_API_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Defines how command buffers are recorded and executed.
+typedef enum iree_hal_cuda_command_buffer_mode_e {
+ // Command buffers are recorded into CUDA graphs.
+ IREE_HAL_CUDA_COMMAND_BUFFER_MODE_GRAPH = 0,
+ // Command buffers are directly issued against a CUDA stream.
+ IREE_HAL_CUDA_COMMAND_BUFFER_MODE_STREAM = 1,
+} iree_hal_cuda_command_buffer_mode_t;
+
+// Parameters configuring an iree_hal_cuda_device_t.
+// Must be initialized with iree_hal_cuda_device_params_initialize prior to use.
+typedef struct iree_hal_cuda_device_params_t {
+ // Number of queues exposed on the device.
+ // Each queue acts as a separate synchronization scope where all work executes
+ // concurrently unless prohibited by semaphores.
+ iree_host_size_t queue_count;
+
+ // Total size of each block in the device shared block pool.
+ // Larger sizes will lower overhead and ensure the heap isn't hit for
+ // transient allocations while also increasing memory consumption.
+ iree_host_size_t arena_block_size;
+
+ // Specifies how command buffers are recorded and executed.
+ iree_hal_cuda_command_buffer_mode_t command_buffer_mode;
+
+ // Allow executing command buffers against CUDA streams as they are recorded.
+ // Only command buffers produced by the compiler that have the
+ // IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION bit set will use this.
+ bool allow_inline_execution;
+} iree_hal_cuda_device_params_t;
+
+// Initializes |out_params| to default values.
+void iree_hal_cuda_device_params_initialize(
+ iree_hal_cuda_device_params_t* out_params);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_cuda_driver_t
+//===----------------------------------------------------------------------===//
+
+// CUDA driver creation options.
+typedef struct iree_hal_cuda_driver_options_t {
+ // Index of the default CUDA device to use within the list of available
+ // devices.
+ int default_device_index;
+} iree_hal_cuda_driver_options_t;
+
+IREE_API_EXPORT void iree_hal_cuda_driver_options_initialize(
+ iree_hal_cuda_driver_options_t* out_options);
+
+// Creates a CUDA HAL driver that manages its own CUcontext.
+//
+// |out_driver| must be released by the caller (see |iree_hal_driver_release|).
+IREE_API_EXPORT iree_status_t iree_hal_cuda_driver_create(
+ iree_string_view_t identifier,
+ const iree_hal_cuda_device_params_t* default_params,
+ const iree_hal_cuda_driver_options_t* options,
+ iree_allocator_t host_allocator, iree_hal_driver_t** out_driver);
+
+// TODO(thomasraoux): Support importing a CUcontext from app.
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_API_H_
diff --git a/runtime/src/iree/hal/cuda/context_wrapper.h b/runtime/src/iree/hal/cuda/context_wrapper.h
new file mode 100644
index 0000000..ab5281b
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/context_wrapper.h
@@ -0,0 +1,22 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_CONTEXT_WRAPPER_H_
+#define IREE_HAL_CUDA_CONTEXT_WRAPPER_H_
+
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/cuda_headers.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+
+// Structure to wrap all objects constant within a context. This makes it
+// simpler to pass it to the different objects and saves memory.
+typedef struct iree_hal_cuda_context_wrapper_t {
+ CUcontext cu_context;
+ iree_allocator_t host_allocator;
+ iree_hal_cuda_dynamic_symbols_t* syms;
+} iree_hal_cuda_context_wrapper_t;
+
+#endif // IREE_HAL_CUDA_CONTEXT_WRAPPER_H_
diff --git a/runtime/src/iree/hal/cuda/cts/CMakeLists.txt b/runtime/src/iree/hal/cuda/cts/CMakeLists.txt
new file mode 100644
index 0000000..69754f7
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cts/CMakeLists.txt
@@ -0,0 +1,54 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_hal_cts_test_suite(
+ DRIVER_NAME
+ cuda
+ DRIVER_REGISTRATION_HDR
+ "runtime/src/iree/hal/cuda/registration/driver_module.h"
+ DRIVER_REGISTRATION_FN
+ "iree_hal_cuda_driver_module_register"
+ COMPILER_TARGET_BACKEND
+ "cuda"
+ EXECUTABLE_FORMAT
+ "\"PTXE\""
+ DEPS
+ iree::hal::cuda::registration
+ EXCLUDED_TESTS
+ # This test depends on iree_hal_cuda_stream_command_buffer_update_buffer
+ # via iree_hal_buffer_view_allocate_buffer, which is not implemented yet.
+ "command_buffer_dispatch"
+ # Non-push descriptor sets are not implemented in the CUDA backend yet.
+ "descriptor_set"
+ # Semaphores are not implemented in the CUDA backend yet.
+ "semaphore_submission"
+ "semaphore"
+)
+
+# Variant test suite using graph command buffers (--cuda_use_streams=0)
+iree_hal_cts_test_suite(
+ DRIVER_NAME
+ cuda
+ VARIANT_SUFFIX
+ graph
+ DRIVER_REGISTRATION_HDR
+ "runtime/src/iree/hal/cuda/registration/driver_module.h"
+ DRIVER_REGISTRATION_FN
+ "iree_hal_cuda_driver_module_register"
+ COMPILER_TARGET_BACKEND
+ "cuda"
+ EXECUTABLE_FORMAT
+ "\"PTXE\""
+ ARGS
+ "--cuda_use_streams=0"
+ DEPS
+ iree::hal::cuda::registration
+ INCLUDED_TESTS
+ "command_buffer"
+ # This test depends on iree_hal_cuda_stream_command_buffer_update_buffer
+ # via iree_hal_buffer_view_allocate_buffer, which is not implemented yet.
+ # "command_buffer_dispatch"
+)
diff --git a/runtime/src/iree/hal/cuda/cuda_allocator.c b/runtime/src/iree/hal/cuda/cuda_allocator.c
new file mode 100644
index 0000000..e0b6eaf
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_allocator.c
@@ -0,0 +1,327 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/cuda_allocator.h"
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/cuda/cuda_buffer.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+#include "iree/hal/cuda/status_util.h"
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING
+static const char* IREE_HAL_CUDA_ALLOCATOR_ID = "CUDA";
+#endif
+
+typedef struct iree_hal_cuda_allocator_t {
+ iree_hal_resource_t resource;
+ iree_hal_device_t* base_device;
+ iree_hal_cuda_context_wrapper_t* context;
+ CUdevice device;
+ CUstream stream;
+ bool supports_concurrent_managed_access;
+
+ IREE_STATISTICS(iree_hal_allocator_statistics_t statistics;)
+} iree_hal_cuda_allocator_t;
+
+static const iree_hal_allocator_vtable_t iree_hal_cuda_allocator_vtable;
+
+static iree_hal_cuda_allocator_t* iree_hal_cuda_allocator_cast(
+ iree_hal_allocator_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_allocator_vtable);
+ return (iree_hal_cuda_allocator_t*)base_value;
+}
+
+iree_status_t iree_hal_cuda_allocator_create(
+ iree_hal_device_t* base_device, iree_hal_cuda_context_wrapper_t* context,
+ CUdevice device, CUstream stream, iree_hal_allocator_t** out_allocator) {
+ IREE_ASSERT_ARGUMENT(base_device);
+ IREE_ASSERT_ARGUMENT(context);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // To support device-local + host-visible memory we need concurrent managed
+ // access indicating that the host and devices can concurrently access the
+ // device memory. If we don't have this feature then we fall back to forcing
+ // all device-local + host-visible memory into host-local + device-visible
+ // page-locked memory. The compiler tries to avoid this for high-traffic
+ // buffers except for readback staging buffers.
+ int supports_concurrent_managed_access = 0;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, CU_RESULT_TO_STATUS(
+ context->syms,
+ cuDeviceGetAttribute(
+ &supports_concurrent_managed_access,
+ CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, device),
+ "cuDeviceGetAttribute"));
+
+ IREE_TRACE_ZONE_APPEND_TEXT(
+ z0, supports_concurrent_managed_access
+ ? "has CONCURRENT_MANAGED_ACCESS"
+ : "no CONCURRENT_MANAGED_ACCESS (expect slow accesses on "
+ "device-local + host-visible memory)");
+
+ iree_hal_cuda_allocator_t* allocator = NULL;
+ iree_status_t status = iree_allocator_malloc(
+ context->host_allocator, sizeof(*allocator), (void**)&allocator);
+ if (iree_status_is_ok(status)) {
+ iree_hal_resource_initialize(&iree_hal_cuda_allocator_vtable,
+ &allocator->resource);
+ allocator->base_device = base_device;
+ allocator->context = context;
+ allocator->device = device;
+ allocator->stream = stream;
+ allocator->supports_concurrent_managed_access =
+ supports_concurrent_managed_access != 0;
+ *out_allocator = (iree_hal_allocator_t*)allocator;
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_cuda_allocator_destroy(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+ iree_hal_cuda_allocator_t* allocator =
+ iree_hal_cuda_allocator_cast(base_allocator);
+ iree_allocator_t host_allocator = allocator->context->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_free(host_allocator, allocator);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static iree_allocator_t iree_hal_cuda_allocator_host_allocator(
+ const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+ iree_hal_cuda_allocator_t* allocator =
+ (iree_hal_cuda_allocator_t*)base_allocator;
+ return allocator->context->host_allocator;
+}
+
+static iree_status_t iree_hal_cuda_allocator_trim(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+ return iree_ok_status();
+}
+
+static void iree_hal_cuda_allocator_query_statistics(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+ iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics) {
+ IREE_STATISTICS({
+ iree_hal_cuda_allocator_t* allocator =
+ iree_hal_cuda_allocator_cast(base_allocator);
+ memcpy(out_statistics, &allocator->statistics, sizeof(*out_statistics));
+ });
+}
+
+static iree_hal_buffer_compatibility_t
+iree_hal_cuda_allocator_query_compatibility(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+ const iree_hal_buffer_params_t* IREE_RESTRICT params,
+ iree_device_size_t allocation_size) {
+ iree_hal_cuda_allocator_t* allocator =
+ iree_hal_cuda_allocator_cast(base_allocator);
+
+ // If concurrent managed access is not supported then we disallow mapping of
+ // device local memory.
+ if (!allocator->supports_concurrent_managed_access &&
+ iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_MAPPING) &&
+ iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL |
+ IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
+ return IREE_HAL_BUFFER_COMPATIBILITY_NONE;
+ }
+
+ // All buffers can be allocated on the heap.
+ iree_hal_buffer_compatibility_t compatibility =
+ IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE;
+
+ // CUDA supports host <-> device for all copies.
+ if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) {
+ compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER;
+ }
+
+ // Buffers can only be used on the queue if they are device visible.
+ if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
+ if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_DISPATCH)) {
+ compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH;
+ }
+ }
+
+ return compatibility;
+}
+
+static void iree_hal_cuda_buffer_free(iree_hal_cuda_context_wrapper_t* context,
+ iree_hal_memory_type_t memory_type,
+ CUdeviceptr device_ptr, void* host_ptr) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ if (iree_all_bits_set(memory_type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) {
+ // Device local.
+ CUDA_IGNORE_ERROR(context->syms, cuMemFree(device_ptr));
+ } else {
+ // Host local.
+ CUDA_IGNORE_ERROR(context->syms, cuMemFreeHost(host_ptr));
+ }
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static iree_status_t iree_hal_cuda_allocator_allocate_buffer(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+ const iree_hal_buffer_params_t* IREE_RESTRICT params,
+ iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
+ iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+ iree_hal_cuda_allocator_t* allocator =
+ iree_hal_cuda_allocator_cast(base_allocator);
+ // Guard against the corner case where the requested buffer size is 0. The
+ // application is unlikely to do anything when requesting a 0-byte buffer; but
+ // it can happen in real world use cases. So we should at least not crash.
+ if (allocation_size == 0) allocation_size = 4;
+
+ // If concurrent managed access is not supported then make device-local +
+ // host-visible allocations fall back to host-local + device-visible
+ // page-locked memory. This will be significantly slower for the device to
+ // access but the compiler only uses this type for readback staging buffers
+// and it's better to function than to function fast.
+ iree_hal_memory_type_t memory_type = params->type;
+ if (!allocator->supports_concurrent_managed_access &&
+ iree_all_bits_set(memory_type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL |
+ IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
+ memory_type &= ~(IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL |
+ IREE_HAL_MEMORY_TYPE_HOST_VISIBLE);
+ memory_type |=
+ IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+ }
+
+ iree_status_t status = iree_ok_status();
+ void* host_ptr = NULL;
+ CUdeviceptr device_ptr = 0;
+ IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree_hal_cuda_buffer_allocate");
+ if (iree_all_bits_set(memory_type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) {
+ // Device local case.
+ if (iree_all_bits_set(memory_type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
+ status =
+ CU_RESULT_TO_STATUS(allocator->context->syms,
+ cuMemAllocManaged(&device_ptr, allocation_size,
+ CU_MEM_ATTACH_GLOBAL));
+ if (iree_status_is_ok(status)) {
+ // Prefetch the buffer on the GPU device.
+ status = CU_RESULT_TO_STATUS(
+ allocator->context->syms,
+ cuMemPrefetchAsync(device_ptr, allocation_size, allocator->device,
+ allocator->stream));
+ }
+ host_ptr = (void*)device_ptr;
+ } else {
+ // Device only.
+ status = CU_RESULT_TO_STATUS(allocator->context->syms,
+ cuMemAlloc(&device_ptr, allocation_size));
+ }
+ } else {
+ unsigned int flags = CU_MEMHOSTALLOC_DEVICEMAP;
+ if (!iree_all_bits_set(memory_type, IREE_HAL_MEMORY_TYPE_HOST_CACHED)) {
+ flags |= CU_MEMHOSTALLOC_WRITECOMBINED;
+ }
+ status =
+ CU_RESULT_TO_STATUS(allocator->context->syms,
+ cuMemHostAlloc(&host_ptr, allocation_size, flags));
+ if (iree_status_is_ok(status)) {
+ status = CU_RESULT_TO_STATUS(
+ allocator->context->syms,
+ cuMemHostGetDevicePointer(&device_ptr, host_ptr, /*flags=*/0));
+ }
+ }
+ IREE_TRACE_ZONE_END(z0);
+
+ iree_hal_buffer_t* buffer = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_cuda_buffer_wrap(
+ base_allocator, memory_type, params->access, params->usage,
+ allocation_size,
+ /*byte_offset=*/0,
+ /*byte_length=*/allocation_size, device_ptr, host_ptr, &buffer);
+ }
+
+ // Copy the initial contents into the buffer. This may require staging.
+ if (iree_status_is_ok(status) &&
+ !iree_const_byte_span_is_empty(initial_data)) {
+ status = iree_hal_device_transfer_range(
+ allocator->base_device,
+ iree_hal_make_host_transfer_buffer_span((void*)initial_data.data,
+ initial_data.data_length),
+ 0, iree_hal_make_device_transfer_buffer(buffer), 0,
+ initial_data.data_length, IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+ iree_infinite_timeout());
+ }
+
+ if (iree_status_is_ok(status)) {
+ IREE_TRACE_ALLOC_NAMED(IREE_HAL_CUDA_ALLOCATOR_ID,
+ (void*)iree_hal_cuda_buffer_device_pointer(buffer),
+ allocation_size);
+ IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc(
+ &allocator->statistics, memory_type, allocation_size));
+ *out_buffer = buffer;
+ } else {
+ if (!buffer) {
+ iree_hal_cuda_buffer_free(allocator->context, memory_type, device_ptr,
+ host_ptr);
+ } else {
+ iree_hal_buffer_release(buffer);
+ }
+ }
+ return status;
+}
+
+static void iree_hal_cuda_allocator_deallocate_buffer(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+ iree_hal_buffer_t* IREE_RESTRICT base_buffer) {
+ iree_hal_cuda_allocator_t* allocator =
+ iree_hal_cuda_allocator_cast(base_allocator);
+ iree_hal_memory_type_t memory_type = iree_hal_buffer_memory_type(base_buffer);
+ iree_hal_cuda_buffer_free(allocator->context, memory_type,
+ iree_hal_cuda_buffer_device_pointer(base_buffer),
+ iree_hal_cuda_buffer_host_pointer(base_buffer));
+
+ IREE_TRACE_FREE_NAMED(
+ IREE_HAL_CUDA_ALLOCATOR_ID,
+ (void*)iree_hal_cuda_buffer_device_pointer(base_buffer));
+ IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
+ &allocator->statistics, memory_type,
+ iree_hal_buffer_allocation_size(base_buffer)));
+
+ iree_hal_buffer_destroy(base_buffer);
+}
+
+static iree_status_t iree_hal_cuda_allocator_import_buffer(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+ const iree_hal_buffer_params_t* IREE_RESTRICT params,
+ iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+ iree_hal_buffer_release_callback_t release_callback,
+ iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "importing from external buffers not supported");
+}
+
+static iree_status_t iree_hal_cuda_allocator_export_buffer(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+ iree_hal_buffer_t* IREE_RESTRICT buffer,
+ iree_hal_external_buffer_type_t requested_type,
+ iree_hal_external_buffer_flags_t requested_flags,
+ iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "exporting to external buffers not supported");
+}
+
+static const iree_hal_allocator_vtable_t iree_hal_cuda_allocator_vtable = {
+ .destroy = iree_hal_cuda_allocator_destroy,
+ .host_allocator = iree_hal_cuda_allocator_host_allocator,
+ .trim = iree_hal_cuda_allocator_trim,
+ .query_statistics = iree_hal_cuda_allocator_query_statistics,
+ .query_compatibility = iree_hal_cuda_allocator_query_compatibility,
+ .allocate_buffer = iree_hal_cuda_allocator_allocate_buffer,
+ .deallocate_buffer = iree_hal_cuda_allocator_deallocate_buffer,
+ .import_buffer = iree_hal_cuda_allocator_import_buffer,
+ .export_buffer = iree_hal_cuda_allocator_export_buffer,
+};
diff --git a/runtime/src/iree/hal/cuda/cuda_allocator.h b/runtime/src/iree/hal/cuda/cuda_allocator.h
new file mode 100644
index 0000000..4f22579
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_allocator.h
@@ -0,0 +1,28 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_ALLOCATOR_H_
+#define IREE_HAL_CUDA_ALLOCATOR_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+#include "iree/hal/cuda/status_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a CUDA buffer allocator for the given |device| and |stream|.
+iree_status_t iree_hal_cuda_allocator_create(
+ iree_hal_device_t* base_device, iree_hal_cuda_context_wrapper_t* context,
+ CUdevice device, CUstream stream, iree_hal_allocator_t** out_allocator);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_ALLOCATOR_H_
diff --git a/runtime/src/iree/hal/cuda/cuda_buffer.c b/runtime/src/iree/hal/cuda/cuda_buffer.c
new file mode 100644
index 0000000..b69241f
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_buffer.c
@@ -0,0 +1,136 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/cuda_buffer.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+
+// A HAL buffer backed by a CUDA device allocation, with an optional host
+// pointer used to service host mapping requests.
+typedef struct iree_hal_cuda_buffer_t {
+  iree_hal_buffer_t base;
+  // Host-side pointer for the allocation, if any; map_range indexes into it.
+  void* host_ptr;
+  // Device-side pointer for the base of the allocation.
+  CUdeviceptr device_ptr;
+} iree_hal_cuda_buffer_t;
+
+static const iree_hal_buffer_vtable_t iree_hal_cuda_buffer_vtable;
+
+// Downcasts |base_value| to the CUDA buffer type after verifying its vtable.
+static iree_hal_cuda_buffer_t* iree_hal_cuda_buffer_cast(
+    iree_hal_buffer_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_buffer_vtable);
+  return (iree_hal_cuda_buffer_t*)base_value;
+}
+
+// Wraps existing CUDA pointers (|device_ptr| and optional |host_ptr|) in a
+// new iree_hal_buffer_t. Only the wrapper struct is allocated here;
+// destruction frees the struct while the underlying CUDA memory is
+// presumably released by the owning |allocator| -- see cuda_allocator.c.
+iree_status_t iree_hal_cuda_buffer_wrap(
+    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+    iree_device_size_t byte_offset, iree_device_size_t byte_length,
+    CUdeviceptr device_ptr, void* host_ptr, iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_t host_allocator =
+      iree_hal_allocator_host_allocator(allocator);
+  iree_hal_cuda_buffer_t* buffer = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
+  if (iree_status_is_ok(status)) {
+    // The buffer is both its own allocated_buffer and the initialized buffer
+    // (no suballocation), hence &buffer->base is passed twice.
+    iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
+                               allocation_size, byte_offset, byte_length,
+                               memory_type, allowed_access, allowed_usage,
+                               &iree_hal_cuda_buffer_vtable, &buffer->base);
+    buffer->host_ptr = host_ptr;
+    buffer->device_ptr = device_ptr;
+    *out_buffer = &buffer->base;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases the wrapper struct only; the CUDA device memory itself is
+// released through the allocator's deallocate path, not here.
+static void iree_hal_cuda_buffer_destroy(iree_hal_buffer_t* base_buffer) {
+  iree_hal_cuda_buffer_t* buffer = iree_hal_cuda_buffer_cast(base_buffer);
+  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_allocator_free(host_allocator, buffer);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Maps a byte range of the buffer for host access via the stored host_ptr.
+// Requires HOST_VISIBLE memory and MAPPING usage; unmapped (device-only)
+// buffers are rejected by the validation below.
+static iree_status_t iree_hal_cuda_buffer_map_range(
+    iree_hal_buffer_t* base_buffer, iree_hal_mapping_mode_t mapping_mode,
+    iree_hal_memory_access_t memory_access,
+    iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+    iree_hal_buffer_mapping_t* mapping) {
+  iree_hal_cuda_buffer_t* buffer = iree_hal_cuda_buffer_cast(base_buffer);
+
+  // TODO(benvanik): add upload/download for unmapped buffers.
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
+      iree_hal_buffer_memory_type(base_buffer),
+      IREE_HAL_MEMORY_TYPE_HOST_VISIBLE));
+  IREE_RETURN_IF_ERROR(
+      iree_hal_buffer_validate_usage(iree_hal_buffer_allowed_usage(base_buffer),
+                                     IREE_HAL_BUFFER_USAGE_MAPPING));
+
+  uint8_t* data_ptr = (uint8_t*)(buffer->host_ptr) + local_byte_offset;
+  // If we mapped for discard scribble over the bytes. This is not a mandated
+  // behavior but it will make debugging issues easier. Alternatively for
+  // heap buffers we could reallocate them such that ASAN yells, but that
+  // would only work if the entire buffer was discarded.
+#ifndef NDEBUG
+  if (iree_any_bit_set(memory_access, IREE_HAL_MEMORY_ACCESS_DISCARD)) {
+    memset(data_ptr, 0xCD, local_byte_length);
+  }
+#endif  // !NDEBUG
+
+  mapping->contents = iree_make_byte_span(data_ptr, local_byte_length);
+  return iree_ok_status();
+}
+
+// Unmapping is a no-op: mappings point directly at the persistent host_ptr.
+static iree_status_t iree_hal_cuda_buffer_unmap_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) {
+  // Nothing to do (today).
+  return iree_ok_status();
+}
+
+// Cache invalidation is a no-op for this buffer implementation.
+static iree_status_t iree_hal_cuda_buffer_invalidate_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  // Nothing to do.
+  return iree_ok_status();
+}
+
+// Cache flushing is a no-op for this buffer implementation.
+static iree_status_t iree_hal_cuda_buffer_flush_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  // Nothing to do.
+  return iree_ok_status();
+}
+
+// Returns the CUDA device pointer of the allocation base (not offset by the
+// buffer's byte_offset; callers must apply that themselves).
+CUdeviceptr iree_hal_cuda_buffer_device_pointer(
+    iree_hal_buffer_t* base_buffer) {
+  iree_hal_cuda_buffer_t* buffer = iree_hal_cuda_buffer_cast(base_buffer);
+  return buffer->device_ptr;
+}
+
+// Returns the host pointer of the allocation base, or NULL if not
+// host-visible.
+void* iree_hal_cuda_buffer_host_pointer(iree_hal_buffer_t* base_buffer) {
+  iree_hal_cuda_buffer_t* buffer = iree_hal_cuda_buffer_cast(base_buffer);
+  return buffer->host_ptr;
+}
+
+// Vtable binding the generic iree_hal_buffer_t interface to the CUDA buffer
+// implementation above. Recycle falls back to the shared HAL implementation.
+static const iree_hal_buffer_vtable_t iree_hal_cuda_buffer_vtable = {
+    .recycle = iree_hal_buffer_recycle,
+    .destroy = iree_hal_cuda_buffer_destroy,
+    .map_range = iree_hal_cuda_buffer_map_range,
+    .unmap_range = iree_hal_cuda_buffer_unmap_range,
+    .invalidate_range = iree_hal_cuda_buffer_invalidate_range,
+    .flush_range = iree_hal_cuda_buffer_flush_range,
+};
diff --git a/runtime/src/iree/hal/cuda/cuda_buffer.h b/runtime/src/iree/hal/cuda/cuda_buffer.h
new file mode 100644
index 0000000..2aaf037
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_buffer.h
@@ -0,0 +1,38 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_BUFFER_H_
+#define IREE_HAL_CUDA_BUFFER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/cuda_headers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Wraps a CUDA allocation in an iree_hal_buffer_t.
+iree_status_t iree_hal_cuda_buffer_wrap(
+ iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+ iree_hal_memory_access_t allowed_access,
+ iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+ iree_device_size_t byte_offset, iree_device_size_t byte_length,
+ CUdeviceptr device_ptr, void* host_ptr, iree_hal_buffer_t** out_buffer);
+
+// Returns the CUDA base pointer for the given |buffer|.
+// This is the entire allocated_buffer and must be offset by the buffer
+// byte_offset and byte_length when used.
+CUdeviceptr iree_hal_cuda_buffer_device_pointer(iree_hal_buffer_t* buffer);
+
+// Returns the CUDA host pointer for the given |buffer|, if available.
+void* iree_hal_cuda_buffer_host_pointer(iree_hal_buffer_t* buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_BUFFER_H_
diff --git a/runtime/src/iree/hal/cuda/cuda_device.c b/runtime/src/iree/hal/cuda/cuda_device.c
new file mode 100644
index 0000000..4cd0290
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_device.c
@@ -0,0 +1,407 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/cuda_device.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/internal/arena.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/cuda/context_wrapper.h"
+#include "iree/hal/cuda/cuda_allocator.h"
+#include "iree/hal/cuda/cuda_event.h"
+#include "iree/hal/cuda/descriptor_set_layout.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+#include "iree/hal/cuda/event_semaphore.h"
+#include "iree/hal/cuda/executable_layout.h"
+#include "iree/hal/cuda/graph_command_buffer.h"
+#include "iree/hal/cuda/nop_executable_cache.h"
+#include "iree/hal/cuda/status_util.h"
+#include "iree/hal/cuda/stream_command_buffer.h"
+#include "iree/hal/utils/buffer_transfer.h"
+#include "iree/hal/utils/deferred_command_buffer.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_cuda_device_t
+//===----------------------------------------------------------------------===//
+
+// CUDA HAL device state: owns a CUcontext/CUstream pair, a device allocator,
+// and an arena block pool shared by command buffers.
+typedef struct iree_hal_cuda_device_t {
+  iree_hal_resource_t resource;
+  // Identifier string; its characters are stored inline after this struct.
+  iree_string_view_t identifier;
+
+  // Block pool used for command buffers with a larger block size (as command
+  // buffers can contain inlined data uploads).
+  iree_arena_block_pool_t block_pool;
+
+  // Optional driver that owns the CUDA symbols. We retain it for our lifetime
+  // to ensure the symbols remains valid.
+  iree_hal_driver_t* driver;
+
+  // Parameters used to control device behavior.
+  iree_hal_cuda_device_params_t params;
+
+  CUdevice device;
+
+  // TODO: support multiple streams.
+  CUstream stream;
+  iree_hal_cuda_context_wrapper_t context_wrapper;
+  iree_hal_allocator_t* device_allocator;
+
+  // Cache of the direct stream command buffer initialized when in stream mode.
+  // TODO: have one cached per stream once there are multiple streams.
+  iree_hal_command_buffer_t* stream_command_buffer;
+} iree_hal_cuda_device_t;
+
+static const iree_hal_device_vtable_t iree_hal_cuda_device_vtable;
+
+// Downcasts |base_value| to the CUDA device type after verifying its vtable.
+static iree_hal_cuda_device_t* iree_hal_cuda_device_cast(
+    iree_hal_device_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_device_vtable);
+  return (iree_hal_cuda_device_t*)base_value;
+}
+
+// Initializes |out_params| with the default device parameters: 32KB arena
+// blocks, 8 queues, graph-mode command buffers, inline execution disabled.
+void iree_hal_cuda_device_params_initialize(
+    iree_hal_cuda_device_params_t* out_params) {
+  out_params->arena_block_size = 32 * 1024;
+  out_params->queue_count = 8;
+  out_params->command_buffer_mode = IREE_HAL_CUDA_COMMAND_BUFFER_MODE_GRAPH;
+  out_params->allow_inline_execution = false;
+}
+
+// Validates user-provided device parameters before device creation; returns
+// INVALID_ARGUMENT for out-of-range values.
+static iree_status_t iree_hal_cuda_device_check_params(
+    const iree_hal_cuda_device_params_t* params) {
+  if (params->arena_block_size < 4096) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "arena block size too small (< 4096 bytes)");
+  }
+  if (params->queue_count == 0) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "at least one queue is required");
+  }
+  return iree_ok_status();
+}
+
+// Allocates and initializes the device struct, taking over the provided
+// |stream|/|context|. On failure the partially-initialized device is released
+// via iree_hal_device_release so already-created sub-resources are cleaned up.
+static iree_status_t iree_hal_cuda_device_create_internal(
+    iree_hal_driver_t* driver, iree_string_view_t identifier,
+    const iree_hal_cuda_device_params_t* params, CUdevice cu_device,
+    CUstream stream, CUcontext context, iree_hal_cuda_dynamic_symbols_t* syms,
+    iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
+  iree_hal_cuda_device_t* device = NULL;
+  // Identifier characters are stored inline immediately after the struct.
+  iree_host_size_t total_size = iree_sizeof_struct(*device) + identifier.size;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(host_allocator, total_size, (void**)&device));
+  memset(device, 0, total_size);
+  iree_hal_resource_initialize(&iree_hal_cuda_device_vtable, &device->resource);
+  device->driver = driver;
+  iree_hal_driver_retain(device->driver);
+  iree_string_view_append_to_buffer(
+      identifier, &device->identifier,
+      (char*)device + iree_sizeof_struct(*device));
+  device->params = *params;
+  device->device = cu_device;
+  device->stream = stream;
+  device->context_wrapper.cu_context = context;
+  device->context_wrapper.host_allocator = host_allocator;
+  iree_arena_block_pool_initialize(params->arena_block_size, host_allocator,
+                                   &device->block_pool);
+  device->context_wrapper.syms = syms;
+
+  iree_status_t status = iree_hal_cuda_allocator_create(
+      (iree_hal_device_t*)device, &device->context_wrapper, cu_device, stream,
+      &device->device_allocator);
+
+  // In stream mode a single stream command buffer is cached up front and
+  // reused to replay deferred command buffers at submission time.
+  // NOTE(review): created with block_pool=NULL here while the on-demand path
+  // in create_command_buffer passes the device block pool -- confirm intended.
+  if (iree_status_is_ok(status) &&
+      params->command_buffer_mode == IREE_HAL_CUDA_COMMAND_BUFFER_MODE_STREAM) {
+    status = iree_hal_cuda_stream_command_buffer_create(
+        (iree_hal_device_t*)device, &device->context_wrapper,
+        IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION,
+        IREE_HAL_COMMAND_CATEGORY_ANY, device->stream, /*block_pool=*/NULL,
+        &device->stream_command_buffer);
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_device = (iree_hal_device_t*)device;
+  } else {
+    iree_hal_device_release((iree_hal_device_t*)device);
+  }
+  return status;
+}
+
+// Creates a CUDA device that owns its own CUcontext and a single
+// non-blocking stream. On failure any CUDA resources created so far
+// (stream, context) are destroyed before returning.
+iree_status_t iree_hal_cuda_device_create(
+    iree_hal_driver_t* driver, iree_string_view_t identifier,
+    const iree_hal_cuda_device_params_t* params,
+    iree_hal_cuda_dynamic_symbols_t* syms, CUdevice device,
+    iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
+  IREE_ASSERT_ARGUMENT(params);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(z0,
+                                    iree_hal_cuda_device_check_params(params));
+  CUcontext context;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, CU_RESULT_TO_STATUS(syms, cuCtxCreate(&context, 0, device)));
+  // NOTE: must be initialized to NULL: if cuStreamCreate fails it may leave
+  // |stream| unwritten and the cleanup path below would otherwise read an
+  // uninitialized handle.
+  CUstream stream = NULL;
+  iree_status_t status = CU_RESULT_TO_STATUS(
+      syms, cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING));
+
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_cuda_device_create_internal(driver, identifier, params,
+                                                  device, stream, context, syms,
+                                                  host_allocator, out_device);
+  }
+  if (!iree_status_is_ok(status)) {
+    if (stream) {
+      syms->cuStreamDestroy(stream);
+    }
+    syms->cuCtxDestroy(context);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Tears down the device in reverse creation order: cached stream command
+// buffer, device allocator, CUDA stream, block pool, then the retained
+// driver and the struct itself.
+static void iree_hal_cuda_device_destroy(iree_hal_device_t* base_device) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  iree_allocator_t host_allocator = iree_hal_device_host_allocator(base_device);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // There should be no more buffers live that use the allocator.
+  iree_hal_command_buffer_release(device->stream_command_buffer);
+  iree_hal_allocator_release(device->device_allocator);
+  CUDA_IGNORE_ERROR(device->context_wrapper.syms,
+                    cuStreamDestroy(device->stream));
+
+  iree_arena_block_pool_deinitialize(&device->block_pool);
+
+  // Finally, destroy the device.
+  iree_hal_driver_release(device->driver);
+
+  iree_allocator_free(host_allocator, device);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the device identifier assigned at creation time.
+static iree_string_view_t iree_hal_cuda_device_id(
+    iree_hal_device_t* base_device) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  return device->identifier;
+}
+
+// Returns the host allocator used for device-internal allocations.
+static iree_allocator_t iree_hal_cuda_device_host_allocator(
+    iree_hal_device_t* base_device) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  return device->context_wrapper.host_allocator;
+}
+
+// Returns the device-memory allocator created alongside the device.
+static iree_hal_allocator_t* iree_hal_cuda_device_allocator(
+    iree_hal_device_t* base_device) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  return device->device_allocator;
+}
+
+// Trims pooled resources (arena block pool and allocator caches) to reduce
+// memory pressure.
+static iree_status_t iree_hal_cuda_device_trim(iree_hal_device_t* base_device) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  iree_arena_block_pool_trim(&device->block_pool);
+  return iree_hal_allocator_trim(device->device_allocator);
+}
+
+// Answers device configuration queries. Currently only supports the
+// 'hal.executable.format' category: 1 if |key| is 'cuda-nvptx-fb', else 0.
+// Unknown categories fail with NOT_FOUND.
+static iree_status_t iree_hal_cuda_device_query_i32(
+    iree_hal_device_t* base_device, iree_string_view_t category,
+    iree_string_view_t key, int32_t* out_value) {
+  // iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  *out_value = 0;
+
+  if (iree_string_view_equal(category,
+                             iree_make_cstring_view("hal.executable.format"))) {
+    *out_value =
+        iree_string_view_equal(key, iree_make_cstring_view("cuda-nvptx-fb"))
+            ? 1
+            : 0;
+    return iree_ok_status();
+  }
+
+  return iree_make_status(
+      IREE_STATUS_NOT_FOUND,
+      "unknown device configuration key value '%.*s :: %.*s'",
+      (int)category.size, category.data, (int)key.size, key.data);
+}
+
+// Creates a command buffer whose concrete type depends on the requested
+// |mode| and the device's configured command_buffer_mode: inline stream,
+// CUDA graph, or deferred (replayed at submission).
+static iree_status_t iree_hal_cuda_device_create_command_buffer(
+    iree_hal_device_t* base_device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  if (device->params.allow_inline_execution &&
+      iree_all_bits_set(mode,
+                        IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION)) {
+    // The caller has indicated the command buffer can be executed as it is
+    // recorded, implying that the command buffer cannot be reused and doesn't
+    // need to be persisted. This lets us lower the execution delay as we can
+    // directly route commands to a CUDA stream and let it eagerly flush.
+    return iree_hal_cuda_stream_command_buffer_create(
+        base_device, &device->context_wrapper, mode, command_categories,
+        device->stream, &device->block_pool, out_command_buffer);
+  }
+  switch (device->params.command_buffer_mode) {
+    case IREE_HAL_CUDA_COMMAND_BUFFER_MODE_GRAPH:
+      return iree_hal_cuda_graph_command_buffer_create(
+          base_device, &device->context_wrapper, mode, command_categories,
+          queue_affinity, &device->block_pool, out_command_buffer);
+    case IREE_HAL_CUDA_COMMAND_BUFFER_MODE_STREAM:
+      // Stream mode records into a deferred command buffer that is applied to
+      // the cached stream command buffer at queue_submit time.
+      return iree_hal_deferred_command_buffer_create(
+          base_device, mode, command_categories, &device->block_pool,
+          iree_hal_device_host_allocator(base_device), out_command_buffer);
+    default:
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "invalid command buffer mode");
+  }
+}
+
+// Non-push descriptor sets are not yet implemented for CUDA; always fails
+// with UNIMPLEMENTED.
+static iree_status_t iree_hal_cuda_device_create_descriptor_set(
+    iree_hal_device_t* base_device,
+    iree_hal_descriptor_set_layout_t* set_layout,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings,
+    iree_hal_descriptor_set_t** out_descriptor_set) {
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "non-push descriptor sets still need work");
+}
+
+// Forwards descriptor set layout creation to the CUDA layout implementation.
+static iree_status_t iree_hal_cuda_device_create_descriptor_set_layout(
+    iree_hal_device_t* base_device,
+    iree_hal_descriptor_set_layout_usage_type_t usage_type,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_layout_binding_t* bindings,
+    iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  return iree_hal_cuda_descriptor_set_layout_create(
+      &device->context_wrapper, usage_type, binding_count, bindings,
+      out_descriptor_set_layout);
+}
+
+// Forwards event creation to the CUDA event implementation (currently a
+// no-op stub -- see cuda_event.c).
+static iree_status_t iree_hal_cuda_device_create_event(
+    iree_hal_device_t* base_device, iree_hal_event_t** out_event) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  return iree_hal_cuda_event_create(&device->context_wrapper, out_event);
+}
+
+// Creates a no-op executable cache; CUDA executables are not cached.
+static iree_status_t iree_hal_cuda_device_create_executable_cache(
+    iree_hal_device_t* base_device, iree_string_view_t identifier,
+    iree_loop_t loop, iree_hal_executable_cache_t** out_executable_cache) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  return iree_hal_cuda_nop_executable_cache_create(
+      &device->context_wrapper, identifier, out_executable_cache);
+}
+
+// Forwards executable layout creation to the CUDA layout implementation.
+static iree_status_t iree_hal_cuda_device_create_executable_layout(
+    iree_hal_device_t* base_device, iree_host_size_t push_constants,
+    iree_host_size_t set_layout_count,
+    iree_hal_descriptor_set_layout_t** set_layouts,
+    iree_hal_executable_layout_t** out_executable_layout) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  return iree_hal_cuda_executable_layout_create(
+      &device->context_wrapper, set_layout_count, set_layouts, push_constants,
+      out_executable_layout);
+}
+
+// Forwards semaphore creation to the CUDA event-based semaphore
+// implementation.
+static iree_status_t iree_hal_cuda_device_create_semaphore(
+    iree_hal_device_t* base_device, uint64_t initial_value,
+    iree_hal_semaphore_t** out_semaphore) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  return iree_hal_cuda_semaphore_create(&device->context_wrapper, initial_value,
+                                        out_semaphore);
+}
+
+// Submits the given batches to the device queue. Inline (stream) command
+// buffers already executed during recording and are skipped; graph command
+// buffers are launched via cuGraphLaunch; anything else is assumed deferred
+// and replayed against the cached stream command buffer. Conservatively
+// synchronizes the stream after every submit until semaphores exist.
+static iree_status_t iree_hal_cuda_device_queue_submit(
+    iree_hal_device_t* base_device,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+    const iree_hal_submission_batch_t* batches) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  // Use iree_host_size_t for loop indices: |batch_count| and
+  // command_buffer_count are unsigned and int indices trigger signed/unsigned
+  // comparison issues.
+  for (iree_host_size_t i = 0; i < batch_count; i++) {
+    for (iree_host_size_t j = 0; j < batches[i].command_buffer_count; j++) {
+      iree_hal_command_buffer_t* command_buffer = batches[i].command_buffers[j];
+      if (iree_hal_cuda_stream_command_buffer_isa(command_buffer)) {
+        // Nothing to do for an inline command buffer; all the work has already
+        // been submitted. When we support semaphores we'll still need to signal
+        // their completion but do not have to worry about any waits: if there
+        // were waits we wouldn't have been able to execute inline!
+      } else if (iree_hal_cuda_graph_command_buffer_isa(command_buffer)) {
+        CUgraphExec exec =
+            iree_hal_cuda_graph_command_buffer_exec(command_buffer);
+        CUDA_RETURN_IF_ERROR(device->context_wrapper.syms,
+                             cuGraphLaunch(exec, device->stream),
+                             "cuGraphLaunch");
+      } else {
+        IREE_RETURN_IF_ERROR(iree_hal_deferred_command_buffer_apply(
+            command_buffer, device->stream_command_buffer));
+      }
+    }
+  }
+  // TODO(thomasraoux): implement semaphores - for now this conservatively
+  // synchronizes after every submit.
+  CUDA_RETURN_IF_ERROR(device->context_wrapper.syms,
+                       cuStreamSynchronize(device->stream),
+                       "cuStreamSynchronize");
+  return iree_ok_status();
+}
+
+// Submits the batches and then blocks on |wait_semaphore| reaching
+// |wait_value| (note that queue_submit currently stream-synchronizes anyway).
+static iree_status_t iree_hal_cuda_device_submit_and_wait(
+    iree_hal_device_t* base_device,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+    const iree_hal_submission_batch_t* batches,
+    iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+    iree_timeout_t timeout) {
+  // Submit...
+  IREE_RETURN_IF_ERROR(iree_hal_cuda_device_queue_submit(
+      base_device, command_categories, queue_affinity, batch_count, batches));
+
+  // ...and wait.
+  return iree_hal_semaphore_wait(wait_semaphore, wait_value, timeout);
+}
+
+// Multi-semaphore waits are not implemented yet; always fails with
+// UNIMPLEMENTED.
+static iree_status_t iree_hal_cuda_device_wait_semaphores(
+    iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode,
+    const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout) {
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "semaphore not implemented");
+}
+
+// Blocks until the device stream drains. The |timeout| is ignored because
+// cuStreamSynchronize has no deadline support (see TODO below).
+static iree_status_t iree_hal_cuda_device_wait_idle(
+    iree_hal_device_t* base_device, iree_timeout_t timeout) {
+  iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
+  // Wait until the stream is done.
+  // TODO(thomasraoux): CUDA doesn't support a deadline for wait, figure out how
+  // to handle it better.
+  CUDA_RETURN_IF_ERROR(device->context_wrapper.syms,
+                       cuStreamSynchronize(device->stream),
+                       "cuStreamSynchronize");
+  return iree_ok_status();
+}
+
+// Vtable binding the generic iree_hal_device_t interface to the CUDA device
+// implementation above. transfer_range uses the shared HAL helper.
+static const iree_hal_device_vtable_t iree_hal_cuda_device_vtable = {
+    .destroy = iree_hal_cuda_device_destroy,
+    .id = iree_hal_cuda_device_id,
+    .host_allocator = iree_hal_cuda_device_host_allocator,
+    .device_allocator = iree_hal_cuda_device_allocator,
+    .trim = iree_hal_cuda_device_trim,
+    .query_i32 = iree_hal_cuda_device_query_i32,
+    .create_command_buffer = iree_hal_cuda_device_create_command_buffer,
+    .create_descriptor_set = iree_hal_cuda_device_create_descriptor_set,
+    .create_descriptor_set_layout =
+        iree_hal_cuda_device_create_descriptor_set_layout,
+    .create_event = iree_hal_cuda_device_create_event,
+    .create_executable_cache = iree_hal_cuda_device_create_executable_cache,
+    .create_executable_layout = iree_hal_cuda_device_create_executable_layout,
+    .create_semaphore = iree_hal_cuda_device_create_semaphore,
+    .transfer_range = iree_hal_device_submit_transfer_range_and_wait,
+    .queue_submit = iree_hal_cuda_device_queue_submit,
+    .submit_and_wait = iree_hal_cuda_device_submit_and_wait,
+    .wait_semaphores = iree_hal_cuda_device_wait_semaphores,
+    .wait_idle = iree_hal_cuda_device_wait_idle,
+};
diff --git a/runtime/src/iree/hal/cuda/cuda_device.h b/runtime/src/iree/hal/cuda/cuda_device.h
new file mode 100644
index 0000000..d7b5790
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_device.h
@@ -0,0 +1,30 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_CUDA_DEVICE_H_
+#define IREE_HAL_CUDA_CUDA_DEVICE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/api.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a device that owns and manages its own CUcontext.
+iree_status_t iree_hal_cuda_device_create(
+ iree_hal_driver_t* driver, iree_string_view_t identifier,
+ const iree_hal_cuda_device_params_t* params,
+ iree_hal_cuda_dynamic_symbols_t* syms, CUdevice device,
+ iree_allocator_t host_allocator, iree_hal_device_t** out_device);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_CUDA_DEVICE_H_
diff --git a/runtime/src/iree/hal/cuda/cuda_driver.c b/runtime/src/iree/hal/cuda/cuda_driver.c
new file mode 100644
index 0000000..e78b4e9
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_driver.c
@@ -0,0 +1,228 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/api.h"
+#include "iree/hal/cuda/cuda_device.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+#include "iree/hal/cuda/status_util.h"
+
+// CUDA HAL driver: dynamically loads the CUDA driver API symbols and exposes
+// CUDA devices through the IREE driver registry.
+typedef struct iree_hal_cuda_driver_t {
+  iree_hal_resource_t resource;
+  iree_allocator_t host_allocator;
+  // Identifier used for the driver in the IREE driver registry.
+  // We allow overriding so that multiple CUDA versions can be exposed in the
+  // same process.
+  iree_string_view_t identifier;
+  // Default device parameters applied when creating devices.
+  iree_hal_cuda_device_params_t default_params;
+  // Index of the device used when no explicit device_id is given.
+  int default_device_index;
+  // CUDA symbols.
+  iree_hal_cuda_dynamic_symbols_t syms;
+} iree_hal_cuda_driver_t;
+
+// Pick a fixed length for device names.
+#define IREE_MAX_CUDA_DEVICE_NAME_LENGTH 100
+
+static const iree_hal_driver_vtable_t iree_hal_cuda_driver_vtable;
+
+// Downcasts |base_value| to the CUDA driver type after verifying its vtable.
+static iree_hal_cuda_driver_t* iree_hal_cuda_driver_cast(
+    iree_hal_driver_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_driver_vtable);
+  return (iree_hal_cuda_driver_t*)base_value;
+}
+
+// Initializes |out_options| with default driver options (device index 0).
+IREE_API_EXPORT void iree_hal_cuda_driver_options_initialize(
+    iree_hal_cuda_driver_options_t* out_options) {
+  memset(out_options, 0, sizeof(*out_options));
+  out_options->default_device_index = 0;
+}
+
+// Allocates and initializes the driver struct and loads the CUDA dynamic
+// symbols. On failure the partially-initialized driver is released.
+static iree_status_t iree_hal_cuda_driver_create_internal(
+    iree_string_view_t identifier,
+    const iree_hal_cuda_device_params_t* default_params,
+    const iree_hal_cuda_driver_options_t* options,
+    iree_allocator_t host_allocator, iree_hal_driver_t** out_driver) {
+  iree_hal_cuda_driver_t* driver = NULL;
+  // Identifier characters are stored inline immediately after the struct.
+  iree_host_size_t total_size = iree_sizeof_struct(*driver) + identifier.size;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(host_allocator, total_size, (void**)&driver));
+
+  iree_hal_resource_initialize(&iree_hal_cuda_driver_vtable, &driver->resource);
+  driver->host_allocator = host_allocator;
+  iree_string_view_append_to_buffer(
+      identifier, &driver->identifier,
+      (char*)driver + iree_sizeof_struct(*driver));
+  memcpy(&driver->default_params, default_params,
+         sizeof(driver->default_params));
+  driver->default_device_index = options->default_device_index;
+
+  iree_status_t status =
+      iree_hal_cuda_dynamic_symbols_initialize(host_allocator, &driver->syms);
+  if (iree_status_is_ok(status)) {
+    *out_driver = (iree_hal_driver_t*)driver;
+  } else {
+    iree_hal_driver_release((iree_hal_driver_t*)driver);
+  }
+  return status;
+}
+
+// Unloads the CUDA dynamic symbols and frees the driver struct.
+static void iree_hal_cuda_driver_destroy(iree_hal_driver_t* base_driver) {
+  iree_hal_cuda_driver_t* driver = iree_hal_cuda_driver_cast(base_driver);
+  iree_allocator_t host_allocator = driver->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_cuda_dynamic_symbols_deinitialize(&driver->syms);
+  iree_allocator_free(host_allocator, driver);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Public entry point: validates arguments and delegates driver construction
+// to iree_hal_cuda_driver_create_internal.
+IREE_API_EXPORT iree_status_t iree_hal_cuda_driver_create(
+    iree_string_view_t identifier,
+    const iree_hal_cuda_device_params_t* default_params,
+    const iree_hal_cuda_driver_options_t* options,
+    iree_allocator_t host_allocator, iree_hal_driver_t** out_driver) {
+  IREE_ASSERT_ARGUMENT(default_params);
+  IREE_ASSERT_ARGUMENT(options);
+  IREE_ASSERT_ARGUMENT(out_driver);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_t status = iree_hal_cuda_driver_create_internal(
+      identifier, default_params, options, host_allocator, out_driver);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Populates device information from the given CUDA physical device handle.
+// |out_device_info| must point to valid memory and additional data will be
+// appended to |buffer_ptr| and the new pointer is returned.
+static uint8_t* iree_hal_cuda_populate_device_info(
+    CUdevice device, iree_hal_cuda_dynamic_symbols_t* syms, uint8_t* buffer_ptr,
+    iree_hal_device_info_t* out_device_info) {
+  char device_name[IREE_MAX_CUDA_DEVICE_NAME_LENGTH];
+  // Name lookup failures are ignored; device_name may then be empty.
+  CUDA_IGNORE_ERROR(syms,
+                    cuDeviceGetName(device_name, sizeof(device_name), device));
+  memset(out_device_info, 0, sizeof(*out_device_info));
+  out_device_info->device_id = (iree_hal_device_id_t)device;
+
+  iree_string_view_t device_name_string =
+      iree_make_string_view(device_name, strlen(device_name));
+  buffer_ptr += iree_string_view_append_to_buffer(
+      device_name_string, &out_device_info->name, (char*)buffer_ptr);
+  return buffer_ptr;
+}
+
+// Returns true if the device supports all required extensions (none are
+// currently checked, so all devices are accepted).
+static bool iree_hal_cuda_is_valid_device(iree_hal_cuda_driver_t* driver,
+                                          CUdevice device) {
+  return true;
+}
+
+// Enumerates all CUDA devices, filters out unsupported ones, and returns a
+// freshly-allocated info list (names stored inline after the array) that the
+// caller must free with |host_allocator|.
+static iree_status_t iree_hal_cuda_driver_query_available_devices(
+    iree_hal_driver_t* base_driver, iree_allocator_t host_allocator,
+    iree_hal_device_info_t** out_device_infos,
+    iree_host_size_t* out_device_info_count) {
+  iree_hal_cuda_driver_t* driver = iree_hal_cuda_driver_cast(base_driver);
+  // Query the number of available CUDA devices.
+  int device_count = 0;
+  CUDA_RETURN_IF_ERROR(&driver->syms, cuDeviceGetCount(&device_count),
+                       "cuDeviceGetCount");
+
+  // Allocate the return infos and populate with the devices.
+  // Each entry reserves worst-case space for a device name after the array.
+  iree_hal_device_info_t* device_infos = NULL;
+  iree_host_size_t total_size =
+      (iree_host_size_t)device_count *
+      (sizeof(iree_hal_device_info_t) +
+       IREE_MAX_CUDA_DEVICE_NAME_LENGTH * sizeof(char));
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&device_infos);
+  int valid_device_count = 0;
+  if (iree_status_is_ok(status)) {
+    uint8_t* buffer_ptr =
+        (uint8_t*)device_infos + device_count * sizeof(iree_hal_device_info_t);
+    for (int i = 0; i < device_count; ++i) {
+      CUdevice device;
+      // NOTE: assign to the outer |status| (previously a shadowing local) so
+      // that enumeration failures are propagated to the caller instead of
+      // being silently dropped (and the status object leaked).
+      status = CU_RESULT_TO_STATUS(&driver->syms, cuDeviceGet(&device, i),
+                                   "cuDeviceGet");
+      if (!iree_status_is_ok(status)) break;
+      if (!iree_hal_cuda_is_valid_device(driver, device)) continue;
+      buffer_ptr = iree_hal_cuda_populate_device_info(
+          device, &driver->syms, buffer_ptr, &device_infos[valid_device_count]);
+      valid_device_count++;
+    }
+  }
+  if (iree_status_is_ok(status)) {
+    *out_device_info_count = valid_device_count;
+    *out_device_infos = device_infos;
+  } else {
+    iree_allocator_free(host_allocator, device_infos);
+  }
+  return status;
+}
+
+// Resolves |default_device_index| against the enumerated device list and
+// returns the corresponding CUdevice. Fails with UNAVAILABLE if no devices
+// exist or NOT_FOUND if the index is out of range.
+static iree_status_t iree_hal_cuda_driver_select_default_device(
+    iree_hal_driver_t* base_driver, iree_hal_cuda_dynamic_symbols_t* syms,
+    int default_device_index, iree_allocator_t host_allocator,
+    CUdevice* out_device) {
+  iree_hal_device_info_t* out_device_infos;
+  iree_host_size_t device_count;
+  IREE_RETURN_IF_ERROR(iree_hal_cuda_driver_query_available_devices(
+      base_driver, host_allocator, &out_device_infos, &device_count));
+  iree_status_t status = iree_ok_status();
+  if (device_count == 0) {
+    status = iree_make_status(IREE_STATUS_UNAVAILABLE,
+                              "no compatible CUDA devices were found");
+  } else if (default_device_index >= device_count) {
+    // NOTE: %ld was wrong for iree_host_size_t on LLP64 platforms (e.g.
+    // 64-bit Windows, where long is 32-bit); cast to int for a portable
+    // format (device counts are small).
+    status = iree_make_status(IREE_STATUS_NOT_FOUND,
+                              "default device %d not found (of %d enumerated)",
+                              default_device_index, (int)device_count);
+  } else {
+    *out_device = (CUdevice)out_device_infos[default_device_index].device_id;
+  }
+  iree_allocator_free(host_allocator, out_device_infos);
+  return status;
+}
+
+// Creates a HAL device for |device_id|. A zero device_id means "use the
+// driver's configured default device index" and triggers enumeration.
+static iree_status_t iree_hal_cuda_driver_create_device(
+    iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id,
+    iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
+  iree_hal_cuda_driver_t* driver = iree_hal_cuda_driver_cast(base_driver);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // cuInit must be called before any other driver API use.
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, CU_RESULT_TO_STATUS(&driver->syms, cuInit(0), "cuInit"));
+  // Use either the specified device (enumerated earlier) or whatever default
+  // one was specified when the driver was created.
+  CUdevice device = (CUdevice)device_id;
+  if (device == 0) {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_hal_cuda_driver_select_default_device(
+                base_driver, &driver->syms, driver->default_device_index,
+                host_allocator, &device));
+  }
+
+  iree_string_view_t device_name = iree_make_cstring_view("cuda");
+
+  // Attempt to create the device.
+  iree_status_t status = iree_hal_cuda_device_create(
+      base_driver, device_name, &driver->default_params, &driver->syms, device,
+      host_allocator, out_device);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Vtable binding the generic iree_hal_driver_t interface to the CUDA driver
+// implementation above.
+static const iree_hal_driver_vtable_t iree_hal_cuda_driver_vtable = {
+    .destroy = iree_hal_cuda_driver_destroy,
+    .query_available_devices = iree_hal_cuda_driver_query_available_devices,
+    .create_device = iree_hal_cuda_driver_create_device,
+};
diff --git a/runtime/src/iree/hal/cuda/cuda_event.c b/runtime/src/iree/hal/cuda/cuda_event.c
new file mode 100644
index 0000000..ce4d5dd
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_event.c
@@ -0,0 +1,61 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/cuda_event.h"
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+
+// Dummy events for now, don't do anything.
+typedef struct iree_hal_cuda_event_t {
+  // Base HAL resource (ref count + vtable); presumably must be the first
+  // member so the type-punned casts below remain valid — see
+  // iree_hal_resource_t.
+  iree_hal_resource_t resource;
+  // Unowned context providing the host allocator used to free the event.
+  iree_hal_cuda_context_wrapper_t* context_wrapper;
+} iree_hal_cuda_event_t;
+
+static const iree_hal_event_vtable_t iree_hal_cuda_event_vtable;
+
+// Downcasts |base_value| to the CUDA event type, asserting (in debug builds)
+// that the vtable matches.
+static iree_hal_cuda_event_t* iree_hal_cuda_event_cast(
+    iree_hal_event_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_event_vtable);
+  return (iree_hal_cuda_event_t*)base_value;
+}
+
+// Allocates a new (currently no-op) event from |context_wrapper|'s host
+// allocator. On success ownership of *|out_event| transfers to the caller;
+// on failure *|out_event| is NULL.
+iree_status_t iree_hal_cuda_event_create(
+    iree_hal_cuda_context_wrapper_t* context_wrapper,
+    iree_hal_event_t** out_event) {
+  IREE_ASSERT_ARGUMENT(context_wrapper);
+  IREE_ASSERT_ARGUMENT(out_event);
+  *out_event = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_cuda_event_t* handle = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      context_wrapper->host_allocator, sizeof(*handle), (void**)&handle);
+  if (!iree_status_is_ok(status)) {
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+
+  iree_hal_resource_initialize(&iree_hal_cuda_event_vtable, &handle->resource);
+  handle->context_wrapper = context_wrapper;
+  *out_event = (iree_hal_event_t*)handle;
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees the event. The host allocator is captured before the free because it
+// lives in the (unowned) context wrapper, not in the event allocation itself.
+static void iree_hal_cuda_event_destroy(iree_hal_event_t* base_event) {
+  iree_hal_cuda_event_t* event = iree_hal_cuda_event_cast(base_event);
+  iree_allocator_t host_allocator = event->context_wrapper->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(host_allocator, event);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Event vtable; only destruction is needed for these no-op events.
+static const iree_hal_event_vtable_t iree_hal_cuda_event_vtable = {
+    .destroy = iree_hal_cuda_event_destroy,
+};
diff --git a/runtime/src/iree/hal/cuda/cuda_event.h b/runtime/src/iree/hal/cuda/cuda_event.h
new file mode 100644
index 0000000..cf18b47
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_event.h
@@ -0,0 +1,31 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_EVENT_H_
+#define IREE_HAL_CUDA_EVENT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+#include "iree/hal/cuda/cuda_headers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a dummy event object. Object will be represented by CUDA Graph edges
+// so nothing is created at creation time. When an event is signaled in the
+// command buffer we will add the appropriate edges to enforce the right
+// synchronization.
+iree_status_t iree_hal_cuda_event_create(
+ iree_hal_cuda_context_wrapper_t* context_wrapper,
+ iree_hal_event_t** out_event);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_EVENT_H_
diff --git a/runtime/src/iree/hal/cuda/cuda_headers.h b/runtime/src/iree/hal/cuda/cuda_headers.h
new file mode 100644
index 0000000..cdfbff7
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/cuda_headers.h
@@ -0,0 +1,12 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_CUDA_HEADERS_H_
+#define IREE_HAL_CUDA_CUDA_HEADERS_H_
+
+#include "cuda.h" // IWYU pragma: export
+
+#endif // IREE_HAL_CUDA_CUDA_HEADERS_H_
diff --git a/runtime/src/iree/hal/cuda/descriptor_set_layout.c b/runtime/src/iree/hal/cuda/descriptor_set_layout.c
new file mode 100644
index 0000000..062cc7e
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/descriptor_set_layout.c
@@ -0,0 +1,81 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/descriptor_set_layout.h"
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+
+// CUDA descriptor set layout: only records the binding count, which is used
+// for flattened kernel-argument index computation (see executable_layout.c).
+typedef struct iree_hal_cuda_descriptor_set_layout_t {
+  // Base HAL resource (ref count + vtable).
+  iree_hal_resource_t resource;
+  // Unowned context providing the host allocator used to free the layout.
+  iree_hal_cuda_context_wrapper_t* context;
+  // Number of bindings declared at creation time.
+  iree_host_size_t binding_count;
+} iree_hal_cuda_descriptor_set_layout_t;
+
+static const iree_hal_descriptor_set_layout_vtable_t
+    iree_hal_cuda_descriptor_set_layout_vtable;
+
+// Downcasts |base_value| to the CUDA layout type, asserting (in debug builds)
+// that the vtable matches.
+static iree_hal_cuda_descriptor_set_layout_t*
+iree_hal_cuda_descriptor_set_layout_cast(
+    iree_hal_descriptor_set_layout_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_descriptor_set_layout_vtable);
+  return (iree_hal_cuda_descriptor_set_layout_t*)base_value;
+}
+
+// Creates a descriptor set layout that records |binding_count| for later
+// argument-index math. |usage_type| and |bindings| are accepted for API
+// parity but are not currently inspected. On success ownership of
+// *|out_descriptor_set_layout| transfers to the caller.
+iree_status_t iree_hal_cuda_descriptor_set_layout_create(
+    iree_hal_cuda_context_wrapper_t* context,
+    iree_hal_descriptor_set_layout_usage_type_t usage_type,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_layout_binding_t* bindings,
+    iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
+  IREE_ASSERT_ARGUMENT(context);
+  IREE_ASSERT_ARGUMENT(!binding_count || bindings);
+  IREE_ASSERT_ARGUMENT(out_descriptor_set_layout);
+  *out_descriptor_set_layout = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_cuda_descriptor_set_layout_t* layout = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      context->host_allocator, sizeof(*layout), (void**)&layout);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_cuda_descriptor_set_layout_vtable,
+                                 &layout->resource);
+    layout->context = context;
+    layout->binding_count = binding_count;
+    *out_descriptor_set_layout = (iree_hal_descriptor_set_layout_t*)layout;
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Returns the number of bindings declared when the layout was created.
+iree_host_size_t iree_hal_cuda_descriptor_set_layout_binding_count(
+    iree_hal_descriptor_set_layout_t* base_descriptor_set_layout) {
+  return iree_hal_cuda_descriptor_set_layout_cast(base_descriptor_set_layout)
+      ->binding_count;
+}
+
+// Frees the layout. The host allocator is captured before the free because it
+// lives in the (unowned) context, not in the layout allocation itself.
+static void iree_hal_cuda_descriptor_set_layout_destroy(
+    iree_hal_descriptor_set_layout_t* base_descriptor_set_layout) {
+  iree_hal_cuda_descriptor_set_layout_t* descriptor_set_layout =
+      iree_hal_cuda_descriptor_set_layout_cast(base_descriptor_set_layout);
+  iree_allocator_t host_allocator =
+      descriptor_set_layout->context->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(host_allocator, descriptor_set_layout);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Descriptor set layout vtable; only destruction is needed.
+static const iree_hal_descriptor_set_layout_vtable_t
+    iree_hal_cuda_descriptor_set_layout_vtable = {
+        .destroy = iree_hal_cuda_descriptor_set_layout_destroy,
+};
diff --git a/runtime/src/iree/hal/cuda/descriptor_set_layout.h b/runtime/src/iree/hal/cuda/descriptor_set_layout.h
new file mode 100644
index 0000000..c630d4c
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/descriptor_set_layout.h
@@ -0,0 +1,33 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_DESCRIPTOR_SET_LAYOUT_H_
+#define IREE_HAL_CUDA_DESCRIPTOR_SET_LAYOUT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+iree_status_t iree_hal_cuda_descriptor_set_layout_create(
+ iree_hal_cuda_context_wrapper_t* context,
+ iree_hal_descriptor_set_layout_usage_type_t usage_type,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_layout_binding_t* bindings,
+ iree_hal_descriptor_set_layout_t** out_descriptor_set_layout);
+
+// Return the binding count for the given descriptor set layout.
+iree_host_size_t iree_hal_cuda_descriptor_set_layout_binding_count(
+ iree_hal_descriptor_set_layout_t* descriptor_set_layout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_DESCRIPTOR_SET_LAYOUT_H_
diff --git a/runtime/src/iree/hal/cuda/dynamic_symbol_tables.h b/runtime/src/iree/hal/cuda/dynamic_symbol_tables.h
new file mode 100644
index 0000000..9b3f5c9
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/dynamic_symbol_tables.h
@@ -0,0 +1,55 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Table of CUDA driver API entry points resolved at runtime. Each entry is
+// CU_PFN_DECL(symbol name, parameter types...); includers define CU_PFN_DECL
+// to either declare a function pointer (dynamic_symbols.h) or resolve the
+// symbol from the loaded library (dynamic_symbols.c).
+
+// Context and device management.
+CU_PFN_DECL(cuCtxCreate, CUcontext*, unsigned int, CUdevice)
+CU_PFN_DECL(cuCtxDestroy, CUcontext)
+CU_PFN_DECL(cuDeviceGet, CUdevice*, int)
+CU_PFN_DECL(cuDeviceGetCount, int*)
+CU_PFN_DECL(cuDeviceGetName, char*, int, CUdevice)
+CU_PFN_DECL(cuDeviceGetAttribute, int*, CUdevice_attribute, CUdevice)
+// Error reporting.
+CU_PFN_DECL(cuGetErrorName, CUresult, const char**)
+CU_PFN_DECL(cuGetErrorString, CUresult, const char**)
+// CUDA graph construction and execution.
+CU_PFN_DECL(cuGraphAddMemcpyNode, CUgraphNode*, CUgraph, const CUgraphNode*,
+            size_t, const CUDA_MEMCPY3D*, CUcontext)
+CU_PFN_DECL(cuGraphAddMemsetNode, CUgraphNode*, CUgraph, const CUgraphNode*,
+            size_t, const CUDA_MEMSET_NODE_PARAMS*, CUcontext)
+CU_PFN_DECL(cuGraphAddKernelNode, CUgraphNode*, CUgraph, const CUgraphNode*,
+            size_t, const CUDA_KERNEL_NODE_PARAMS*)
+CU_PFN_DECL(cuGraphCreate, CUgraph*, unsigned int)
+CU_PFN_DECL(cuGraphDestroy, CUgraph)
+CU_PFN_DECL(cuGraphExecDestroy, CUgraphExec)
+CU_PFN_DECL(cuGraphGetNodes, CUgraph, CUgraphNode*, size_t*)
+CU_PFN_DECL(cuGraphInstantiate, CUgraphExec*, CUgraph, CUgraphNode*, char*,
+            size_t)
+CU_PFN_DECL(cuGraphLaunch, CUgraphExec, CUstream)
+// Initialization.
+CU_PFN_DECL(cuInit, unsigned int)
+// Memory management.
+CU_PFN_DECL(cuMemAllocManaged, CUdeviceptr*, size_t, unsigned int)
+CU_PFN_DECL(cuMemPrefetchAsync, CUdeviceptr, size_t, CUdevice, CUstream)
+CU_PFN_DECL(cuMemAlloc, CUdeviceptr*, size_t)
+CU_PFN_DECL(cuMemFree, CUdeviceptr)
+CU_PFN_DECL(cuMemFreeHost, void*)
+CU_PFN_DECL(cuMemHostAlloc, void**, size_t, unsigned int)
+CU_PFN_DECL(cuMemHostGetDevicePointer, CUdeviceptr*, void*, unsigned int)
+// Module and function management.
+CU_PFN_DECL(cuModuleGetFunction, CUfunction*, CUmodule, const char*)
+CU_PFN_DECL(cuModuleLoadDataEx, CUmodule*, const void*, unsigned int,
+            CUjit_option*, void**)
+CU_PFN_DECL(cuModuleUnload, CUmodule)
+// Stream management.
+CU_PFN_DECL(cuStreamCreate, CUstream*, unsigned int)
+CU_PFN_DECL(cuStreamDestroy, CUstream)
+CU_PFN_DECL(cuStreamSynchronize, CUstream)
+CU_PFN_DECL(cuStreamWaitEvent, CUstream, CUevent, unsigned int)
+// Asynchronous memset/memcpy.
+CU_PFN_DECL(cuMemsetD32Async, unsigned long long, unsigned int, size_t,
+            CUstream)
+CU_PFN_DECL(cuMemsetD16Async, unsigned long long, unsigned short, size_t,
+            CUstream)
+CU_PFN_DECL(cuMemsetD8Async, unsigned long long, unsigned char, size_t,
+            CUstream)
+CU_PFN_DECL(cuMemcpyAsync, CUdeviceptr, CUdeviceptr, size_t, CUstream)
+CU_PFN_DECL(cuMemcpyHtoDAsync_v2, CUdeviceptr, const void*, size_t, CUstream)
+// Kernel launch.
+CU_PFN_DECL(cuFuncSetAttribute, CUfunction, CUfunction_attribute, int)
+CU_PFN_DECL(cuLaunchKernel, CUfunction, unsigned int, unsigned int,
+            unsigned int, unsigned int, unsigned int, unsigned int,
+            unsigned int, CUstream, void**, void**)
diff --git a/runtime/src/iree/hal/cuda/dynamic_symbols.c b/runtime/src/iree/hal/cuda/dynamic_symbols.c
new file mode 100644
index 0000000..84b93ad
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/dynamic_symbols.c
@@ -0,0 +1,72 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/dynamic_symbols.h"
+
+#include <string.h>
+
+#include "iree/base/internal/dynamic_library.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
+// Candidate file names for the CUDA driver shared library, in search order.
+static const char* kCUDALoaderSearchNames[] = {
+#if defined(IREE_PLATFORM_WINDOWS)
+    "nvcuda.dll",
+#else
+    "libcuda.so",
+#endif
+};
+
+// Pastes two string literals together (used to form "<symbol>_v2" names).
+#define concat(A, B) A B
+
+// Load CUDA entry points, prefer _v2 version if it exists.
+// Fails if any required symbol from dynamic_symbol_tables.h is missing; the
+// _v2 lookup is best-effort and silently falls back to the base symbol.
+static iree_status_t iree_hal_cuda_dynamic_symbols_resolve_all(
+    iree_hal_cuda_dynamic_symbols_t* syms) {
+#define CU_PFN_DECL(cudaSymbolName, ...)                                      \
+  {                                                                           \
+    static const char* kName = #cudaSymbolName;                               \
+    IREE_RETURN_IF_ERROR(iree_dynamic_library_lookup_symbol(                  \
+        syms->loader_library, kName, (void**)&syms->cudaSymbolName));         \
+    static const char* kNameV2 = concat(#cudaSymbolName, "_v2");              \
+    /* Initialize so a failed lookup cannot leave funV2 indeterminate. */     \
+    void* funV2 = NULL;                                                       \
+    /* Optional lookup: consume (not leak) the status when _v2 is absent. */  \
+    iree_status_ignore(iree_dynamic_library_lookup_symbol(                    \
+        syms->loader_library, kNameV2, &funV2));                              \
+    if (funV2) syms->cudaSymbolName = funV2;                                  \
+  }
+#include "iree/hal/cuda/dynamic_symbol_tables.h"  // IWYU pragma: keep
+#undef CU_PFN_DECL
+  return iree_ok_status();
+}
+
+// Initializes |out_syms| by loading the CUDA driver library and resolving all
+// required symbols. Returns UNAVAILABLE (instead of NOT_FOUND) when no CUDA
+// library exists on the system so callers can cleanly skip the driver.
+iree_status_t iree_hal_cuda_dynamic_symbols_initialize(
+    iree_allocator_t allocator, iree_hal_cuda_dynamic_symbols_t* out_syms) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  memset(out_syms, 0, sizeof(*out_syms));
+  iree_status_t status = iree_dynamic_library_load_from_files(
+      IREE_ARRAYSIZE(kCUDALoaderSearchNames), kCUDALoaderSearchNames,
+      IREE_DYNAMIC_LIBRARY_FLAG_NONE, allocator, &out_syms->loader_library);
+  if (iree_status_is_not_found(status)) {
+    iree_status_ignore(status);
+    // NOTE: the trace zone must be balanced on this early-exit path; the
+    // original code returned without ending it.
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(
+        IREE_STATUS_UNAVAILABLE,
+        "CUDA runtime library not available; ensure installed and on path");
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_cuda_dynamic_symbols_resolve_all(out_syms);
+  }
+  if (!iree_status_is_ok(status)) {
+    // Don't leave partially-initialized symbols behind on failure.
+    iree_hal_cuda_dynamic_symbols_deinitialize(out_syms);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Unloads the backing library and zeroes |syms|; all resolved function
+// pointers become invalid after this returns.
+void iree_hal_cuda_dynamic_symbols_deinitialize(
+    iree_hal_cuda_dynamic_symbols_t* syms) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_dynamic_library_release(syms->loader_library);
+  memset(syms, 0, sizeof(*syms));
+  IREE_TRACE_ZONE_END(z0);
+}
diff --git a/runtime/src/iree/hal/cuda/dynamic_symbols.h b/runtime/src/iree/hal/cuda/dynamic_symbols.h
new file mode 100644
index 0000000..ccdba6c
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/dynamic_symbols.h
@@ -0,0 +1,47 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_DYNAMIC_SYMBOLS_H_
+#define IREE_HAL_CUDA_DYNAMIC_SYMBOLS_H_
+
+#include "iree/base/api.h"
+#include "iree/base/internal/dynamic_library.h"
+#include "iree/hal/cuda/cuda_headers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Dynamic symbols allow loading a subset of the CUDA driver API at runtime.
+// All functions declared in `dynamic_symbol_tables.h` are loaded and
+// initialization fails if any required symbol is unavailable. The function
+// signatures match the declarations in `cuda.h`.
+typedef struct iree_hal_cuda_dynamic_symbols_t {
+  // Handle to the loaded CUDA driver library; owned by this struct.
+  iree_dynamic_library_t* loader_library;
+
+  // One function pointer per table entry, named exactly like the CUDA symbol
+  // so call sites read as `syms->cuInit(0)`.
+#define CU_PFN_DECL(cudaSymbolName, ...) \
+  CUresult (*cudaSymbolName)(__VA_ARGS__);
+#include "iree/hal/cuda/dynamic_symbol_tables.h"  // IWYU pragma: export
+#undef CU_PFN_DECL
+} iree_hal_cuda_dynamic_symbols_t;
+
+// Initializes |out_syms| in-place with dynamically loaded CUDA symbols.
+// iree_hal_cuda_dynamic_symbols_deinitialize must be used to release the
+// library resources.
+iree_status_t iree_hal_cuda_dynamic_symbols_initialize(
+ iree_allocator_t allocator, iree_hal_cuda_dynamic_symbols_t* out_syms);
+
+// Deinitializes |syms| by unloading the backing library. All function pointers
+// will be invalidated. They _may_ still work if there are other reasons the
+// library remains loaded so be careful.
+void iree_hal_cuda_dynamic_symbols_deinitialize(
+ iree_hal_cuda_dynamic_symbols_t* syms);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_DYNAMIC_SYMBOLS_H_
diff --git a/runtime/src/iree/hal/cuda/dynamic_symbols_test.cc b/runtime/src/iree/hal/cuda/dynamic_symbols_test.cc
new file mode 100644
index 0000000..ab5136c
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/dynamic_symbols_test.cc
@@ -0,0 +1,50 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/dynamic_symbols.h"
+
+#include <iostream>
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+
+namespace iree {
+namespace hal {
+namespace cuda {
+namespace {
+
+// Asserts that |expr| evaluates to CUDA_SUCCESS.
+// Fixes from review: macro name typo ("CUDE"), unparenthesized macro
+// argument, and an inner `status` that shadowed the test's iree_status_t.
+#define CUDA_CHECK_ERRORS(expr)        \
+  {                                    \
+    CUresult result_ = (expr);         \
+    ASSERT_EQ(CUDA_SUCCESS, result_);  \
+  }
+
+// Loads the CUDA driver library from the system and exercises a few resolved
+// entry points. Skips (rather than fails) on machines without CUDA.
+TEST(DynamicSymbolsTest, CreateFromSystemLoader) {
+  iree_hal_cuda_dynamic_symbols_t symbols;
+  iree_status_t status = iree_hal_cuda_dynamic_symbols_initialize(
+      iree_allocator_system(), &symbols);
+  if (!iree_status_is_ok(status)) {
+    iree_status_fprint(stderr, status);
+    iree_status_ignore(status);
+    std::cerr << "Symbols cannot be loaded, skipping test.";
+    GTEST_SKIP();
+  }
+
+  int device_count = 0;
+  CUDA_CHECK_ERRORS(symbols.cuInit(0));
+  CUDA_CHECK_ERRORS(symbols.cuDeviceGetCount(&device_count));
+  if (device_count > 0) {
+    CUdevice device;
+    CUDA_CHECK_ERRORS(symbols.cuDeviceGet(&device, /*ordinal=*/0));
+  }
+
+  iree_hal_cuda_dynamic_symbols_deinitialize(&symbols);
+}
+
+} // namespace
+} // namespace cuda
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/cuda/event_semaphore.c b/runtime/src/iree/hal/cuda/event_semaphore.c
new file mode 100644
index 0000000..17a5bfb
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/event_semaphore.c
@@ -0,0 +1,93 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/event_semaphore.h"
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+
+// Stub CUDA semaphore; query/signal/wait are not yet functional (see the
+// TODOs below).
+typedef struct iree_hal_cuda_semaphore_t {
+  // Base HAL resource (ref count + vtable).
+  iree_hal_resource_t resource;
+  // Unowned context providing the host allocator used to free the semaphore.
+  iree_hal_cuda_context_wrapper_t* context;
+  // Value the semaphore was created with; stored but not yet consulted by the
+  // stubbed query/signal/wait paths below.
+  uint64_t initial_value;
+} iree_hal_cuda_semaphore_t;
+
+static const iree_hal_semaphore_vtable_t iree_hal_cuda_semaphore_vtable;
+
+// Downcasts |base_value| to the CUDA semaphore type, asserting (in debug
+// builds) that the vtable matches.
+static iree_hal_cuda_semaphore_t* iree_hal_cuda_semaphore_cast(
+    iree_hal_semaphore_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_semaphore_vtable);
+  return (iree_hal_cuda_semaphore_t*)base_value;
+}
+
+// Creates a (currently stubbed) CUDA semaphore with |initial_value|.
+// On success ownership of *|out_semaphore| transfers to the caller.
+iree_status_t iree_hal_cuda_semaphore_create(
+    iree_hal_cuda_context_wrapper_t* context, uint64_t initial_value,
+    iree_hal_semaphore_t** out_semaphore) {
+  IREE_ASSERT_ARGUMENT(context);
+  IREE_ASSERT_ARGUMENT(out_semaphore);
+  // Clear the out param so callers never observe a stale pointer on failure;
+  // matches every other create function in this driver (event, layouts).
+  *out_semaphore = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_cuda_semaphore_t* semaphore = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      context->host_allocator, sizeof(*semaphore), (void**)&semaphore);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_cuda_semaphore_vtable,
+                                 &semaphore->resource);
+    semaphore->context = context;
+    semaphore->initial_value = initial_value;
+    *out_semaphore = (iree_hal_semaphore_t*)semaphore;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees the semaphore. The host allocator is captured before the free because
+// it lives in the (unowned) context, not in the semaphore allocation itself.
+static void iree_hal_cuda_semaphore_destroy(
+    iree_hal_semaphore_t* base_semaphore) {
+  iree_hal_cuda_semaphore_t* semaphore =
+      iree_hal_cuda_semaphore_cast(base_semaphore);
+  iree_allocator_t host_allocator = semaphore->context->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(host_allocator, semaphore);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Querying the current payload value is not yet supported; reports zero and
+// an UNIMPLEMENTED status. (Fixes the "impemented" typo in the message.)
+static iree_status_t iree_hal_cuda_semaphore_query(
+    iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) {
+  // TODO: Support semaphores completely.
+  *out_value = 0;
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "not implemented on CUDA");
+}
+
+// Signaling is a no-op for now; |new_value| is ignored.
+static iree_status_t iree_hal_cuda_semaphore_signal(
+    iree_hal_semaphore_t* base_semaphore, uint64_t new_value) {
+  // TODO: Support semaphores completely. Return OK currently as everything is
+  // synchronized for each submit to allow things to run.
+  return iree_ok_status();
+}
+
+// Failure propagation is not yet supported. The incoming status is consumed
+// here so any allocated status payload is freed rather than leaked
+// (NOTE(review): assumes fail() takes ownership of |status| per the HAL
+// semaphore contract — confirm against iree/hal/semaphore.h).
+static void iree_hal_cuda_semaphore_fail(iree_hal_semaphore_t* base_semaphore,
+                                         iree_status_t status) {
+  iree_status_ignore(status);
+}
+
+// Waiting is a no-op for now; |value| and |timeout| are ignored.
+static iree_status_t iree_hal_cuda_semaphore_wait(
+    iree_hal_semaphore_t* base_semaphore, uint64_t value,
+    iree_timeout_t timeout) {
+  // TODO: Support semaphores completely. Return OK currently as everything is
+  // synchronized for each submit to allow things to run.
+  return iree_ok_status();
+}
+
+// Semaphore vtable routing the HAL semaphore API to the stubs above.
+static const iree_hal_semaphore_vtable_t iree_hal_cuda_semaphore_vtable = {
+    .destroy = iree_hal_cuda_semaphore_destroy,
+    .query = iree_hal_cuda_semaphore_query,
+    .signal = iree_hal_cuda_semaphore_signal,
+    .fail = iree_hal_cuda_semaphore_fail,
+    .wait = iree_hal_cuda_semaphore_wait,
+};
diff --git a/runtime/src/iree/hal/cuda/event_semaphore.h b/runtime/src/iree/hal/cuda/event_semaphore.h
new file mode 100644
index 0000000..3580bf2
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/event_semaphore.h
@@ -0,0 +1,30 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_SEMAPHORE_H_
+#define IREE_HAL_CUDA_SEMAPHORE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+#include "iree/hal/cuda/status_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a CUDA semaphore initialized to |initial_value|.
+iree_status_t iree_hal_cuda_semaphore_create(
+ iree_hal_cuda_context_wrapper_t* context, uint64_t initial_value,
+ iree_hal_semaphore_t** out_semaphore);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_SEMAPHORE_H_
diff --git a/runtime/src/iree/hal/cuda/executable_layout.c b/runtime/src/iree/hal/cuda/executable_layout.c
new file mode 100644
index 0000000..892e03f
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/executable_layout.c
@@ -0,0 +1,126 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/executable_layout.h"
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/cuda/descriptor_set_layout.h"
+
+// Executable layout: an ordered list of retained descriptor set layouts plus
+// a push constant range placed after all bindings in the flattened
+// kernel-argument space.
+typedef struct iree_hal_cuda_executable_layout_t {
+  // Base HAL resource (ref count + vtable).
+  iree_hal_resource_t resource;
+  // Unowned context providing the host allocator used to free the layout.
+  iree_hal_cuda_context_wrapper_t* context;
+  // Flattened argument index where push constants begin (== total bindings).
+  iree_host_size_t push_constant_base_index;
+  // Number of push constants in the layout.
+  iree_host_size_t push_constant_count;
+  // Number of entries in the tail-allocated set_layouts array below.
+  iree_host_size_t set_layout_count;
+  // Retained set layouts; tail-allocated with the struct.
+  iree_hal_descriptor_set_layout_t* set_layouts[];
+} iree_hal_cuda_executable_layout_t;
+
+static const iree_hal_executable_layout_vtable_t
+    iree_hal_cuda_executable_layout_vtable;
+
+// Downcasts |base_value| to the CUDA layout type, asserting (in debug builds)
+// that the vtable matches.
+static iree_hal_cuda_executable_layout_t* iree_hal_cuda_executable_layout_cast(
+    iree_hal_executable_layout_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_executable_layout_vtable);
+  return (iree_hal_cuda_executable_layout_t*)base_value;
+}
+
+// Releases every retained descriptor set layout and then frees the layout
+// storage (allocator captured first, as it lives in the unowned context).
+static void iree_hal_cuda_executable_layout_destroy(
+    iree_hal_executable_layout_t* base_executable_layout) {
+  iree_hal_cuda_executable_layout_t* layout =
+      iree_hal_cuda_executable_layout_cast(base_executable_layout);
+  iree_allocator_t host_allocator = layout->context->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  for (iree_host_size_t i = 0; i < layout->set_layout_count; ++i) {
+    iree_hal_descriptor_set_layout_release(layout->set_layouts[i]);
+  }
+  iree_allocator_free(host_allocator, layout);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Creates an executable layout from |set_layout_count| descriptor set layouts
+// (each retained for the lifetime of the layout) and |push_constant_count|
+// push constants appended after all bindings. On success ownership of
+// *|out_executable_layout| transfers to the caller.
+iree_status_t iree_hal_cuda_executable_layout_create(
+    iree_hal_cuda_context_wrapper_t* context, iree_host_size_t set_layout_count,
+    iree_hal_descriptor_set_layout_t** set_layouts,
+    iree_host_size_t push_constant_count,
+    iree_hal_executable_layout_t** out_executable_layout) {
+  IREE_ASSERT_ARGUMENT(context);
+  IREE_ASSERT_ARGUMENT(!set_layout_count || set_layouts);
+  IREE_ASSERT_ARGUMENT(out_executable_layout);
+  *out_executable_layout = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  if (push_constant_count > IREE_HAL_CUDA_MAX_PUSH_CONSTANT_COUNT) {
+    // NOTE: the trace zone must be balanced on this early-exit path; the
+    // original code returned without ending it.
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "push constant count %zu over the limit of %d",
+                            push_constant_count,
+                            IREE_HAL_CUDA_MAX_PUSH_CONSTANT_COUNT);
+  }
+
+  // Currently the executable layout doesn't do anything.
+  // TODO: Handle creating the argument layout at that time handling both push
+  // constants and buffers.
+  iree_hal_cuda_executable_layout_t* executable_layout = NULL;
+  // Tail-allocate the set layout pointer array in the same allocation.
+  iree_host_size_t total_size =
+      sizeof(*executable_layout) +
+      set_layout_count * sizeof(*executable_layout->set_layouts);
+  iree_status_t status = iree_allocator_malloc(
+      context->host_allocator, total_size, (void**)&executable_layout);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_cuda_executable_layout_vtable,
+                                 &executable_layout->resource);
+    executable_layout->context = context;
+    executable_layout->set_layout_count = set_layout_count;
+    iree_host_size_t binding_number = 0;
+    for (iree_host_size_t i = 0; i < set_layout_count; ++i) {
+      executable_layout->set_layouts[i] = set_layouts[i];
+      iree_hal_descriptor_set_layout_retain(set_layouts[i]);
+      binding_number +=
+          iree_hal_cuda_descriptor_set_layout_binding_count(set_layouts[i]);
+    }
+    // Push constants live immediately after all flattened bindings.
+    executable_layout->push_constant_base_index = binding_number;
+    executable_layout->push_constant_count = push_constant_count;
+    *out_executable_layout = (iree_hal_executable_layout_t*)executable_layout;
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Returns the flattened kernel-argument index at which |set| begins: the sum
+// of the binding counts of all preceding sets.
+iree_host_size_t iree_hal_cuda_base_binding_index(
+    iree_hal_executable_layout_t* base_executable_layout, uint32_t set) {
+  iree_hal_cuda_executable_layout_t* layout =
+      iree_hal_cuda_executable_layout_cast(base_executable_layout);
+  iree_host_size_t base_index = 0;
+  for (uint32_t i = 0; i < set; ++i) {
+    base_index += iree_hal_cuda_descriptor_set_layout_binding_count(
+        layout->set_layouts[i]);
+  }
+  return base_index;
+}
+
+// Returns the flattened argument index where push constants begin
+// (computed at creation time as the total binding count across all sets).
+iree_host_size_t iree_hal_cuda_push_constant_index(
+    iree_hal_executable_layout_t* base_executable_layout) {
+  iree_hal_cuda_executable_layout_t* executable_layout =
+      iree_hal_cuda_executable_layout_cast(base_executable_layout);
+  return executable_layout->push_constant_base_index;
+}
+
+// Returns the number of push constants declared for the layout.
+iree_host_size_t iree_hal_cuda_executable_layout_num_constants(
+    iree_hal_executable_layout_t* base_executable_layout) {
+  iree_hal_cuda_executable_layout_t* executable_layout =
+      iree_hal_cuda_executable_layout_cast(base_executable_layout);
+  return executable_layout->push_constant_count;
+}
+
+// Executable layout vtable; only destruction is needed.
+static const iree_hal_executable_layout_vtable_t
+    iree_hal_cuda_executable_layout_vtable = {
+        .destroy = iree_hal_cuda_executable_layout_destroy,
+};
diff --git a/runtime/src/iree/hal/cuda/executable_layout.h b/runtime/src/iree/hal/cuda/executable_layout.h
new file mode 100644
index 0000000..b7810e0
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/executable_layout.h
@@ -0,0 +1,43 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_EXECUTABLE_LAYOUT_H_
+#define IREE_HAL_CUDA_EXECUTABLE_LAYOUT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define IREE_HAL_CUDA_MAX_PUSH_CONSTANT_COUNT 64
+
+// Creates the kernel arguments.
+iree_status_t iree_hal_cuda_executable_layout_create(
+ iree_hal_cuda_context_wrapper_t* context, iree_host_size_t set_layout_count,
+ iree_hal_descriptor_set_layout_t** set_layouts,
+ iree_host_size_t push_constant_count,
+ iree_hal_executable_layout_t** out_executable_layout);
+
+// Return the base binding index for the given set.
+iree_host_size_t iree_hal_cuda_base_binding_index(
+ iree_hal_executable_layout_t* executable_layout, uint32_t set);
+
+// Return the base index for push constant data.
+iree_host_size_t iree_hal_cuda_push_constant_index(
+ iree_hal_executable_layout_t* base_executable_layout);
+
+// Return the number of constants in the executable layout.
+iree_host_size_t iree_hal_cuda_executable_layout_num_constants(
+ iree_hal_executable_layout_t* base_executable_layout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_EXECUTABLE_LAYOUT_H_
diff --git a/runtime/src/iree/hal/cuda/graph_command_buffer.c b/runtime/src/iree/hal/cuda/graph_command_buffer.c
new file mode 100644
index 0000000..d5ea450
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/graph_command_buffer.c
@@ -0,0 +1,583 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/graph_command_buffer.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/cuda/cuda_buffer.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+#include "iree/hal/cuda/executable_layout.h"
+#include "iree/hal/cuda/native_executable.h"
+#include "iree/hal/cuda/status_util.h"
+#include "iree/hal/utils/resource_set.h"
+
+#define IREE_HAL_CUDA_MAX_BINDING_COUNT 64
+// Kernel arguments contains binding and push constants.
+#define IREE_HAL_CUDA_MAX_KERNEL_ARG 128
+
+// Command buffer implementation that directly maps to cuda graph.
+// This records the commands on the calling thread without additional threading
+// indirection.
+typedef struct iree_hal_cuda_graph_command_buffer_t {
+ iree_hal_command_buffer_t base;
+ iree_hal_cuda_context_wrapper_t* context;
+
+ // Maintains a reference to all resources used within the command buffer.
+ // Reset on each begin.
+ iree_hal_resource_set_t* resource_set;
+
+ // Staging arena used for host->device transfers.
+ // Used for when we need CUDA to be able to reference memory as it performs
+ // asynchronous operations.
+ iree_arena_allocator_t arena;
+
+ // |graph| is the graph being recorded into (destroyed after instantiation
+ // in end()); |exec| is the instantiated executable graph that callers run.
+ CUgraph graph;
+ CUgraphExec exec;
+
+ // Keep track of the last node added to the command buffer as we are currently
+ // serializing all the nodes (each node depends on the previous one).
+ CUgraphNode last_node;
+ // Host-side staging for push constant values; patched into the kernel
+ // arguments when a dispatch is recorded.
+ int32_t push_constant[IREE_HAL_CUDA_MAX_PUSH_CONSTANT_COUNT];
+ // Keep track of the current set of kernel arguments. Trailing flexible
+ // array member; the same allocation also carries the CUdeviceptr storage
+ // each slot points at (see create()).
+ void* current_descriptor[];
+} iree_hal_cuda_graph_command_buffer_t;
+
+// Forward declared so the cast helper can reference the vtable defined at the
+// bottom of this file.
+static const iree_hal_command_buffer_vtable_t
+ iree_hal_cuda_graph_command_buffer_vtable;
+
+// Downcasts |base_value| to the CUDA graph command buffer type; the vtable is
+// checked in debug builds only.
+static iree_hal_cuda_graph_command_buffer_t*
+iree_hal_cuda_graph_command_buffer_cast(iree_hal_command_buffer_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_graph_command_buffer_vtable);
+ return (iree_hal_cuda_graph_command_buffer_t*)base_value;
+}
+
+// Allocates and initializes a graph command buffer. The single allocation is
+// sized to also carry the kernel-argument pointer table and the CUdeviceptr
+// backing storage each table slot points at.
+iree_status_t iree_hal_cuda_graph_command_buffer_create(
+ iree_hal_device_t* device, iree_hal_cuda_context_wrapper_t* context,
+ iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity,
+ iree_arena_block_pool_t* block_pool,
+ iree_hal_command_buffer_t** out_command_buffer) {
+ IREE_ASSERT_ARGUMENT(context);
+ IREE_ASSERT_ARGUMENT(block_pool);
+ IREE_ASSERT_ARGUMENT(out_command_buffer);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_cuda_graph_command_buffer_t* command_buffer = NULL;
+ // One void* argument slot plus one CUdeviceptr value per possible argument.
+ size_t total_size = sizeof(*command_buffer) +
+ IREE_HAL_CUDA_MAX_KERNEL_ARG * sizeof(void*) +
+ IREE_HAL_CUDA_MAX_KERNEL_ARG * sizeof(CUdeviceptr);
+ iree_status_t status = iree_allocator_malloc(
+ context->host_allocator, total_size, (void**)&command_buffer);
+ if (iree_status_is_ok(status)) {
+ iree_hal_command_buffer_initialize(
+ device, mode, command_categories, queue_affinity,
+ &iree_hal_cuda_graph_command_buffer_vtable, &command_buffer->base);
+ command_buffer->context = context;
+ iree_arena_initialize(block_pool, &command_buffer->arena);
+ command_buffer->graph = NULL;
+ command_buffer->exec = NULL;
+ command_buffer->last_node = NULL;
+
+ // Point each kernel-argument slot at its dedicated CUdeviceptr storage
+ // located immediately after the pointer table.
+ CUdeviceptr* device_ptrs =
+ (CUdeviceptr*)(command_buffer->current_descriptor +
+ IREE_HAL_CUDA_MAX_KERNEL_ARG);
+ for (size_t i = 0; i < IREE_HAL_CUDA_MAX_KERNEL_ARG; i++) {
+ command_buffer->current_descriptor[i] = &device_ptrs[i];
+ }
+
+ status = iree_hal_resource_set_allocate(block_pool,
+ &command_buffer->resource_set);
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_command_buffer = &command_buffer->base;
+ } else {
+ // NOTE(review): on malloc failure |command_buffer| is NULL here, so this
+ // relies on release tolerating NULL (|base| is the first member) --
+ // confirm.
+ iree_hal_command_buffer_release(&command_buffer->base);
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Drops the CUDA graph objects and all recorded state so the command buffer
+// can be re-recorded. Safe to call when nothing has been recorded yet.
+static void iree_hal_cuda_graph_command_buffer_reset(
+ iree_hal_cuda_graph_command_buffer_t* command_buffer) {
+ if (command_buffer->graph != NULL) {
+ CUDA_IGNORE_ERROR(command_buffer->context->syms,
+ cuGraphDestroy(command_buffer->graph));
+ command_buffer->graph = NULL;
+ }
+
+ if (command_buffer->exec != NULL) {
+ CUDA_IGNORE_ERROR(command_buffer->context->syms,
+ cuGraphExecDestroy(command_buffer->exec));
+ command_buffer->exec = NULL;
+ }
+
+ command_buffer->last_node = NULL;
+
+ // Drop references to resources captured during recording and reclaim any
+ // staging arena blocks used for update_buffer copies.
+ iree_hal_resource_set_reset(command_buffer->resource_set);
+ iree_arena_reset(&command_buffer->arena);
+}
+
+// Tears down the command buffer: destroys any live graph objects, releases
+// retained resources and arena storage, then frees the allocation made in
+// create().
+static void iree_hal_cuda_graph_command_buffer_destroy(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_cuda_graph_command_buffer_reset(command_buffer);
+ iree_hal_resource_set_free(command_buffer->resource_set);
+ iree_arena_deinitialize(&command_buffer->arena);
+ iree_allocator_free(command_buffer->context->host_allocator, command_buffer);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the instantiated executable graph (NULL before end() succeeds).
+// NOTE(review): duplicates iree_hal_cuda_graph_command_buffer_exec below and
+// is not declared in graph_command_buffer.h -- possibly dead code; confirm.
+CUgraphExec iree_hal_cuda_graph_command_buffer_handle(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+ return command_buffer->exec;
+}
+
+// Returns true if |command_buffer| is a CUDA graph command buffer.
+bool iree_hal_cuda_graph_command_buffer_isa(
+ iree_hal_command_buffer_t* command_buffer) {
+ return iree_hal_command_buffer_dyn_cast(
+ command_buffer, &iree_hal_cuda_graph_command_buffer_vtable);
+}
+
+// vtable dyn_cast implementation: returns the command buffer when |vtable|
+// matches this implementation and NULL otherwise.
+static void* iree_hal_cuda_graph_command_buffer_dyn_cast(
+ iree_hal_command_buffer_t* command_buffer, const void* vtable) {
+ if (vtable == &iree_hal_cuda_graph_command_buffer_vtable) {
+ IREE_HAL_ASSERT_TYPE(command_buffer, vtable);
+ return command_buffer;
+ }
+ return NULL;
+}
+
+// Begins recording: clears any previously recorded state and creates a fresh
+// empty CUDA graph to record nodes into.
+static iree_status_t iree_hal_cuda_graph_command_buffer_begin(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+
+ // Reset any prior recorded commands.
+ iree_hal_cuda_graph_command_buffer_reset(command_buffer);
+
+ // Create a new empty graph to record into.
+ CUDA_RETURN_IF_ERROR(command_buffer->context->syms,
+ cuGraphCreate(&command_buffer->graph, /*flags=*/0),
+ "cuGraphCreate");
+
+ return iree_ok_status();
+}
+
+// Finalizes recording: instantiates the recorded graph into an executable
+// graph (|exec|) and destroys the construction graph on success.
+static iree_status_t iree_hal_cuda_graph_command_buffer_end(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+
+ // Reset state used during recording.
+ command_buffer->last_node = NULL;
+
+ // Compile the graph.
+ CUgraphNode error_node = NULL;
+ iree_status_t status =
+ CU_RESULT_TO_STATUS(command_buffer->context->syms,
+ cuGraphInstantiate(&command_buffer->exec,
+ command_buffer->graph, &error_node,
+ /*logBuffer=*/NULL,
+ /*bufferSize=*/0));
+ if (iree_status_is_ok(status)) {
+ // No longer need the source graph used for construction.
+ CUDA_IGNORE_ERROR(command_buffer->context->syms,
+ cuGraphDestroy(command_buffer->graph));
+ command_buffer->graph = NULL;
+ }
+
+ // Propagate instantiation failures (previously the status was computed and
+ // then dropped by returning iree_ok_status(), leaking the status object).
+ return status;
+}
+
+// Debug group markers are currently no-ops for the graph command buffer.
+static void iree_hal_cuda_graph_command_buffer_begin_debug_group(
+ iree_hal_command_buffer_t* base_command_buffer, iree_string_view_t label,
+ iree_hal_label_color_t label_color,
+ const iree_hal_label_location_t* location) {
+ // TODO(benvanik): tracy event stack.
+}
+
+static void iree_hal_cuda_graph_command_buffer_end_debug_group(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ // TODO(benvanik): tracy event stack.
+}
+
+// All barrier/event primitives below are no-ops today: every node is recorded
+// with a dependency on the previous node (fully serialized), which subsumes
+// any finer-grained ordering the barriers would express.
+static iree_status_t iree_hal_cuda_graph_command_buffer_execution_barrier(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_hal_execution_barrier_flags_t flags,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers) {
+ // TODO: Implement barrier with Graph edges. Right now all the nodes are
+ // serialized.
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_graph_command_buffer_signal_event(
+ iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask) {
+ // TODO: Implement barrier with Graph edges. Right now all the nodes are
+ // serialized.
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_graph_command_buffer_reset_event(
+ iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask) {
+ // TODO: Implement barrier with Graph edges. Right now all the nodes are
+ // serialized.
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_graph_command_buffer_wait_events(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_host_size_t event_count, const iree_hal_event_t** events,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers) {
+ // TODO: Implement barrier with Graph edges. Right now all the nodes are
+ // serialized.
+ return iree_ok_status();
+}
+
+// Discard is a hint only; nothing to record for CUDA graphs today.
+static iree_status_t iree_hal_cuda_graph_command_buffer_discard_buffer(
+ iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_t* buffer) {
+ // We could mark the memory as invalidated so that managed CUDA does not
+ // try to copy it back to the host.
+ return iree_ok_status();
+}
+
+// Splats a pattern value of 1, 2, or 4 bytes out to a 4 byte value.
+// Other lengths return 0; callers are expected to have validated
+// |pattern_length| beforehand (see the default case note).
+static uint32_t iree_hal_cuda_splat_pattern(const void* pattern,
+ size_t pattern_length) {
+ switch (pattern_length) {
+ case 1: {
+ uint32_t pattern_value = *(const uint8_t*)(pattern);
+ return (pattern_value << 24) | (pattern_value << 16) |
+ (pattern_value << 8) | pattern_value;
+ }
+ case 2: {
+ uint32_t pattern_value = *(const uint16_t*)(pattern);
+ return (pattern_value << 16) | pattern_value;
+ }
+ case 4: {
+ uint32_t pattern_value = *(const uint32_t*)(pattern);
+ return pattern_value;
+ }
+ default:
+ return 0; // Already verified that this should not be possible.
+ }
+}
+
+// Records a memset node filling |length| bytes of |target_buffer| at
+// |target_offset| with |pattern| splatted to a 32-bit value.
+static iree_status_t iree_hal_cuda_graph_command_buffer_fill_buffer(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+ iree_device_size_t length, const void* pattern,
+ iree_host_size_t pattern_length) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+
+ // Keep the buffer alive for the lifetime of the command buffer.
+ IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+ command_buffer->resource_set, 1, &target_buffer));
+
+ CUdeviceptr target_device_buffer = iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(target_buffer));
+ target_offset += iree_hal_buffer_byte_offset(target_buffer);
+ uint32_t dword_pattern = iree_hal_cuda_splat_pattern(pattern, pattern_length);
+ CUDA_MEMSET_NODE_PARAMS params = {
+ .dst = target_device_buffer + target_offset,
+ .elementSize = pattern_length,
+ // width in number of elements despite what driver documentation says.
+ .width = length / pattern_length,
+ .height = 1,
+ .value = dword_pattern,
+ };
+ // Serialize all the nodes for now.
+ CUgraphNode dep[] = {command_buffer->last_node};
+ size_t numNode = command_buffer->last_node ? 1 : 0;
+ // NOTE: restored '&params' here, which had been mis-encoded as a pilcrow
+ // ('¶ms') in the original text.
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuGraphAddMemsetNode(&command_buffer->last_node, command_buffer->graph,
+ dep, numNode, &params,
+ command_buffer->context->cu_context),
+ "cuGraphAddMemsetNode");
+ return iree_ok_status();
+}
+
+// Records a host->device copy node. The source bytes are snapshotted into the
+// arena at record time per the update_buffer contract.
+static iree_status_t iree_hal_cuda_graph_command_buffer_update_buffer(
+ iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
+ iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+
+ // Allocate scratch space in the arena for the data and copy it in.
+ // The update buffer API requires that the command buffer capture the host
+ // memory at the time the method is called in case the caller wants to reuse
+ // the memory. Because CUDA memcpys are async if we didn't copy it's possible
+ // for the reused memory to change before the stream reaches the copy
+ // operation and get the wrong data.
+ uint8_t* storage = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_arena_allocate(&command_buffer->arena, length, (void**)&storage));
+ memcpy(storage, (const uint8_t*)source_buffer + source_offset, length);
+
+ IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+ command_buffer->resource_set, 1, &target_buffer));
+
+ CUdeviceptr target_device_buffer = iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(target_buffer));
+ CUDA_MEMCPY3D params = {
+ .srcMemoryType = CU_MEMORYTYPE_HOST,
+ .srcHost = storage,
+ .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+ .dstDevice = target_device_buffer,
+ .dstXInBytes = iree_hal_buffer_byte_offset(target_buffer) + target_offset,
+ .WidthInBytes = length,
+ .Height = 1,
+ .Depth = 1,
+ };
+ // Serialize all the nodes for now.
+ CUgraphNode dep[] = {command_buffer->last_node};
+ size_t numNode = command_buffer->last_node ? 1 : 0;
+ // NOTE: restored '&params' here, which had been mis-encoded as a pilcrow
+ // ('¶ms') in the original text.
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuGraphAddMemcpyNode(&command_buffer->last_node, command_buffer->graph,
+ dep, numNode, &params,
+ command_buffer->context->cu_context),
+ "cuGraphAddMemcpyNode");
+ return iree_ok_status();
+}
+
+// Records a device->device copy node between two HAL buffers.
+static iree_status_t iree_hal_cuda_graph_command_buffer_copy_buffer(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+ iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+ iree_device_size_t length) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+
+ // Keep both buffers alive for the lifetime of the command buffer.
+ const iree_hal_buffer_t* buffers[2] = {source_buffer, target_buffer};
+ IREE_RETURN_IF_ERROR(
+ iree_hal_resource_set_insert(command_buffer->resource_set, 2, buffers));
+
+ CUdeviceptr target_device_buffer = iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(target_buffer));
+ target_offset += iree_hal_buffer_byte_offset(target_buffer);
+ CUdeviceptr source_device_buffer = iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(source_buffer));
+ source_offset += iree_hal_buffer_byte_offset(source_buffer);
+ CUDA_MEMCPY3D params = {
+ .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+ .srcDevice = source_device_buffer,
+ .srcXInBytes = source_offset,
+ .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+ .dstDevice = target_device_buffer,
+ .dstXInBytes = target_offset,
+ .WidthInBytes = length,
+ .Height = 1,
+ .Depth = 1,
+ };
+ // Serialize all the nodes for now.
+ CUgraphNode dep[] = {command_buffer->last_node};
+ size_t numNode = command_buffer->last_node ? 1 : 0;
+ // NOTE: restored '&params' here, which had been mis-encoded as a pilcrow
+ // ('¶ms') in the original text.
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuGraphAddMemcpyNode(&command_buffer->last_node, command_buffer->graph,
+ dep, numNode, &params,
+ command_buffer->context->cu_context),
+ "cuGraphAddMemcpyNode");
+ return iree_ok_status();
+}
+
+// Stages push constant |values| into host-side storage; they are patched into
+// the kernel arguments when a dispatch is recorded.
+static iree_status_t iree_hal_cuda_graph_command_buffer_push_constants(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+ const void* values, iree_host_size_t values_length) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+ iree_host_size_t constant_base_index = offset / sizeof(int32_t);
+ iree_host_size_t constant_count = values_length / sizeof(int32_t);
+ // Guard the fixed-size storage: previously an oversized offset/length
+ // silently overflowed |push_constant|.
+ if (constant_base_index + constant_count >
+ IREE_HAL_CUDA_MAX_PUSH_CONSTANT_COUNT) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "push constant range exceeds the maximum supported count");
+ }
+ for (iree_host_size_t i = 0; i < constant_count; i++) {
+ command_buffer->push_constant[i + constant_base_index] =
+ ((uint32_t*)values)[i];
+ }
+ return iree_ok_status();
+}
+
+// Tie together the binding index and its index in |bindings| array.
+typedef struct {
+ uint32_t index;
+ uint32_t binding;
+} iree_hal_cuda_binding_mapping_t;
+
+// Helper to sort the binding based on their binding index.
+// NOTE: never returns 0, so equal binding indices order arbitrarily under
+// qsort; binding indices are expected to be unique within a set.
+static int compare_binding_index(const void* a, const void* b) {
+ const iree_hal_cuda_binding_mapping_t buffer_a =
+ *(const iree_hal_cuda_binding_mapping_t*)a;
+ const iree_hal_cuda_binding_mapping_t buffer_b =
+ *(const iree_hal_cuda_binding_mapping_t*)b;
+ return buffer_a.binding < buffer_b.binding ? -1 : 1;
+}
+
+// Binds |bindings| as CUDA kernel arguments.
+// Convention with the compiler side: we map bindings to kernel arguments by
+// compacting them into a dense range, ordered by binding index, starting at
+// the set's base argument index.
+static iree_status_t iree_hal_cuda_graph_command_buffer_push_descriptor_set(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_binding_t* bindings) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+ iree_host_size_t base_binding =
+ iree_hal_cuda_base_binding_index(executable_layout, set);
+ // Check before writing into the fixed-size |binding_used| scratch array;
+ // previously this assert ran only after the array had already been filled.
+ IREE_ASSERT_LT(binding_count, IREE_HAL_CUDA_MAX_BINDING_COUNT,
+ "binding count larger than the max expected");
+ // Sort the binding based on the binding index and map the array index to the
+ // argument index.
+ iree_hal_cuda_binding_mapping_t binding_used[IREE_HAL_CUDA_MAX_BINDING_COUNT];
+ for (iree_host_size_t i = 0; i < binding_count; i++) {
+ iree_hal_cuda_binding_mapping_t buffer = {i, bindings[i].binding};
+ binding_used[i] = buffer;
+ }
+ qsort(binding_used, binding_count, sizeof(iree_hal_cuda_binding_mapping_t),
+ compare_binding_index);
+ for (iree_host_size_t i = 0; i < binding_count; i++) {
+ const iree_hal_descriptor_set_binding_t* binding =
+ &bindings[binding_used[i].index];
+ CUdeviceptr device_ptr =
+ iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(binding->buffer)) +
+ iree_hal_buffer_byte_offset(binding->buffer) + binding->offset;
+ *((CUdeviceptr*)command_buffer->current_descriptor[i + base_binding]) =
+ device_ptr;
+ IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+ command_buffer->resource_set, 1, &binding->buffer));
+ }
+ return iree_ok_status();
+}
+
+// Persistent descriptor sets are not supported by the CUDA graph backend;
+// only push_descriptor_set is implemented.
+static iree_status_t iree_hal_cuda_graph_command_buffer_bind_descriptor_set(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_hal_descriptor_set_t* descriptor_set,
+ iree_host_size_t dynamic_offset_count,
+ const iree_device_size_t* dynamic_offsets) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "need cuda implementation");
+}
+
+// Records a kernel launch node for |entry_point| of |executable|, patching
+// the staged push constants into the shared kernel argument table first.
+static iree_status_t iree_hal_cuda_graph_command_buffer_dispatch(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
+ IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+ command_buffer->resource_set, 1, &executable));
+ iree_hal_executable_layout_t* layout =
+ iree_hal_cuda_executable_get_layout(executable, entry_point);
+ iree_host_size_t num_constants =
+ iree_hal_cuda_executable_layout_num_constants(layout);
+ iree_host_size_t constant_base_index =
+ iree_hal_cuda_push_constant_index(layout);
+ // Patch the push constants in the kernel arguments.
+ for (iree_host_size_t i = 0; i < num_constants; i++) {
+ *((uint32_t*)command_buffer->current_descriptor[i + constant_base_index]) =
+ command_buffer->push_constant[i];
+ }
+ // uint32_t to match the out-params of the native executable queries
+ // (previously declared int32_t: incompatible pointer types at the calls).
+ uint32_t block_size_x, block_size_y, block_size_z;
+ uint32_t shared_memory_size;
+ IREE_RETURN_IF_ERROR(iree_hal_cuda_native_executable_block_size(
+ executable, entry_point, &block_size_x, &block_size_y, &block_size_z));
+ IREE_RETURN_IF_ERROR(iree_hal_cuda_native_executable_shared_memory_size(
+ executable, entry_point, &shared_memory_size));
+ CUDA_KERNEL_NODE_PARAMS params = {
+ .func = iree_hal_cuda_native_executable_for_entry_point(executable,
+ entry_point),
+ .blockDimX = block_size_x,
+ .blockDimY = block_size_y,
+ .blockDimZ = block_size_z,
+ .gridDimX = workgroup_x,
+ .gridDimY = workgroup_y,
+ .gridDimZ = workgroup_z,
+ .kernelParams = command_buffer->current_descriptor,
+ .sharedMemBytes = shared_memory_size,
+ };
+ // Serialize all the nodes for now.
+ CUgraphNode dep[] = {command_buffer->last_node};
+ size_t numNodes = command_buffer->last_node ? 1 : 0;
+ // NOTE: restored '&params' here, which had been mis-encoded as a pilcrow
+ // ('¶ms') in the original text.
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuGraphAddKernelNode(&command_buffer->last_node, command_buffer->graph,
+ dep, numNodes, &params),
+ "cuGraphAddKernelNode");
+ return iree_ok_status();
+}
+
+// Indirect dispatch (workgroup counts read from a device buffer) is not yet
+// supported by the CUDA graph backend.
+static iree_status_t iree_hal_cuda_graph_command_buffer_dispatch_indirect(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ iree_hal_buffer_t* workgroups_buffer,
+ iree_device_size_t workgroups_offset) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "need cuda implementation");
+}
+
+// Returns the instantiated executable graph, asserting (in debug builds via
+// dyn_cast) that |base_command_buffer| really is a graph command buffer.
+CUgraphExec iree_hal_cuda_graph_command_buffer_exec(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer =
+ (iree_hal_cuda_graph_command_buffer_t*)iree_hal_command_buffer_dyn_cast(
+ base_command_buffer, &iree_hal_cuda_graph_command_buffer_vtable);
+ IREE_ASSERT_TRUE(command_buffer);
+ return command_buffer->exec;
+}
+
+// vtable wiring the HAL command buffer interface to this implementation;
+// forward declared near the top of the file for the cast helper.
+static const iree_hal_command_buffer_vtable_t
+ iree_hal_cuda_graph_command_buffer_vtable = {
+ .destroy = iree_hal_cuda_graph_command_buffer_destroy,
+ .dyn_cast = iree_hal_cuda_graph_command_buffer_dyn_cast,
+ .begin = iree_hal_cuda_graph_command_buffer_begin,
+ .end = iree_hal_cuda_graph_command_buffer_end,
+ .begin_debug_group =
+ iree_hal_cuda_graph_command_buffer_begin_debug_group,
+ .end_debug_group = iree_hal_cuda_graph_command_buffer_end_debug_group,
+ .execution_barrier =
+ iree_hal_cuda_graph_command_buffer_execution_barrier,
+ .signal_event = iree_hal_cuda_graph_command_buffer_signal_event,
+ .reset_event = iree_hal_cuda_graph_command_buffer_reset_event,
+ .wait_events = iree_hal_cuda_graph_command_buffer_wait_events,
+ .discard_buffer = iree_hal_cuda_graph_command_buffer_discard_buffer,
+ .fill_buffer = iree_hal_cuda_graph_command_buffer_fill_buffer,
+ .update_buffer = iree_hal_cuda_graph_command_buffer_update_buffer,
+ .copy_buffer = iree_hal_cuda_graph_command_buffer_copy_buffer,
+ .push_constants = iree_hal_cuda_graph_command_buffer_push_constants,
+ .push_descriptor_set =
+ iree_hal_cuda_graph_command_buffer_push_descriptor_set,
+ .bind_descriptor_set =
+ iree_hal_cuda_graph_command_buffer_bind_descriptor_set,
+ .dispatch = iree_hal_cuda_graph_command_buffer_dispatch,
+ .dispatch_indirect =
+ iree_hal_cuda_graph_command_buffer_dispatch_indirect,
+};
diff --git a/runtime/src/iree/hal/cuda/graph_command_buffer.h b/runtime/src/iree/hal/cuda/graph_command_buffer.h
new file mode 100644
index 0000000..8ef4fda
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/graph_command_buffer.h
@@ -0,0 +1,46 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_GRAPH_COMMAND_BUFFER_H_
+#define IREE_HAL_CUDA_GRAPH_COMMAND_BUFFER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+#include "iree/hal/cuda/cuda_headers.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Forward declared to avoid pulling the arena header into this interface.
+typedef struct iree_arena_block_pool_t iree_arena_block_pool_t;
+
+// Creates a command buffer that records into a CUDA graph.
+//
+// NOTE: the |block_pool| must remain live for the lifetime of the command
+// buffers that use it.
+iree_status_t iree_hal_cuda_graph_command_buffer_create(
+ iree_hal_device_t* device, iree_hal_cuda_context_wrapper_t* context,
+ iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity,
+ iree_arena_block_pool_t* block_pool,
+ iree_hal_command_buffer_t** out_command_buffer);
+
+// Returns true if |command_buffer| is a CUDA graph-based command buffer.
+bool iree_hal_cuda_graph_command_buffer_isa(
+ iree_hal_command_buffer_t* command_buffer);
+
+// Returns the instantiated CUDA executable graph associated with the command
+// buffer (NULL until recording has ended successfully).
+CUgraphExec iree_hal_cuda_graph_command_buffer_exec(
+ iree_hal_command_buffer_t* command_buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_GRAPH_COMMAND_BUFFER_H_
diff --git a/runtime/src/iree/hal/cuda/native_executable.c b/runtime/src/iree/hal/cuda/native_executable.c
new file mode 100644
index 0000000..5046595
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/native_executable.c
@@ -0,0 +1,188 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/native_executable.h"
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+#include "iree/hal/cuda/executable_layout.h"
+#include "iree/hal/cuda/status_util.h"
+
+// flatcc schemas:
+#include "iree/base/internal/flatcc/parsing.h"
+#include "iree/schemas/cuda_executable_def_reader.h"
+#include "iree/schemas/cuda_executable_def_verifier.h"
+
+// Per-entry-point function handle and launch metadata resolved from the
+// loaded CUmodule (block sizes and dynamic shared memory requirement come
+// from the executable flatbuffer).
+typedef struct iree_hal_cuda_native_executable_function_t {
+ CUfunction cu_function;
+ uint32_t block_size_x;
+ uint32_t block_size_y;
+ uint32_t block_size_z;
+ uint32_t shared_memory_size;
+} iree_hal_cuda_native_executable_function_t;
+
+// Executable backed by a loaded CUmodule. |executable_layouts| (one retained
+// layout per entry point) is carved out of the same allocation, after the
+// trailing |entry_functions| flexible array (see create()).
+typedef struct iree_hal_cuda_native_executable_t {
+ iree_hal_resource_t resource;
+ iree_hal_cuda_context_wrapper_t* context;
+ iree_hal_executable_layout_t** executable_layouts;
+ iree_host_size_t entry_count;
+ CUmodule module;
+ iree_hal_cuda_native_executable_function_t entry_functions[];
+} iree_hal_cuda_native_executable_t;
+
+// Forward declared for the cast helper; defined at the bottom of the file.
+static const iree_hal_executable_vtable_t
+ iree_hal_cuda_native_executable_vtable;
+
+// Downcasts |base_value| to the native executable type; the vtable is checked
+// in debug builds only.
+static iree_hal_cuda_native_executable_t* iree_hal_cuda_native_executable_cast(
+ iree_hal_executable_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_native_executable_vtable);
+ return (iree_hal_cuda_native_executable_t*)base_value;
+}
+
+// Creates a native executable from a PTX flatbuffer: loads the module through
+// the dynamic symbols and resolves a CUfunction plus launch metadata for each
+// entry point. Retains the provided executable layouts for the lifetime of
+// the executable.
+iree_status_t iree_hal_cuda_native_executable_create(
+ iree_hal_cuda_context_wrapper_t* context,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable) {
+ IREE_ASSERT_ARGUMENT(context);
+ IREE_ASSERT_ARGUMENT(executable_params);
+ IREE_ASSERT_ARGUMENT(out_executable);
+ *out_executable = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_cuda_native_executable_t* executable = NULL;
+
+ // TODO: Verify the flat buffer.
+ iree_CUDAExecutableDef_table_t executable_def =
+ iree_CUDAExecutableDef_as_root(executable_params->executable_data.data);
+
+ // Pull the per-entry-point tables out of the flatbuffer.
+ flatbuffers_string_t ptx_image =
+ iree_CUDAExecutableDef_ptx_image_get(executable_def);
+ flatbuffers_uint32_vec_t shared_memory_sizes =
+ iree_CUDAExecutableDef_shared_memory_size_get(executable_def);
+ flatbuffers_string_vec_t entry_points_vec =
+ iree_CUDAExecutableDef_entry_points_get(executable_def);
+ iree_CUDABlockSizeDef_vec_t block_sizes_vec =
+ iree_CUDAExecutableDef_block_sizes_get(executable_def);
+ iree_host_size_t entry_count = flatbuffers_string_vec_len(entry_points_vec);
+ // One allocation carries the struct, the trailing entry_functions[] array,
+ // and the layout pointer array after it.
+ iree_host_size_t total_size =
+ sizeof(*executable) +
+ entry_count * sizeof(iree_hal_cuda_native_executable_function_t) +
+ entry_count * sizeof(iree_hal_executable_layout_t*);
+ iree_status_t status = iree_allocator_malloc(context->host_allocator,
+ total_size, (void**)&executable);
+ CUmodule module = NULL;
+ if (iree_status_is_ok(status)) {
+ iree_hal_resource_initialize(&iree_hal_cuda_native_executable_vtable,
+ &executable->resource);
+ executable->context = context;
+ // Set inside the success branch: previously this dereferenced a NULL
+ // |executable| when the allocation above failed.
+ executable->entry_count = entry_count;
+ executable->executable_layouts =
+ (void*)((char*)executable + sizeof(*executable) +
+ entry_count *
+ sizeof(iree_hal_cuda_native_executable_function_t));
+ // Retain all layouts up front so destroy() can release them uniformly
+ // regardless of how far creation progresses below (previously a partial
+ // failure retained only some layouts while destroy released them all).
+ for (iree_host_size_t i = 0; i < entry_count; i++) {
+ executable->executable_layouts[i] =
+ executable_params->executable_layouts[i];
+ iree_hal_executable_layout_retain(
+ executable_params->executable_layouts[i]);
+ }
+ status = CU_RESULT_TO_STATUS(
+ context->syms, cuModuleLoadDataEx(&module, ptx_image, 0, NULL, NULL),
+ "cuModuleLoadDataEx");
+ // Store after the load: previously |module| was copied in while still
+ // NULL, so the loaded module handle was never recorded (and leaked).
+ executable->module = module;
+ }
+
+ for (iree_host_size_t i = 0; i < entry_count; i++) {
+ if (iree_status_is_ok(status)) {
+ CUfunction function = NULL;
+ const char* entry_name = flatbuffers_string_vec_at(entry_points_vec, i);
+ status = CU_RESULT_TO_STATUS(
+ context->syms, cuModuleGetFunction(&function, module, entry_name),
+ "cuModuleGetFunction");
+ if (iree_status_is_ok(status)) {
+ status = CU_RESULT_TO_STATUS(
+ context->syms,
+ cuFuncSetAttribute(function,
+ CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
+ shared_memory_sizes[i]),
+ "cuFuncSetAttribute");
+ }
+ executable->entry_functions[i].cu_function = function;
+ executable->entry_functions[i].block_size_x = block_sizes_vec[i].x;
+ executable->entry_functions[i].block_size_y = block_sizes_vec[i].y;
+ executable->entry_functions[i].block_size_z = block_sizes_vec[i].z;
+ executable->entry_functions[i].shared_memory_size =
+ shared_memory_sizes[i];
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_executable = (iree_hal_executable_t*)executable;
+ } else {
+ // Same error path as before; |executable| may be NULL (malloc failure)
+ // and destroy is presumed NULL-tolerant -- unchanged behavior.
+ iree_hal_executable_destroy((iree_hal_executable_t*)executable);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Releases per-entry layout references, unloads the CUDA module, and frees
+// the executable allocation.
+static void iree_hal_cuda_native_executable_destroy(
+ iree_hal_executable_t* base_executable) {
+ iree_hal_cuda_native_executable_t* executable =
+ iree_hal_cuda_native_executable_cast(base_executable);
+ iree_allocator_t host_allocator = executable->context->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ for (iree_host_size_t i = 0; i < executable->entry_count; ++i) {
+ iree_hal_executable_layout_release(executable->executable_layouts[i]);
+ }
+ // Unload the module loaded in create(); previously it was leaked. May be
+ // NULL when creation failed before the module load completed.
+ if (executable->module) {
+ CUDA_IGNORE_ERROR(executable->context->syms,
+ cuModuleUnload(executable->module));
+ }
+ iree_allocator_free(host_allocator, executable);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the CUfunction for |entry_point|.
+// NOTE(review): |entry_point| is not bounds-checked in any accessor below;
+// presumably validated by the caller against the flatbuffer -- confirm.
+CUfunction iree_hal_cuda_native_executable_for_entry_point(
+ iree_hal_executable_t* base_executable, int32_t entry_point) {
+ iree_hal_cuda_native_executable_t* executable =
+ iree_hal_cuda_native_executable_cast(base_executable);
+ return executable->entry_functions[entry_point].cu_function;
+}
+
+// Returns the compiler-chosen block dimensions for |entry_point|.
+iree_status_t iree_hal_cuda_native_executable_block_size(
+ iree_hal_executable_t* base_executable, int32_t entry_point, uint32_t* x,
+ uint32_t* y, uint32_t* z) {
+ iree_hal_cuda_native_executable_t* executable =
+ iree_hal_cuda_native_executable_cast(base_executable);
+ *x = executable->entry_functions[entry_point].block_size_x;
+ *y = executable->entry_functions[entry_point].block_size_y;
+ *z = executable->entry_functions[entry_point].block_size_z;
+ return iree_ok_status();
+}
+
+// Returns the dynamic shared memory requirement for |entry_point| in bytes.
+iree_status_t iree_hal_cuda_native_executable_shared_memory_size(
+ iree_hal_executable_t* base_executable, int32_t entry_point,
+ uint32_t* shared_memory_size) {
+ iree_hal_cuda_native_executable_t* executable =
+ iree_hal_cuda_native_executable_cast(base_executable);
+ *shared_memory_size =
+ executable->entry_functions[entry_point].shared_memory_size;
+ return iree_ok_status();
+}
+
+// Returns the (retained) executable layout associated with |entry_point|.
+iree_hal_executable_layout_t* iree_hal_cuda_executable_get_layout(
+ iree_hal_executable_t* base_executable, int32_t entry_point) {
+ iree_hal_cuda_native_executable_t* executable =
+ iree_hal_cuda_native_executable_cast(base_executable);
+ return executable->executable_layouts[entry_point];
+}
+
+// vtable wiring the HAL executable interface to this implementation.
+static const iree_hal_executable_vtable_t
+ iree_hal_cuda_native_executable_vtable = {
+ .destroy = iree_hal_cuda_native_executable_destroy,
+};
diff --git a/runtime/src/iree/hal/cuda/native_executable.h b/runtime/src/iree/hal/cuda/native_executable.h
new file mode 100644
index 0000000..8c19376
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/native_executable.h
@@ -0,0 +1,50 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_NATIVE_EXECUTABLE_H_
+#define IREE_HAL_CUDA_NATIVE_EXECUTABLE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+#include "iree/hal/cuda/cuda_headers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates an executable from a PTX module. The module may contain several
+// kernels that can be extracted along with the associated block size.
+iree_status_t iree_hal_cuda_native_executable_create(
+ iree_hal_cuda_context_wrapper_t* context,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable);
+
+CUfunction iree_hal_cuda_native_executable_for_entry_point(
+ iree_hal_executable_t* executable, int32_t entry_point);
+
+// Return the block size of the given |entry_point| within the executable.
+iree_status_t iree_hal_cuda_native_executable_block_size(
+ iree_hal_executable_t* executable, int32_t entry_point, uint32_t* x,
+ uint32_t* y, uint32_t* z);
+
+// Return the shared memory size of the given |entry_point| within the
+// executable.
+iree_status_t iree_hal_cuda_native_executable_shared_memory_size(
+ iree_hal_executable_t* executable, int32_t entry_point,
+ uint32_t* shared_memory_size);
+
+// Returns the layout associated with the entry point.
+iree_hal_executable_layout_t* iree_hal_cuda_executable_get_layout(
+ iree_hal_executable_t* executable, int32_t entry_point);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_NATIVE_EXECUTABLE_H_
diff --git a/runtime/src/iree/hal/cuda/nop_executable_cache.c b/runtime/src/iree/hal/cuda/nop_executable_cache.c
new file mode 100644
index 0000000..c65795f
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/nop_executable_cache.c
@@ -0,0 +1,90 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/nop_executable_cache.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/cuda/native_executable.h"
+
+typedef struct iree_hal_cuda_nop_executable_cache_t {
+ iree_hal_resource_t resource;
+ iree_hal_cuda_context_wrapper_t* context;
+} iree_hal_cuda_nop_executable_cache_t;
+
+static const iree_hal_executable_cache_vtable_t
+ iree_hal_cuda_nop_executable_cache_vtable;
+
+static iree_hal_cuda_nop_executable_cache_t*
+iree_hal_cuda_nop_executable_cache_cast(
+ iree_hal_executable_cache_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_nop_executable_cache_vtable);
+ return (iree_hal_cuda_nop_executable_cache_t*)base_value;
+}
+
+iree_status_t iree_hal_cuda_nop_executable_cache_create(
+ iree_hal_cuda_context_wrapper_t* context, iree_string_view_t identifier,
+ iree_hal_executable_cache_t** out_executable_cache) {
+ IREE_ASSERT_ARGUMENT(out_executable_cache);
+ *out_executable_cache = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_cuda_nop_executable_cache_t* executable_cache = NULL;
+ iree_status_t status =
+ iree_allocator_malloc(context->host_allocator, sizeof(*executable_cache),
+ (void**)&executable_cache);
+ if (iree_status_is_ok(status)) {
+ iree_hal_resource_initialize(&iree_hal_cuda_nop_executable_cache_vtable,
+ &executable_cache->resource);
+ executable_cache->context = context;
+
+ *out_executable_cache = (iree_hal_executable_cache_t*)executable_cache;
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_cuda_nop_executable_cache_destroy(
+ iree_hal_executable_cache_t* base_executable_cache) {
+ iree_hal_cuda_nop_executable_cache_t* executable_cache =
+ iree_hal_cuda_nop_executable_cache_cast(base_executable_cache);
+ iree_allocator_t host_allocator = executable_cache->context->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_free(host_allocator, executable_cache);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static bool iree_hal_cuda_nop_executable_cache_can_prepare_format(
+ iree_hal_executable_cache_t* base_executable_cache,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format) {
+ return iree_string_view_equal(executable_format,
+ iree_make_cstring_view("PTXE"));
+}
+
+static iree_status_t iree_hal_cuda_nop_executable_cache_prepare_executable(
+ iree_hal_executable_cache_t* base_executable_cache,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable) {
+ iree_hal_cuda_nop_executable_cache_t* executable_cache =
+ iree_hal_cuda_nop_executable_cache_cast(base_executable_cache);
+ return iree_hal_cuda_native_executable_create(
+ executable_cache->context, executable_params, out_executable);
+}
+
+static const iree_hal_executable_cache_vtable_t
+ iree_hal_cuda_nop_executable_cache_vtable = {
+ .destroy = iree_hal_cuda_nop_executable_cache_destroy,
+ .can_prepare_format =
+ iree_hal_cuda_nop_executable_cache_can_prepare_format,
+ .prepare_executable =
+ iree_hal_cuda_nop_executable_cache_prepare_executable,
+};
diff --git a/runtime/src/iree/hal/cuda/nop_executable_cache.h b/runtime/src/iree/hal/cuda/nop_executable_cache.h
new file mode 100644
index 0000000..dcb38a9
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/nop_executable_cache.h
@@ -0,0 +1,29 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_NOP_EXECUTABLE_CACHE_H_
+#define IREE_HAL_CUDA_NOP_EXECUTABLE_CACHE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a no-op executable cache that does not cache at all.
+// This is useful to isolate pipeline caching behavior and verify compilation
+// behavior.
+iree_status_t iree_hal_cuda_nop_executable_cache_create(
+ iree_hal_cuda_context_wrapper_t* context, iree_string_view_t identifier,
+ iree_hal_executable_cache_t** out_executable_cache);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_NOP_EXECUTABLE_CACHE_H_
diff --git a/runtime/src/iree/hal/cuda/registration/CMakeLists.txt b/runtime/src/iree/hal/cuda/registration/CMakeLists.txt
new file mode 100644
index 0000000..a26a0e9
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/registration/CMakeLists.txt
@@ -0,0 +1,31 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_add_all_subdirs()
+
+if(NOT IREE_HAL_DRIVER_CUDA)
+ return()
+endif()
+
+iree_cc_library(
+ NAME
+ registration
+ HDRS
+ "driver_module.h"
+ SRCS
+ "driver_module.c"
+ DEPS
+ iree::base
+ iree::base::cc
+ iree::base::core_headers
+ iree::base::internal::flags
+ iree::base::tracing
+ iree::hal
+ iree::hal::cuda
+ DEFINES
+ "IREE_HAL_HAVE_CUDA_DRIVER_MODULE=1"
+ PUBLIC
+)
diff --git a/runtime/src/iree/hal/cuda/registration/driver_module.c b/runtime/src/iree/hal/cuda/registration/driver_module.c
new file mode 100644
index 0000000..8215dfb
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/registration/driver_module.c
@@ -0,0 +1,85 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/registration/driver_module.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/flags.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/cuda/api.h"
+
+#define IREE_HAL_CUDA_DRIVER_ID 0x43554441u // CUDA
+
+// Force using CUDA streams until we support command buffer caching to avoid the
+// overhead of graph creation.
+IREE_FLAG(
+ bool, cuda_use_streams, true,
+ "Use CUDA streams for executing command buffers (instead of graphs).");
+
+IREE_FLAG(bool, cuda_allow_inline_execution, false,
+ "Allow command buffers to execute inline against CUDA streams when "
+ "possible.");
+
+IREE_FLAG(int32_t, cuda_default_index, 0, "Index of the default CUDA device.");
+
+static iree_status_t iree_hal_cuda_driver_factory_enumerate(
+ void* self, const iree_hal_driver_info_t** out_driver_infos,
+ iree_host_size_t* out_driver_info_count) {
+ // NOTE: we could query supported cuda versions or featuresets here.
+ static const iree_hal_driver_info_t driver_infos[1] = {{
+ .driver_id = IREE_HAL_CUDA_DRIVER_ID,
+ .driver_name = iree_string_view_literal("cuda"),
+ .full_name = iree_string_view_literal("CUDA (dynamic)"),
+ }};
+ *out_driver_info_count = IREE_ARRAYSIZE(driver_infos);
+ *out_driver_infos = driver_infos;
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_driver_factory_try_create(
+ void* self, iree_hal_driver_id_t driver_id, iree_allocator_t allocator,
+ iree_hal_driver_t** out_driver) {
+ IREE_ASSERT_ARGUMENT(out_driver);
+ *out_driver = NULL;
+ if (driver_id != IREE_HAL_CUDA_DRIVER_ID) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "no driver with ID %016" PRIu64
+ " is provided by this factory",
+ driver_id);
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_cuda_device_params_t default_params;
+ iree_hal_cuda_device_params_initialize(&default_params);
+ if (FLAG_cuda_use_streams) {
+ default_params.command_buffer_mode =
+ IREE_HAL_CUDA_COMMAND_BUFFER_MODE_STREAM;
+ }
+ default_params.allow_inline_execution = FLAG_cuda_allow_inline_execution;
+
+ iree_hal_cuda_driver_options_t driver_options;
+ iree_hal_cuda_driver_options_initialize(&driver_options);
+ driver_options.default_device_index = FLAG_cuda_default_index;
+
+ iree_string_view_t identifier = iree_make_cstring_view("cuda");
+ iree_status_t status = iree_hal_cuda_driver_create(
+ identifier, &default_params, &driver_options, allocator, out_driver);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t
+iree_hal_cuda_driver_module_register(iree_hal_driver_registry_t* registry) {
+ static const iree_hal_driver_factory_t factory = {
+ .self = NULL,
+ .enumerate = iree_hal_cuda_driver_factory_enumerate,
+ .try_create = iree_hal_cuda_driver_factory_try_create,
+ };
+ return iree_hal_driver_registry_register_factory(registry, &factory);
+}
diff --git a/runtime/src/iree/hal/cuda/registration/driver_module.h b/runtime/src/iree/hal/cuda/registration/driver_module.h
new file mode 100644
index 0000000..1de341e
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/registration/driver_module.h
@@ -0,0 +1,24 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_REGISTRATION_DRIVER_MODULE_H_
+#define IREE_HAL_CUDA_REGISTRATION_DRIVER_MODULE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+IREE_API_EXPORT iree_status_t
+iree_hal_cuda_driver_module_register(iree_hal_driver_registry_t* registry);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_REGISTRATION_DRIVER_MODULE_H_
diff --git a/runtime/src/iree/hal/cuda/status_util.c b/runtime/src/iree/hal/cuda/status_util.c
new file mode 100644
index 0000000..7532ecd
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/status_util.c
@@ -0,0 +1,32 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/status_util.h"
+
+#include <stddef.h>
+
+#include "iree/hal/cuda/dynamic_symbols.h"
+
+iree_status_t iree_hal_cuda_result_to_status(
+ iree_hal_cuda_dynamic_symbols_t* syms, CUresult result, const char* file,
+ uint32_t line) {
+ if (IREE_LIKELY(result == CUDA_SUCCESS)) {
+ return iree_ok_status();
+ }
+
+ const char* error_name = NULL;
+ if (syms->cuGetErrorName(result, &error_name) != CUDA_SUCCESS) {
+ error_name = "UNKNOWN";
+ }
+
+ const char* error_string = NULL;
+ if (syms->cuGetErrorString(result, &error_string) != CUDA_SUCCESS) {
+ error_string = "Unknown error.";
+ }
+ return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL,
+ "CUDA driver error '%s' (%d): %s",
+ error_name, result, error_string);
+}
diff --git a/runtime/src/iree/hal/cuda/status_util.h b/runtime/src/iree/hal/cuda/status_util.h
new file mode 100644
index 0000000..270048e
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/status_util.h
@@ -0,0 +1,54 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_STATUS_UTIL_H_
+#define IREE_HAL_CUDA_STATUS_UTIL_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Converts a CUresult to an iree_status_t.
+//
+// Usage:
+//   iree_status_t status = CU_RESULT_TO_STATUS(syms, cuDoThing(...));
+#define CU_RESULT_TO_STATUS(syms, expr, ...) \
+ iree_hal_cuda_result_to_status((syms), ((syms)->expr), __FILE__, __LINE__)
+
+// IREE_RETURN_IF_ERROR but implicitly converts the CUresult return value to
+// a Status.
+//
+// Usage:
+//   CUDA_RETURN_IF_ERROR(syms, cuDoThing(...), "message");
+#define CUDA_RETURN_IF_ERROR(syms, expr, ...) \
+ IREE_RETURN_IF_ERROR(iree_hal_cuda_result_to_status((syms), ((syms)->expr), \
+ __FILE__, __LINE__), \
+ __VA_ARGS__)
+
+// IREE_IGNORE_ERROR but implicitly converts the CUresult return value to a
+// Status.
+//
+// Usage:
+//   CUDA_IGNORE_ERROR(syms, cuDoThing(...));
+#define CUDA_IGNORE_ERROR(syms, expr) \
+ IREE_IGNORE_ERROR(iree_hal_cuda_result_to_status((syms), ((syms)->expr), \
+ __FILE__, __LINE__))
+
+// Converts a CUresult to a Status object.
+iree_status_t iree_hal_cuda_result_to_status(
+ iree_hal_cuda_dynamic_symbols_t* syms, CUresult result, const char* file,
+ uint32_t line);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_STATUS_UTIL_H_
diff --git a/runtime/src/iree/hal/cuda/stream_command_buffer.c b/runtime/src/iree/hal/cuda/stream_command_buffer.c
new file mode 100644
index 0000000..08b908b
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/stream_command_buffer.c
@@ -0,0 +1,411 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/cuda/stream_command_buffer.h"
+
+#include "iree/base/tracing.h"
+#include "iree/hal/cuda/cuda_buffer.h"
+#include "iree/hal/cuda/cuda_event.h"
+#include "iree/hal/cuda/executable_layout.h"
+#include "iree/hal/cuda/native_executable.h"
+#include "iree/hal/cuda/status_util.h"
+
+#define IREE_HAL_CUDA_MAX_BINDING_COUNT 64
+// Kernel arguments contain bindings and push constants.
+#define IREE_HAL_CUDA_MAX_KERNEL_ARG 128
+// This records the commands on the calling thread without additional threading
+// indirection.
+
+typedef struct {
+ iree_hal_command_buffer_t base;
+ iree_hal_cuda_context_wrapper_t* context;
+ CUstream stream;
+
+ // Staging arena used for host->device transfers.
+ // Used for when we need CUDA to be able to reference memory as it performs
+ // asynchronous operations.
+ iree_arena_allocator_t arena;
+
+ int32_t push_constant[IREE_HAL_CUDA_MAX_PUSH_CONSTANT_COUNT];
+ // Keep track of the current set of kernel arguments.
+ void* current_descriptor[IREE_HAL_CUDA_MAX_KERNEL_ARG];
+ CUdeviceptr* device_ptrs[IREE_HAL_CUDA_MAX_KERNEL_ARG];
+} iree_hal_cuda_stream_command_buffer_t;
+
+static const iree_hal_command_buffer_vtable_t
+ iree_hal_cuda_stream_command_buffer_vtable;
+
+static iree_hal_cuda_stream_command_buffer_t*
+iree_hal_cuda_stream_command_buffer_cast(
+ iree_hal_command_buffer_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda_stream_command_buffer_vtable);
+ return (iree_hal_cuda_stream_command_buffer_t*)base_value;
+}
+
+iree_status_t iree_hal_cuda_stream_command_buffer_create(
+ iree_hal_device_t* device, iree_hal_cuda_context_wrapper_t* context,
+ iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories, CUstream stream,
+ iree_arena_block_pool_t* block_pool,
+ iree_hal_command_buffer_t** out_command_buffer) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(context);
+ IREE_ASSERT_ARGUMENT(out_command_buffer);
+ *out_command_buffer = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_cuda_stream_command_buffer_t* command_buffer = NULL;
+ iree_status_t status =
+ iree_allocator_malloc(context->host_allocator, sizeof(*command_buffer),
+ (void**)&command_buffer);
+ if (iree_status_is_ok(status)) {
+ iree_hal_command_buffer_initialize(
+ device, mode, command_categories, IREE_HAL_QUEUE_AFFINITY_ANY,
+ &iree_hal_cuda_stream_command_buffer_vtable, &command_buffer->base);
+ command_buffer->context = context;
+ command_buffer->stream = stream;
+ iree_arena_initialize(block_pool, &command_buffer->arena);
+ for (size_t i = 0; i < IREE_HAL_CUDA_MAX_KERNEL_ARG; i++) {
+ command_buffer->current_descriptor[i] = &command_buffer->device_ptrs[i];
+ }
+ }
+
+ *out_command_buffer = &command_buffer->base;
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_cuda_stream_command_buffer_destroy(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ iree_hal_cuda_stream_command_buffer_t* command_buffer =
+ iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_arena_deinitialize(&command_buffer->arena);
+ iree_allocator_free(command_buffer->context->host_allocator, command_buffer);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+bool iree_hal_cuda_stream_command_buffer_isa(
+ iree_hal_command_buffer_t* command_buffer) {
+ return iree_hal_command_buffer_dyn_cast(
+ command_buffer, &iree_hal_cuda_stream_command_buffer_vtable);
+}
+
+static void* iree_hal_cuda_stream_command_buffer_dyn_cast(
+ iree_hal_command_buffer_t* command_buffer, const void* vtable) {
+ if (vtable == &iree_hal_cuda_stream_command_buffer_vtable) {
+ IREE_HAL_ASSERT_TYPE(command_buffer, vtable);
+ return command_buffer;
+ }
+ return NULL;
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_begin(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ iree_hal_cuda_stream_command_buffer_t* command_buffer =
+ iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
+ iree_arena_reset(&command_buffer->arena);
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_end(
+ iree_hal_command_buffer_t* base_command_buffer) {
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_execution_barrier(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_hal_execution_barrier_flags_t flags,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers) {
+ // TODO(jinchen62): implement CUDA barrier
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_signal_event(
+ iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask) {
+ // TODO(jinchen62): implement CUDA barrier
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_reset_event(
+ iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+ iree_hal_execution_stage_t source_stage_mask) {
+ // TODO(jinchen62): implement CUDA barrier
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_wait_events(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_host_size_t event_count, const iree_hal_event_t** events,
+ iree_hal_execution_stage_t source_stage_mask,
+ iree_hal_execution_stage_t target_stage_mask,
+ iree_host_size_t memory_barrier_count,
+ const iree_hal_memory_barrier_t* memory_barriers,
+ iree_host_size_t buffer_barrier_count,
+ const iree_hal_buffer_barrier_t* buffer_barriers) {
+ // TODO(jinchen62): implement CUDA barrier
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_discard_buffer(
+ iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_t* buffer) {
+  // We could mark the memory as invalidated so that managed CUDA does not
+  // try to copy it back to the host.
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_fill_buffer(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+ iree_device_size_t length, const void* pattern,
+ iree_host_size_t pattern_length) {
+ iree_hal_cuda_stream_command_buffer_t* command_buffer =
+ iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
+
+ CUdeviceptr target_device_buffer = iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(target_buffer));
+ target_offset += iree_hal_buffer_byte_offset(target_buffer);
+ CUdeviceptr dst = target_device_buffer + target_offset;
+ size_t num_elements = length / pattern_length;
+ switch (pattern_length) {
+ case 4: {
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuMemsetD32Async(dst, *(const uint32_t*)(pattern), num_elements,
+ command_buffer->stream),
+ "cuMemsetD32Async");
+ break;
+ }
+ case 2: {
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuMemsetD16Async(dst, *(const uint16_t*)(pattern), num_elements,
+ command_buffer->stream),
+ "cuMemsetD16Async");
+ break;
+ }
+ case 1: {
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuMemsetD8Async(dst, *(const uint8_t*)(pattern), num_elements,
+ command_buffer->stream),
+ "cuMemsetD8Async");
+ break;
+ }
+ default:
+ return iree_make_status(IREE_STATUS_INTERNAL,
+ "unsupported fill pattern length");
+ }
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_update_buffer(
+ iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
+ iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+ iree_device_size_t target_offset, iree_device_size_t length) {
+ iree_hal_cuda_stream_command_buffer_t* command_buffer =
+ iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
+
+ // Allocate scratch space in the arena for the data and copy it in.
+ // The update buffer API requires that the command buffer capture the host
+ // memory at the time the method is called in case the caller wants to reuse
+ // the memory. Because CUDA memcpys are async if we didn't copy it's possible
+ // for the reused memory to change before the stream reaches the copy
+ // operation and get the wrong data.
+ const uint8_t* src = (const uint8_t*)source_buffer + source_offset;
+ if (command_buffer->arena.block_pool) {
+ uint8_t* storage = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_arena_allocate(&command_buffer->arena, length, (void**)&storage));
+ memcpy(storage, src, length);
+ src = storage;
+ }
+
+ // Issue the copy using the scratch memory as the source.
+ CUdeviceptr target_device_buffer = iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(target_buffer));
+ CUdeviceptr dst = target_device_buffer +
+ iree_hal_buffer_byte_offset(target_buffer) + target_offset;
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuMemcpyHtoDAsync_v2(dst, src, length, command_buffer->stream),
+ "cuMemcpyHtoDAsync_v2");
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_copy_buffer(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+ iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+ iree_device_size_t length) {
+ iree_hal_cuda_stream_command_buffer_t* command_buffer =
+ iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
+
+ CUdeviceptr target_device_buffer = iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(target_buffer));
+ target_offset += iree_hal_buffer_byte_offset(target_buffer);
+ CUdeviceptr source_device_buffer = iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(source_buffer));
+ source_offset += iree_hal_buffer_byte_offset(source_buffer);
+ CUdeviceptr dst = target_device_buffer + target_offset;
+ CUdeviceptr src = source_device_buffer + source_offset;
+ CUDA_RETURN_IF_ERROR(command_buffer->context->syms,
+ cuMemcpyAsync(dst, src, length, command_buffer->stream),
+ "cuMemcpyAsync");
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_push_constants(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+ const void* values, iree_host_size_t values_length) {
+ iree_hal_cuda_stream_command_buffer_t* command_buffer =
+ iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
+ iree_host_size_t constant_base_index = offset / sizeof(int32_t);
+ for (iree_host_size_t i = 0; i < values_length / sizeof(int32_t); i++) {
+ command_buffer->push_constant[i + constant_base_index] =
+ ((uint32_t*)values)[i];
+ }
+ return iree_ok_status();
+}
+
+// Tie together the binding index and its index in |bindings| array.
+typedef struct {
+ uint32_t index;
+ uint32_t binding;
+} iree_hal_cuda_binding_mapping_t;
+
+// Helper to sort the binding based on their binding index.
+static int compare_binding_index(const void* a, const void* b) {
+ const iree_hal_cuda_binding_mapping_t buffer_a =
+ *(const iree_hal_cuda_binding_mapping_t*)a;
+ const iree_hal_cuda_binding_mapping_t buffer_b =
+ *(const iree_hal_cuda_binding_mapping_t*)b;
+ return buffer_a.binding < buffer_b.binding ? -1 : 1;
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_push_descriptor_set(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_binding_t* bindings) {
+ iree_hal_cuda_stream_command_buffer_t* command_buffer =
+ iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
+ iree_host_size_t base_binding =
+ iree_hal_cuda_base_binding_index(executable_layout, set);
+  // Convention with the compiler side. We map bindings to kernel arguments.
+  // We compact the bindings to get a dense set of arguments and keep their
+  // order based on the binding index.
+ // Sort the binding based on the binding index and map the array index to the
+ // argument index.
+ iree_hal_cuda_binding_mapping_t binding_used[IREE_HAL_CUDA_MAX_BINDING_COUNT];
+ for (iree_host_size_t i = 0; i < binding_count; i++) {
+ iree_hal_cuda_binding_mapping_t buffer = {i, bindings[i].binding};
+ binding_used[i] = buffer;
+ }
+ qsort(binding_used, binding_count, sizeof(iree_hal_cuda_binding_mapping_t),
+ compare_binding_index);
+ assert(binding_count < IREE_HAL_CUDA_MAX_BINDING_COUNT &&
+ "binding count larger than the max expected.");
+ for (iree_host_size_t i = 0; i < binding_count; i++) {
+ iree_hal_descriptor_set_binding_t binding = bindings[binding_used[i].index];
+ CUdeviceptr device_ptr =
+ iree_hal_cuda_buffer_device_pointer(
+ iree_hal_buffer_allocated_buffer(binding.buffer)) +
+ iree_hal_buffer_byte_offset(binding.buffer) + binding.offset;
+ *((CUdeviceptr*)command_buffer->current_descriptor[i + base_binding]) =
+ device_ptr;
+ }
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_bind_descriptor_set(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_layout_t* executable_layout, uint32_t set,
+ iree_hal_descriptor_set_t* descriptor_set,
+ iree_host_size_t dynamic_offset_count,
+ const iree_device_size_t* dynamic_offsets) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "need cuda implementation of bind descriptor set");
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_dispatch(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z) {
+ iree_hal_cuda_stream_command_buffer_t* command_buffer =
+ iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
+ iree_hal_executable_layout_t* layout =
+ iree_hal_cuda_executable_get_layout(executable, entry_point);
+ iree_host_size_t num_constants =
+ iree_hal_cuda_executable_layout_num_constants(layout);
+ iree_host_size_t constant_base_index =
+ iree_hal_cuda_push_constant_index(layout);
+ // Patch the push constants in the kernel arguments.
+ for (iree_host_size_t i = 0; i < num_constants; i++) {
+ *((uint32_t*)command_buffer->current_descriptor[i + constant_base_index]) =
+ command_buffer->push_constant[i];
+ }
+
+ int32_t block_size_x, block_size_y, block_size_z;
+ int32_t shared_memory_size;
+ IREE_RETURN_IF_ERROR(iree_hal_cuda_native_executable_block_size(
+ executable, entry_point, &block_size_x, &block_size_y, &block_size_z));
+ IREE_RETURN_IF_ERROR(iree_hal_cuda_native_executable_shared_memory_size(
+ executable, entry_point, &shared_memory_size));
+ CUfunction func =
+ iree_hal_cuda_native_executable_for_entry_point(executable, entry_point);
+ CUDA_RETURN_IF_ERROR(
+ command_buffer->context->syms,
+ cuLaunchKernel(func, workgroup_x, workgroup_y, workgroup_z, block_size_x,
+ block_size_y, block_size_z, shared_memory_size,
+ command_buffer->stream, command_buffer->current_descriptor,
+ NULL),
+ "cuLaunchKernel");
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_cuda_stream_command_buffer_dispatch_indirect(
+ iree_hal_command_buffer_t* base_command_buffer,
+ iree_hal_executable_t* executable, int32_t entry_point,
+ iree_hal_buffer_t* workgroups_buffer,
+ iree_device_size_t workgroups_offset) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "need cuda implementation of dispatch indirect");
+}
+
+static const iree_hal_command_buffer_vtable_t
+ iree_hal_cuda_stream_command_buffer_vtable = {
+ .destroy = iree_hal_cuda_stream_command_buffer_destroy,
+ .dyn_cast = iree_hal_cuda_stream_command_buffer_dyn_cast,
+ .begin = iree_hal_cuda_stream_command_buffer_begin,
+ .end = iree_hal_cuda_stream_command_buffer_end,
+ .execution_barrier =
+ iree_hal_cuda_stream_command_buffer_execution_barrier,
+ .signal_event = iree_hal_cuda_stream_command_buffer_signal_event,
+ .reset_event = iree_hal_cuda_stream_command_buffer_reset_event,
+ .wait_events = iree_hal_cuda_stream_command_buffer_wait_events,
+ .discard_buffer = iree_hal_cuda_stream_command_buffer_discard_buffer,
+ .fill_buffer = iree_hal_cuda_stream_command_buffer_fill_buffer,
+ .update_buffer = iree_hal_cuda_stream_command_buffer_update_buffer,
+ .copy_buffer = iree_hal_cuda_stream_command_buffer_copy_buffer,
+ .push_constants = iree_hal_cuda_stream_command_buffer_push_constants,
+ .push_descriptor_set =
+ iree_hal_cuda_stream_command_buffer_push_descriptor_set,
+ .bind_descriptor_set =
+ iree_hal_cuda_stream_command_buffer_bind_descriptor_set,
+ .dispatch = iree_hal_cuda_stream_command_buffer_dispatch,
+ .dispatch_indirect =
+ iree_hal_cuda_stream_command_buffer_dispatch_indirect,
+};
diff --git a/runtime/src/iree/hal/cuda/stream_command_buffer.h b/runtime/src/iree/hal/cuda/stream_command_buffer.h
new file mode 100644
index 0000000..d22d3ff
--- /dev/null
+++ b/runtime/src/iree/hal/cuda/stream_command_buffer.h
@@ -0,0 +1,46 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_CUDA_STREAM_COMMAND_BUFFER_H_
+#define IREE_HAL_CUDA_STREAM_COMMAND_BUFFER_H_
+
+#include "iree/base/internal/arena.h"
+#include "iree/hal/api.h"
+#include "iree/hal/cuda/context_wrapper.h"
+#include "iree/hal/cuda/cuda_headers.h"
+#include "iree/hal/cuda/dynamic_symbols.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a CUDA stream command buffer that immediately issues commands against
+// the given |stream|. Access to |stream| must be synchronized by the user.
+//
+// If |block_pool| is non-NULL then the stream command buffer will retain copies
+// of input data until reset. If NULL then the caller must ensure the lifetime
+// of input data outlives the command buffer.
+//
+// This command buffer is used to both replay deferred command buffers and
+// perform inline execution. When replaying, the scratch data required for
+// things like buffer updates is retained by the source deferred command buffer
+// and as such the |block_pool| can be NULL to avoid a double copy.
+iree_status_t iree_hal_cuda_stream_command_buffer_create(
+ iree_hal_device_t* device, iree_hal_cuda_context_wrapper_t* context,
+ iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories, CUstream stream,
+ iree_arena_block_pool_t* block_pool,
+ iree_hal_command_buffer_t** out_command_buffer);
+
+// Returns true if |command_buffer| is a CUDA stream-based command buffer.
+bool iree_hal_cuda_stream_command_buffer_isa(
+ iree_hal_command_buffer_t* command_buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_CUDA_STREAM_COMMAND_BUFFER_H_
diff --git a/runtime/src/iree/hal/descriptor_set.c b/runtime/src/iree/hal/descriptor_set.c
new file mode 100644
index 0000000..65bdd6d
--- /dev/null
+++ b/runtime/src/iree/hal/descriptor_set.c
@@ -0,0 +1,37 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/descriptor_set.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/device.h"
+#include "iree/hal/resource.h"
+
+#define _VTABLE_DISPATCH(descriptor_set, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(descriptor_set, iree_hal_descriptor_set, method_name)
+
+IREE_HAL_API_RETAIN_RELEASE(descriptor_set);
+
+IREE_API_EXPORT iree_status_t iree_hal_descriptor_set_create(
+ iree_hal_device_t* device, iree_hal_descriptor_set_layout_t* set_layout,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_binding_t* bindings,
+ iree_hal_descriptor_set_t** out_descriptor_set) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(set_layout);
+ IREE_ASSERT_ARGUMENT(!binding_count || bindings);
+ IREE_ASSERT_ARGUMENT(out_descriptor_set);
+ *out_descriptor_set = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status =
+ IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device, create_descriptor_set)(
+ device, set_layout, binding_count, bindings, out_descriptor_set);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/hal/descriptor_set.h b/runtime/src/iree/hal/descriptor_set.h
new file mode 100644
index 0000000..11c7957
--- /dev/null
+++ b/runtime/src/iree/hal/descriptor_set.h
@@ -0,0 +1,102 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DESCRIPTOR_SET_H_
+#define IREE_HAL_DESCRIPTOR_SET_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/descriptor_set_layout.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// Specifies a descriptor set binding.
+// The range specified by [offset, length) will be made available to executables
+// on the given binding. If the descriptor type is dynamic then the range will
+// be [offset + dynamic_offset, length).
+//
+// The IREE HAL buffer type may internally be offset; such offset is applied
+// here as if it were the base address of the buffer. Note that the offset will
+// be applied at the time the binding is recorded into the command buffer.
+//
+// Maps to VkDescriptorSetBinding.
+typedef struct iree_hal_descriptor_set_binding_t {
+ // The binding number of this entry; corresponds to a resource of the
+ // same binding number in the executable interface.
+ uint32_t binding;
+ // Buffer bound to the binding number.
+ // May be NULL if the binding is not used by the executable.
+ iree_hal_buffer_t* buffer;
+ // Offset, in bytes, into the buffer that the binding starts at.
+ // If the descriptor type is dynamic this will be added to the dynamic
+ // offset provided during binding.
+ iree_device_size_t offset;
+ // Length, in bytes, of the buffer that is available to the executable.
+ // This can be IREE_WHOLE_BUFFER, however note that if the entire buffer
+ // contents are larger than supported by the device (~128MiB, usually) this
+ // will fail. If the descriptor type is dynamic this will be used for all
+ // ranges regardless of offset.
+ iree_device_size_t length;
+} iree_hal_descriptor_set_binding_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_descriptor_set_t
+//===----------------------------------------------------------------------===//
+
+// Opaque handle to a descriptor set object.
+// A "descriptor" is effectively a bound memory range and each dispatch can use
+// one or more "descriptor sets" to access their I/O memory. Each descriptor set
+// conforms to a template "descriptor set layout".
+//
+// Maps to VkDescriptorSet:
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSet.html
+typedef struct iree_hal_descriptor_set_t iree_hal_descriptor_set_t;
+
+// Creates a descriptor set of the given layout and bindings.
+// Descriptor sets are immutable and retain their bindings.
+IREE_API_EXPORT iree_status_t iree_hal_descriptor_set_create(
+ iree_hal_device_t* device, iree_hal_descriptor_set_layout_t* set_layout,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_binding_t* bindings,
+ iree_hal_descriptor_set_t** out_descriptor_set);
+
+// Retains the given |descriptor_set| for the caller.
+IREE_API_EXPORT void iree_hal_descriptor_set_retain(
+ iree_hal_descriptor_set_t* descriptor_set);
+
+// Releases the given |descriptor_set| from the caller.
+IREE_API_EXPORT void iree_hal_descriptor_set_release(
+ iree_hal_descriptor_set_t* descriptor_set);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_descriptor_set_t implementation details
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_descriptor_set_vtable_t {
+ void(IREE_API_PTR* destroy)(iree_hal_descriptor_set_t* descriptor_set);
+} iree_hal_descriptor_set_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_descriptor_set_vtable_t);
+
+IREE_API_EXPORT void iree_hal_descriptor_set_destroy(
+ iree_hal_descriptor_set_t* descriptor_set);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DESCRIPTOR_SET_H_
diff --git a/runtime/src/iree/hal/descriptor_set_layout.c b/runtime/src/iree/hal/descriptor_set_layout.c
new file mode 100644
index 0000000..76a3893
--- /dev/null
+++ b/runtime/src/iree/hal/descriptor_set_layout.c
@@ -0,0 +1,38 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/descriptor_set_layout.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/device.h"
+#include "iree/hal/resource.h"
+
+#define _VTABLE_DISPATCH(descriptor_set_layout, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(descriptor_set_layout, \
+ iree_hal_descriptor_set_layout, method_name)
+
+IREE_HAL_API_RETAIN_RELEASE(descriptor_set_layout);
+
+IREE_API_EXPORT iree_status_t iree_hal_descriptor_set_layout_create(
+ iree_hal_device_t* device,
+ iree_hal_descriptor_set_layout_usage_type_t usage_type,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_layout_binding_t* bindings,
+ iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(!binding_count || bindings);
+ IREE_ASSERT_ARGUMENT(out_descriptor_set_layout);
+ *out_descriptor_set_layout = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device,
+ create_descriptor_set_layout)(
+ device, usage_type, binding_count, bindings, out_descriptor_set_layout);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/hal/descriptor_set_layout.h b/runtime/src/iree/hal/descriptor_set_layout.h
new file mode 100644
index 0000000..36e3940
--- /dev/null
+++ b/runtime/src/iree/hal/descriptor_set_layout.h
@@ -0,0 +1,104 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DESCRIPTOR_SET_LAYOUT_H_
+#define IREE_HAL_DESCRIPTOR_SET_LAYOUT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// Specifies the type of a descriptor in a descriptor set.
+typedef enum iree_hal_descriptor_type_e {
+ IREE_HAL_DESCRIPTOR_TYPE_UNIFORM_BUFFER = 6,
+ IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER = 7,
+ IREE_HAL_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8,
+ IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9,
+} iree_hal_descriptor_type_t;
+
+// Specifies the usage type of the descriptor set.
+typedef enum iree_hal_descriptor_set_layout_usage_type_e {
+ // Descriptor set will be initialized once and never changed.
+ IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE = 0,
+ // Descriptor set is never created and instead used with push descriptors.
+ IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_PUSH_ONLY = 1,
+} iree_hal_descriptor_set_layout_usage_type_t;
+
+// Specifies a descriptor set layout binding.
+//
+// Maps to VkDescriptorSetLayoutBinding.
+typedef struct iree_hal_descriptor_set_layout_binding_t {
+ // The binding number of this entry; corresponds to a resource of the
+ // same binding number in the executable interface.
+ uint32_t binding;
+ // Specifies which type of resource descriptors are used for this binding.
+ iree_hal_descriptor_type_t type;
+} iree_hal_descriptor_set_layout_binding_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_descriptor_set_layout_t
+//===----------------------------------------------------------------------===//
+
+// Opaque handle to a descriptor set layout object.
+// A "descriptor" is effectively a bound memory range and each dispatch can use
+// one or more "descriptor sets" to access their I/O memory. A "descriptor set
+// layout" defines the types and usage semantics of the descriptors that make up
+// one set. Implementations can use this to verify program correctness and
+// accelerate reservation/allocation/computation of descriptor-related
+// operations.
+//
+// Maps to VkDescriptorSetLayout:
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSetLayout.html
+typedef struct iree_hal_descriptor_set_layout_t
+ iree_hal_descriptor_set_layout_t;
+
+// Creates a descriptor set layout with the given bindings.
+IREE_API_EXPORT iree_status_t iree_hal_descriptor_set_layout_create(
+ iree_hal_device_t* device,
+ iree_hal_descriptor_set_layout_usage_type_t usage_type,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_layout_binding_t* bindings,
+ iree_hal_descriptor_set_layout_t** out_descriptor_set_layout);
+
+// Retains the given |descriptor_set_layout| for the caller.
+IREE_API_EXPORT void iree_hal_descriptor_set_layout_retain(
+ iree_hal_descriptor_set_layout_t* descriptor_set_layout);
+
+// Releases the given |descriptor_set_layout| from the caller.
+IREE_API_EXPORT void iree_hal_descriptor_set_layout_release(
+ iree_hal_descriptor_set_layout_t* descriptor_set_layout);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_descriptor_set_layout_t implementation details
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_descriptor_set_layout_vtable_t {
+ void(IREE_API_PTR* destroy)(
+ iree_hal_descriptor_set_layout_t* descriptor_set_layout);
+} iree_hal_descriptor_set_layout_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_descriptor_set_layout_vtable_t);
+
+IREE_API_EXPORT void iree_hal_descriptor_set_layout_destroy(
+ iree_hal_descriptor_set_layout_t* descriptor_set_layout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DESCRIPTOR_SET_LAYOUT_H_
diff --git a/runtime/src/iree/hal/detail.h b/runtime/src/iree/hal/detail.h
new file mode 100644
index 0000000..b4387e2
--- /dev/null
+++ b/runtime/src/iree/hal/detail.h
@@ -0,0 +1,67 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DETAIL_H_
+#define IREE_HAL_DETAIL_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Dispatches a method on a HAL object vtable.
+//
+// In the future we can use this to compile in a mode where all indirect
+// dispatches are replaced by direct calls to static methods. For example,
+// by changing the macro to resolve to `iree_hal_[resource]_[method_name]` we
+// can rely on LTO to perform cross-compilation unit inlining/strip unused HAL
+// calls/etc. This will be particularly useful for super tiny builds
+// (web/embedded) where there's only ever one usable backend and debugging
+// features like command buffer validation aren't required.
+//
+// Some changes (mostly whackamole) are still required to fully support this and
+// it's critical there's a CI building with the setting as it's not hard to keep
+// working but very easy to accidentally break (by not routing through this
+// interface, using the vtable for object instance comparison, etc).
+#define IREE_HAL_VTABLE_DISPATCH(resource, type_prefix, method_name) \
+ ((const type_prefix##_vtable_t*)((const iree_hal_resource_t*)(resource)) \
+ ->vtable) \
+ ->method_name
+
+// Defines the iree_hal_<type_name>_destroy/_retain/_release methods.
+#define IREE_HAL_API_RETAIN_RELEASE(type_name) \
+ IREE_API_EXPORT void iree_hal_##type_name##_destroy( \
+ iree_hal_##type_name##_t* type_name) { \
+ if (IREE_LIKELY(type_name)) { \
+ IREE_HAL_VTABLE_DISPATCH(type_name, iree_hal_##type_name, destroy) \
+ (type_name); \
+ } \
+ } \
+ IREE_API_EXPORT void iree_hal_##type_name##_retain( \
+ iree_hal_##type_name##_t* type_name) { \
+ if (IREE_LIKELY(type_name)) { \
+ iree_atomic_ref_count_inc( \
+ &((iree_hal_resource_t*)(type_name))->ref_count); \
+ } \
+ } \
+ IREE_API_EXPORT void iree_hal_##type_name##_release( \
+ iree_hal_##type_name##_t* type_name) { \
+ if (IREE_LIKELY(type_name) && \
+ iree_atomic_ref_count_dec( \
+ &((iree_hal_resource_t*)(type_name))->ref_count) == 1) { \
+ iree_hal_##type_name##_destroy(type_name); \
+ } \
+ }
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DETAIL_H_
diff --git a/runtime/src/iree/hal/device.c b/runtime/src/iree/hal/device.c
new file mode 100644
index 0000000..d389906
--- /dev/null
+++ b/runtime/src/iree/hal/device.c
@@ -0,0 +1,287 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/device.h"
+
+#include "iree/base/tracing.h"
+#include "iree/hal/allocator.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/command_buffer.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/resource.h"
+
+#define _VTABLE_DISPATCH(device, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device, method_name)
+
+IREE_HAL_API_RETAIN_RELEASE(device);
+
+IREE_API_EXPORT iree_string_view_t
+iree_hal_device_id(iree_hal_device_t* device) {
+ IREE_ASSERT_ARGUMENT(device);
+ return _VTABLE_DISPATCH(device, id)(device);
+}
+
+IREE_API_EXPORT iree_allocator_t
+iree_hal_device_host_allocator(iree_hal_device_t* device) {
+ IREE_ASSERT_ARGUMENT(device);
+ return _VTABLE_DISPATCH(device, host_allocator)(device);
+}
+
+IREE_API_EXPORT iree_hal_allocator_t* iree_hal_device_allocator(
+ iree_hal_device_t* device) {
+ IREE_ASSERT_ARGUMENT(device);
+ return _VTABLE_DISPATCH(device, device_allocator)(device);
+}
+
+IREE_API_EXPORT
+iree_status_t iree_hal_device_trim(iree_hal_device_t* device) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = _VTABLE_DISPATCH(device, trim)(device);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_query_i32(
+ iree_hal_device_t* device, iree_string_view_t category,
+ iree_string_view_t key, int32_t* out_value) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(out_value);
+
+ if (iree_string_view_equal(category,
+ iree_make_cstring_view("hal.device.id"))) {
+ *out_value =
+ iree_string_view_match_pattern(iree_hal_device_id(device), key) ? 1 : 0;
+ return iree_ok_status();
+ }
+
+ return _VTABLE_DISPATCH(device, query_i32)(device, category, key, out_value);
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_range(
+ iree_hal_device_t* device, iree_hal_transfer_buffer_t source,
+ iree_device_size_t source_offset, iree_hal_transfer_buffer_t target,
+ iree_device_size_t target_offset, iree_device_size_t data_length,
+ iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout) {
+ if (data_length == 0) {
+ return iree_ok_status(); // No-op.
+ }
+
+ // host->host is not allowed. We may want to support this one day to allow for
+ // parallelized copies and such, however the validation code differs quite a
+ // bit and it'd be better to have this as part of a task system API.
+ bool is_source_host = source.device_buffer == NULL;
+ bool is_target_host = target.device_buffer == NULL;
+ if (is_source_host && is_target_host) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "cannot perform host->host transfers via this API, use memcpy/memmove");
+ }
+
+ // Check for overlap - like memcpy we require that the two ranges don't have
+ // any overlap as we may use memcpy. This only matters if the buffers are
+ // both device buffers - host and device should never alias: behavior is
+ // undefined if a user tries to pass a mapped device pointer as if it was a
+ // host pointer.
+ if (!is_source_host && !is_target_host &&
+ iree_hal_buffer_test_overlap(source.device_buffer, source_offset,
+ data_length, target.device_buffer,
+ target_offset, data_length) !=
+ IREE_HAL_BUFFER_OVERLAP_DISJOINT) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "source and target ranges must not overlap within the same buffer");
+ }
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(
+ z0, is_source_host ? "h2d" : (is_target_host ? "d2h" : "d2d"));
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, data_length);
+
+ // Defer to the backing implementation.
+ iree_status_t status = _VTABLE_DISPATCH(device, transfer_range)(
+ device, source, source_offset, target, target_offset, data_length, flags,
+ timeout);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_h2d(
+ iree_hal_device_t* device, const void* source, iree_hal_buffer_t* target,
+ iree_device_size_t target_offset, iree_device_size_t data_length,
+ iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout) {
+ return iree_hal_device_transfer_range(
+ device,
+ iree_hal_make_host_transfer_buffer_span((void*)source, data_length), 0,
+ iree_hal_make_device_transfer_buffer(target), target_offset, data_length,
+ flags, timeout);
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_d2h(
+ iree_hal_device_t* device, iree_hal_buffer_t* source,
+ iree_device_size_t source_offset, void* target,
+ iree_device_size_t data_length, iree_hal_transfer_buffer_flags_t flags,
+ iree_timeout_t timeout) {
+ return iree_hal_device_transfer_range(
+ device, iree_hal_make_device_transfer_buffer(source), source_offset,
+ iree_hal_make_host_transfer_buffer_span(target, data_length), 0,
+ data_length, flags, timeout);
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_d2d(
+ iree_hal_device_t* device, iree_hal_buffer_t* source,
+ iree_device_size_t source_offset, iree_hal_buffer_t* target,
+ iree_device_size_t target_offset, iree_device_size_t data_length,
+ iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout) {
+ return iree_hal_device_transfer_range(
+ device, iree_hal_make_device_transfer_buffer(source), source_offset,
+ iree_hal_make_device_transfer_buffer(target), target_offset, data_length,
+ flags, timeout);
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_and_wait(
+ iree_hal_device_t* device, iree_hal_semaphore_t* wait_semaphore,
+ uint64_t wait_value, iree_host_size_t transfer_count,
+ const iree_hal_transfer_command_t* transfer_commands,
+ iree_timeout_t timeout) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(!transfer_count || transfer_commands);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // We only want to allow inline execution if we have not been instructed to
+ // wait on a semaphore and it hasn't yet been signaled.
+ iree_hal_command_buffer_mode_t mode = IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT;
+ if (wait_semaphore) {
+ uint64_t current_value = 0ull;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_semaphore_query(wait_semaphore, &current_value));
+ if (current_value >= wait_value) {
+ mode |= IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION;
+ }
+ } else {
+ mode |= IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION;
+ }
+
+ // Create a command buffer performing all of the transfer operations.
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_create_transfer_command_buffer(
+ device, mode, IREE_HAL_QUEUE_AFFINITY_ANY, transfer_count,
+ transfer_commands, &command_buffer));
+
+ // Perform a full submit-and-wait. On devices with multiple queues this can
+ // run out-of-order/overlapped with other work and return earlier than device
+ // idle.
+ iree_hal_semaphore_t* fence_semaphore = NULL;
+ iree_status_t status =
+ iree_hal_semaphore_create(device, 0ull, &fence_semaphore);
+ uint64_t signal_value = 1ull;
+ if (iree_status_is_ok(status)) {
+ iree_hal_submission_batch_t batch = {
+ .wait_semaphores =
+ {
+ .count = wait_semaphore != NULL ? 1 : 0,
+ .semaphores = &wait_semaphore,
+ .payload_values = &wait_value,
+ },
+ .command_buffer_count = 1,
+ .command_buffers = &command_buffer,
+ .signal_semaphores =
+ {
+ .count = 1,
+ .semaphores = &fence_semaphore,
+ .payload_values = &signal_value,
+ },
+ };
+ status = iree_hal_device_submit_and_wait(
+ device, IREE_HAL_COMMAND_CATEGORY_TRANSFER, IREE_HAL_QUEUE_AFFINITY_ANY,
+ 1, &batch, fence_semaphore, signal_value, timeout);
+ }
+
+ iree_hal_command_buffer_release(command_buffer);
+ iree_hal_semaphore_release(fence_semaphore);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Validates that the submission is well-formed.
+static iree_status_t iree_hal_device_validate_submission(
+ iree_host_size_t batch_count, const iree_hal_submission_batch_t* batches) {
+ for (iree_host_size_t i = 0; i < batch_count; ++i) {
+ for (iree_host_size_t j = 0; j < batches[i].command_buffer_count; ++j) {
+ if (batches[i].wait_semaphores.count > 0 &&
+ iree_all_bits_set(
+ iree_hal_command_buffer_mode(batches[i].command_buffers[j]),
+ IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION)) {
+ // Inline command buffers are not allowed to wait (as they could have
+ // already been executed!). This is a requirement of the API so we
+ // validate it across all backends even if they don't support inline
+ // execution and ignore it.
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "inline command buffer submitted with a wait; inline command "
+ "buffers must be ready to execute immediately");
+ }
+ }
+ }
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_submit(
+ iree_hal_device_t* device, iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(!batch_count || batches);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_device_validate_submission(batch_count, batches));
+ iree_status_t status = _VTABLE_DISPATCH(device, queue_submit)(
+ device, command_categories, queue_affinity, batch_count, batches);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_submit_and_wait(
+ iree_hal_device_t* device, iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches,
+ iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+ iree_timeout_t timeout) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(!batch_count || batches);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_device_validate_submission(batch_count, batches));
+ iree_status_t status = _VTABLE_DISPATCH(device, submit_and_wait)(
+ device, command_categories, queue_affinity, batch_count, batches,
+ wait_semaphore, wait_value, timeout);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_wait_semaphores(
+ iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode,
+ const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout) {
+ IREE_ASSERT_ARGUMENT(device);
+ if (!semaphore_list || semaphore_list->count == 0) return iree_ok_status();
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = _VTABLE_DISPATCH(device, wait_semaphores)(
+ device, wait_mode, semaphore_list, timeout);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t
+iree_hal_device_wait_idle(iree_hal_device_t* device, iree_timeout_t timeout) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = _VTABLE_DISPATCH(device, wait_idle)(device, timeout);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/hal/device.h b/runtime/src/iree/hal/device.h
new file mode 100644
index 0000000..7c97107
--- /dev/null
+++ b/runtime/src/iree/hal/device.h
@@ -0,0 +1,441 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DEVICE_H_
+#define IREE_HAL_DEVICE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/command_buffer.h"
+#include "iree/hal/descriptor_set.h"
+#include "iree/hal/descriptor_set_layout.h"
+#include "iree/hal/event.h"
+#include "iree/hal/executable_cache.h"
+#include "iree/hal/executable_layout.h"
+#include "iree/hal/resource.h"
+#include "iree/hal/semaphore.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// An opaque driver-specific handle to identify different devices.
+typedef uintptr_t iree_hal_device_id_t;
+
+// Reserved device id indicating no/invalid device.
+#define IREE_HAL_DEVICE_ID_INVALID 0ull
+
+// Describes features supported by a device.
+// These flags indicate the availability of features that may be enabled at the
+// request of the calling application. Note that certain features may disable
+// runtime optimizations or require compilation flags to ensure the required
+// metadata is present in executables.
+enum iree_hal_device_feature_bits_t {
+  IREE_HAL_DEVICE_FEATURE_NONE = 0u,
+
+  // Device supports executable debugging.
+  // When present executables *may* be compiled with
+  // IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_DEBUGGING and will have usable
+  // debugging related methods. Note that if the input executables do not have
+  // embedded debugging information they still may not be able to perform
+  // disassembly or fine-grained breakpoint insertion.
+  IREE_HAL_DEVICE_FEATURE_SUPPORTS_DEBUGGING = 1u << 0,
+
+  // Device supports executable coverage information.
+  // When present executables *may* be compiled with
+  // IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_COVERAGE and will produce
+  // coverage buffers during dispatch. Note that input executables must have
+  // partial embedded debug information to allow mapping back to source offsets.
+  IREE_HAL_DEVICE_FEATURE_SUPPORTS_COVERAGE = 1u << 1,
+
+  // Device supports executable and command queue profiling.
+  // When present executables *may* be compiled with
+  // IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_PROFILING and will produce
+  // profiling buffers during dispatch. Note that input executables must have
+  // partial embedded debug information to allow mapping back to source offsets.
+  IREE_HAL_DEVICE_FEATURE_SUPPORTS_PROFILING = 1u << 2,
+};
+// Bitmask of iree_hal_device_feature_bits_t values.
+typedef uint32_t iree_hal_device_feature_t;
+
+// Describes an enumerated HAL device.
+typedef struct iree_hal_device_info_t {
+  // Opaque handle used by drivers. Not valid across driver instances.
+  iree_hal_device_id_t device_id;
+  // Name of the device as returned by the API.
+  iree_string_view_t name;
+} iree_hal_device_info_t;
+
+// A transfer source or destination.
+typedef struct iree_hal_transfer_buffer_t {
+  // A host-allocated void* buffer.
+  iree_byte_span_t host_buffer;
+  // A device-allocated buffer (may be of any memory type).
+  iree_hal_buffer_t* device_buffer;
+} iree_hal_transfer_buffer_t;
+
+// Wraps |host_buffer| as a host-side transfer source/destination.
+static inline iree_hal_transfer_buffer_t iree_hal_make_host_transfer_buffer(
+    iree_byte_span_t host_buffer) {
+  iree_hal_transfer_buffer_t transfer_buffer = {
+      host_buffer,
+      NULL,
+  };
+  return transfer_buffer;
+}
+
+// Wraps the host memory range [ptr, ptr+length) as a transfer
+// source/destination.
+static inline iree_hal_transfer_buffer_t
+iree_hal_make_host_transfer_buffer_span(void* ptr, iree_host_size_t length) {
+  iree_hal_transfer_buffer_t transfer_buffer = {
+      iree_make_byte_span(ptr, length),
+      NULL,
+  };
+  return transfer_buffer;
+}
+
+// Wraps |device_buffer| as a device-side transfer source/destination.
+static inline iree_hal_transfer_buffer_t iree_hal_make_device_transfer_buffer(
+    iree_hal_buffer_t* device_buffer) {
+  iree_hal_transfer_buffer_t transfer_buffer = {
+      iree_byte_span_empty(),
+      device_buffer,
+  };
+  return transfer_buffer;
+}
+
+// A list of semaphores and their corresponding payloads.
+// When signaling each semaphore will be set to the new payload value provided.
+// When waiting each semaphore must reach or exceed the payload value.
+typedef struct iree_hal_semaphore_list_t {
+  iree_host_size_t count;
+  iree_hal_semaphore_t** semaphores;
+  uint64_t* payload_values;
+} iree_hal_semaphore_list_t;
+
+// A single batch of command buffers submitted to a device queue.
+// All of the wait semaphores must reach or exceed the given payload value prior
+// to the batch beginning execution. Each command buffer begins execution in the
+// order it is present in the list, though note that the command buffers
+// execute concurrently and require internal synchronization via events if there
+// are any dependencies between them. Only after all command buffers have
+// completed will the signal semaphores be updated to the provided payload
+// values.
+//
+// Matches Vulkan's VkSubmitInfo:
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkSubmitInfo.html
+// Note that as the HAL only models timeline semaphores we take the payload
+// values directly in this struct; see:
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkTimelineSemaphoreSubmitInfo.html
+typedef struct iree_hal_submission_batch_t {
+  // Semaphores to wait on prior to executing any command buffer.
+  iree_hal_semaphore_list_t wait_semaphores;
+
+  // Command buffers to execute, in order.
+  iree_host_size_t command_buffer_count;
+  iree_hal_command_buffer_t** command_buffers;
+
+  // Semaphores to signal once all command buffers have completed execution.
+  iree_hal_semaphore_list_t signal_semaphores;
+} iree_hal_submission_batch_t;
+
+// Defines how a multi-wait operation treats the results of multiple semaphores.
+typedef enum iree_hal_wait_mode_e {
+  // Waits for all semaphores to reach or exceed their specified values.
+  IREE_HAL_WAIT_MODE_ALL = 0,
+  // Waits for one or more semaphores to reach or exceed their specified values.
+  IREE_HAL_WAIT_MODE_ANY = 1,
+} iree_hal_wait_mode_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_device_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+// Retains the given |device| for the caller.
+IREE_API_EXPORT void iree_hal_device_retain(iree_hal_device_t* device);
+
+// Releases the given |device| from the caller.
+IREE_API_EXPORT void iree_hal_device_release(iree_hal_device_t* device);
+
+// Returns the device identifier.
+// This identifier may vary based on the runtime device type; for example, a
+// Vulkan device may return `vulkan-v1.1` or `vulkan-v1.2-spec1`.
+IREE_API_EXPORT iree_string_view_t
+iree_hal_device_id(iree_hal_device_t* device);
+
+// Returns the host allocator used for objects.
+IREE_API_EXPORT iree_allocator_t
+iree_hal_device_host_allocator(iree_hal_device_t* device);
+
+// Returns a reference to the allocator of the device that can be used for
+// allocating buffers.
+IREE_API_EXPORT iree_hal_allocator_t* iree_hal_device_allocator(
+    iree_hal_device_t* device);
+
+// Trims pools and caches used by the HAL to the minimum required for live
+// allocations. This can be used on low-memory conditions or when
+// suspending/parking instances.
+IREE_API_EXPORT
+iree_status_t iree_hal_device_trim(iree_hal_device_t* device);
+
+// Queries a configuration value as an int32_t.
+// The |category| and |key| will be provided to the device driver to interpret
+// in a device-specific way and if recognized the value will be converted to an
+// int32_t and returned in |out_value|. Fails if the value represented by the
+// key is not convertible (overflows a 32-bit integer, not a number, etc).
+//
+// This is roughly equivalent to the `sysconf` linux syscall
+// (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact
+// set of categories and keys available and their interpretation is
+// target-dependent.
+//
+// Well-known queries (category :: key):
+//   hal.device.id :: some-pattern-*
+//   hal.device.feature :: some-pattern-*
+//   hal.device.architecture :: some-pattern-*
+//   hal.executable.format :: some-pattern-*
+//
+// Returned values must remain the same for the lifetime of the device as
+// callers may cache them to avoid redundant calls.
+IREE_API_EXPORT iree_status_t iree_hal_device_query_i32(
+    iree_hal_device_t* device, iree_string_view_t category,
+    iree_string_view_t key, int32_t* out_value);
+
+// Synchronously copies data from |source| into |target|.
+//
+// Supports host->device, device->host, and device->device transfer,
+// including across devices. This method will never fail based on device
+// capabilities but may incur some extreme transient allocations and copies in
+// order to perform the transfer.
+//
+// The ordering of the transfer is undefined with respect to queue execution on
+// the source or target device; some may require full device flushes in order to
+// perform this operation while others may immediately perform it while there is
+// still work outstanding.
+//
+// It is strongly recommended that buffer operations are performed on transfer
+// queues; using this synchronous function may incur additional cache flushes
+// and synchronous blocking behavior and is not supported on all buffer types.
+// See iree_hal_command_buffer_copy_buffer.
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_range(
+    iree_hal_device_t* device, iree_hal_transfer_buffer_t source,
+    iree_device_size_t source_offset, iree_hal_transfer_buffer_t target,
+    iree_device_size_t target_offset, iree_device_size_t data_length,
+    iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout);
+
+// Synchronously copies data from host |source| into device |target|.
+// Convenience wrapper around iree_hal_device_transfer_range.
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_h2d(
+    iree_hal_device_t* device, const void* source, iree_hal_buffer_t* target,
+    iree_device_size_t target_offset, iree_device_size_t data_length,
+    iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout);
+
+// Synchronously copies data from device |source| into host |target|.
+// Convenience wrapper around iree_hal_device_transfer_range.
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_d2h(
+    iree_hal_device_t* device, iree_hal_buffer_t* source,
+    iree_device_size_t source_offset, void* target,
+    iree_device_size_t data_length, iree_hal_transfer_buffer_flags_t flags,
+    iree_timeout_t timeout);
+
+// Synchronously copies data from device |source| into device |target|.
+// Convenience wrapper around iree_hal_device_transfer_range.
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_d2d(
+    iree_hal_device_t* device, iree_hal_buffer_t* source,
+    iree_device_size_t source_offset, iree_hal_buffer_t* target,
+    iree_device_size_t target_offset, iree_device_size_t data_length,
+    iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout);
+
+// Synchronously executes one or more transfer operations against a queue.
+// All buffers must be compatible with |device| and ranges must not overlap
+// (same as with memcpy).
+//
+// This is a blocking operation and may incur significant overheads as
+// internally it issues a command buffer with the transfer operations and waits
+// for it to complete. Users should do that themselves so that the work can be
+// issued concurrently and batched effectively. This is only useful as a
+// fallback for implementations that require it or tools where things like I/O
+// are transferred without worrying about performance. When submitting other
+// work it's preferable to use iree_hal_create_transfer_command_buffer and a
+// normal queue submission that allows for more fine-grained sequencing and
+// amortizes the submission cost by batching other work.
+//
+// The transfer will begin after the optional |wait_semaphore| reaches
+// |wait_value|. Behavior is undefined if no semaphore is provided and there are
+// in-flight operations concurrently using the buffer ranges.
+// Returns only after all transfers have completed and been flushed.
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_and_wait(
+    iree_hal_device_t* device, iree_hal_semaphore_t* wait_semaphore,
+    uint64_t wait_value, iree_host_size_t transfer_count,
+    const iree_hal_transfer_command_t* transfer_commands,
+    iree_timeout_t timeout);
+
+// Submits one or more batches of work to a device queue.
+//
+// The queue is selected based on the flags set in |command_categories| and the
+// |queue_affinity|. As the number of available queues can vary the
+// |queue_affinity| is used to hash into the available queues for the required
+// categories. For example if 2 queues support transfer commands and the
+// affinity is 5 the resulting queue could be index hash(5)=1. The affinity can
+// thus be treated as just a way to indicate whether two submissions must be
+// placed on to the same queue. Note that the exact hashing function is
+// implementation dependent.
+//
+// The submission behavior matches Vulkan's vkQueueSubmit, with each batch
+// executing its command buffers in the order they are defined but allowing the
+// command buffers to complete out-of-order. See:
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/vkQueueSubmit.html
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_submit(
+    iree_hal_device_t* device, iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+    const iree_hal_submission_batch_t* batches);
+
+// Submits batches of work and waits until |wait_semaphore| reaches or exceeds
+// |wait_value|.
+//
+// This is equivalent to following iree_hal_device_queue_submit with an
+// iree_hal_semaphore_wait on |wait_semaphore|/|wait_value| but
+// may help to reduce overhead by preventing thread wakeups, kernel calls, and
+// internal tracking.
+//
+// See iree_hal_device_queue_submit for more information about the queuing
+// behavior and iree_hal_semaphore_wait for the waiting behavior.
+IREE_API_EXPORT iree_status_t iree_hal_device_submit_and_wait(
+    iree_hal_device_t* device, iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+    const iree_hal_submission_batch_t* batches,
+    iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+    iree_timeout_t timeout);
+
+// Blocks the caller until the semaphores reach or exceed the specified payload
+// values or the |timeout| elapses. All semaphores in |semaphore_list| must be
+// created from this device (or be imported into it).
+//
+// |wait_mode| can be used to decide when the wait will proceed; whether *all*
+// semaphores in |semaphore_list| must be signaled or whether *any* (one or
+// more) can be signaled before an early return.
+//
+// Returns success if the wait is successful and semaphores have been signaled
+// satisfying the |wait_mode|.
+//
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the |timeout| elapses without the
+// |wait_mode| being satisfied. Note that even on success only a subset of the
+// semaphores may have been signaled and each can be queried to see which ones.
+//
+// Returns IREE_STATUS_ABORTED if one or more semaphores has failed. Callers can
+// use iree_hal_semaphore_query on the semaphores to find the ones that have
+// failed and get the status.
+IREE_API_EXPORT iree_status_t iree_hal_device_wait_semaphores(
+    iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode,
+    const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout);
+
+// Blocks the caller until all outstanding requests on all queues have been
+// completed or the |timeout| elapses. This is equivalent to having waited
+// on all semaphores outstanding at the time of the call, meaning that if new
+// work is submitted by another thread it may not be waited on prior to this
+// call returning.
+//
+// Returns success if the device reaches an idle point during the call.
+//
+// Returns DEADLINE_EXCEEDED if the |timeout| elapses without the device having
+// become idle.
+IREE_API_EXPORT iree_status_t
+iree_hal_device_wait_idle(iree_hal_device_t* device, iree_timeout_t timeout);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_device_t implementation details
+//===----------------------------------------------------------------------===//
+
+// Virtual function table implemented by each iree_hal_device_t backend.
+// Entries mirror the public iree_hal_device_* API above and are dispatched
+// through IREE_HAL_VTABLE_DISPATCH.
+typedef struct iree_hal_device_vtable_t {
+  void(IREE_API_PTR* destroy)(iree_hal_device_t* device);
+
+  iree_string_view_t(IREE_API_PTR* id)(iree_hal_device_t* device);
+
+  iree_allocator_t(IREE_API_PTR* host_allocator)(iree_hal_device_t* device);
+  iree_hal_allocator_t*(IREE_API_PTR* device_allocator)(
+      iree_hal_device_t* device);
+
+  iree_status_t(IREE_API_PTR* trim)(iree_hal_device_t* device);
+
+  iree_status_t(IREE_API_PTR* query_i32)(iree_hal_device_t* device,
+                                         iree_string_view_t category,
+                                         iree_string_view_t key,
+                                         int32_t* out_value);
+
+  iree_status_t(IREE_API_PTR* create_command_buffer)(
+      iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+      iree_hal_command_category_t command_categories,
+      iree_hal_queue_affinity_t queue_affinity,
+      iree_hal_command_buffer_t** out_command_buffer);
+
+  iree_status_t(IREE_API_PTR* create_descriptor_set)(
+      iree_hal_device_t* device, iree_hal_descriptor_set_layout_t* set_layout,
+      iree_host_size_t binding_count,
+      const iree_hal_descriptor_set_binding_t* bindings,
+      iree_hal_descriptor_set_t** out_descriptor_set);
+
+  iree_status_t(IREE_API_PTR* create_descriptor_set_layout)(
+      iree_hal_device_t* device,
+      iree_hal_descriptor_set_layout_usage_type_t usage_type,
+      iree_host_size_t binding_count,
+      const iree_hal_descriptor_set_layout_binding_t* bindings,
+      iree_hal_descriptor_set_layout_t** out_descriptor_set_layout);
+
+  iree_status_t(IREE_API_PTR* create_event)(iree_hal_device_t* device,
+                                            iree_hal_event_t** out_event);
+
+  iree_status_t(IREE_API_PTR* create_executable_cache)(
+      iree_hal_device_t* device, iree_string_view_t identifier,
+      iree_loop_t loop, iree_hal_executable_cache_t** out_executable_cache);
+
+  iree_status_t(IREE_API_PTR* create_executable_layout)(
+      iree_hal_device_t* device, iree_host_size_t push_constants,
+      iree_host_size_t set_layout_count,
+      iree_hal_descriptor_set_layout_t** set_layouts,
+      iree_hal_executable_layout_t** out_executable_layout);
+
+  iree_status_t(IREE_API_PTR* create_semaphore)(
+      iree_hal_device_t* device, uint64_t initial_value,
+      iree_hal_semaphore_t** out_semaphore);
+
+  iree_status_t(IREE_API_PTR* transfer_range)(
+      iree_hal_device_t* device, iree_hal_transfer_buffer_t source,
+      iree_device_size_t source_offset, iree_hal_transfer_buffer_t target,
+      iree_device_size_t target_offset, iree_device_size_t data_length,
+      iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout);
+
+  iree_status_t(IREE_API_PTR* queue_submit)(
+      iree_hal_device_t* device, iree_hal_command_category_t command_categories,
+      iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+      const iree_hal_submission_batch_t* batches);
+
+  iree_status_t(IREE_API_PTR* submit_and_wait)(
+      iree_hal_device_t* device, iree_hal_command_category_t command_categories,
+      iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+      const iree_hal_submission_batch_t* batches,
+      iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+      iree_timeout_t timeout);
+
+  iree_status_t(IREE_API_PTR* wait_semaphores)(
+      iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode,
+      const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout);
+
+  iree_status_t(IREE_API_PTR* wait_idle)(iree_hal_device_t* device,
+                                         iree_timeout_t timeout);
+} iree_hal_device_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_device_vtable_t);
+
+// Implementation detail: most callers should use iree_hal_device_release.
+IREE_API_EXPORT void iree_hal_device_destroy(iree_hal_device_t* device);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DEVICE_H_
diff --git a/runtime/src/iree/hal/driver.c b/runtime/src/iree/hal/driver.c
new file mode 100644
index 0000000..778aaed
--- /dev/null
+++ b/runtime/src/iree/hal/driver.c
@@ -0,0 +1,59 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/driver.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/resource.h"
+
+#define _VTABLE_DISPATCH(driver, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(driver, iree_hal_driver, method_name)
+
+IREE_HAL_API_RETAIN_RELEASE(driver);
+
+// Enumerates the devices available from |driver|; see the contract documented
+// on the declaration in driver.h. The returned list is allocated from
+// |allocator| and must be freed by the caller with the same allocator.
+IREE_API_EXPORT iree_status_t iree_hal_driver_query_available_devices(
+    iree_hal_driver_t* driver, iree_allocator_t allocator,
+    iree_hal_device_info_t** out_device_infos,
+    iree_host_size_t* out_device_info_count) {
+  IREE_ASSERT_ARGUMENT(driver);
+  IREE_ASSERT_ARGUMENT(out_device_infos);
+  IREE_ASSERT_ARGUMENT(out_device_info_count);
+  // Cleared up front so callers never observe a stale count on failure.
+  *out_device_info_count = 0;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status = _VTABLE_DISPATCH(driver, query_available_devices)(
+      driver, allocator, out_device_infos, out_device_info_count);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Creates the device identified by |device_id| (as returned from
+// iree_hal_driver_query_available_devices); see the declaration in driver.h.
+IREE_API_EXPORT iree_status_t iree_hal_driver_create_device(
+    iree_hal_driver_t* driver, iree_hal_device_id_t device_id,
+    iree_allocator_t allocator, iree_hal_device_t** out_device) {
+  IREE_ASSERT_ARGUMENT(driver);
+  IREE_ASSERT_ARGUMENT(out_device);
+  // Cleared up front so callers never observe a stale pointer on failure.
+  *out_device = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status = _VTABLE_DISPATCH(driver, create_device)(
+      driver, device_id, allocator, out_device);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Creates the driver-defined "default" device; see the declaration in
+// driver.h. Drivers may simply use the first enumerated device.
+IREE_API_EXPORT iree_status_t iree_hal_driver_create_default_device(
+    iree_hal_driver_t* driver, iree_allocator_t allocator,
+    iree_hal_device_t** out_device) {
+  IREE_ASSERT_ARGUMENT(driver);
+  IREE_ASSERT_ARGUMENT(out_device);
+  // Cleared up front so callers never observe a stale pointer on failure.
+  *out_device = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // The create_device parameter is an iree_hal_device_id_t, so pass the
+  // matching IREE_HAL_DEVICE_ID_INVALID sentinel (previously this passed the
+  // driver-id constant IREE_HAL_DRIVER_ID_INVALID; both are 0ull so behavior
+  // is unchanged, but the device-id constant matches the parameter type).
+  iree_status_t status = _VTABLE_DISPATCH(driver, create_device)(
+      driver, IREE_HAL_DEVICE_ID_INVALID, allocator, out_device);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/hal/driver.h b/runtime/src/iree/hal/driver.h
new file mode 100644
index 0000000..65cbd66
--- /dev/null
+++ b/runtime/src/iree/hal/driver.h
@@ -0,0 +1,117 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DRIVER_H_
+#define IREE_HAL_DRIVER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/device.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// An opaque factory-specific handle to identify different drivers.
+typedef uint64_t iree_hal_driver_id_t;
+
+// Reserved driver id indicating no/invalid driver.
+#define IREE_HAL_DRIVER_ID_INVALID 0ull
+
+// Describes a driver providing device enumeration and creation.
+// The lifetime of memory referenced by this structure (such as strings) is
+// dependent on where it originated.
+//
+// * When using iree_hal_driver_registry_enumerate the driver info is copied
+//   into memory owned by the caller.
+// * When queried from a live driver with iree_hal_driver_info the memory is
+//   only guaranteed to live for as long as the driver is.
+// * When enumerating via factories the information may be valid only while the
+//   driver registry lock is held.
+typedef struct iree_hal_driver_info_t {
+  IREE_API_UNSTABLE
+
+  // Opaque handle used by factories. Unique across all factories.
+  iree_hal_driver_id_t driver_id;
+
+  // Canonical name of the driver as used in command lines, documentation, etc.
+  // Examples: 'metal', 'vulkan'
+  iree_string_view_t driver_name;
+
+  // Full human-readable name of the driver for display.
+  // Examples: 'Vulkan 1.2 (NVIDIA)'.
+  iree_string_view_t full_name;
+
+  // TODO(benvanik): version information; useful if wanting to expose multiple
+  // versions that may have completely different implementations (like vulkan
+  // 1.0, 1.1, and 1.2) but allow a nice sort/selection process.
+  // TODO(benvanik): triple, feature flags, etc.
+} iree_hal_driver_info_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_driver_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_driver_t iree_hal_driver_t;
+
+// Retains the given |driver| for the caller.
+IREE_API_EXPORT void iree_hal_driver_retain(iree_hal_driver_t* driver);
+
+// Releases the given |driver| from the caller.
+IREE_API_EXPORT void iree_hal_driver_release(iree_hal_driver_t* driver);
+
+// Queries available devices and returns them as a list.
+// The provided |allocator| will be used to allocate the returned list and after
+// the caller is done with it |out_device_infos| must be freed with that same
+// allocator by the caller.
+IREE_API_EXPORT iree_status_t iree_hal_driver_query_available_devices(
+    iree_hal_driver_t* driver, iree_allocator_t allocator,
+    iree_hal_device_info_t** out_device_infos,
+    iree_host_size_t* out_device_info_count);
+
+// Creates a device as queried with iree_hal_driver_query_available_devices.
+IREE_API_EXPORT iree_status_t iree_hal_driver_create_device(
+    iree_hal_driver_t* driver, iree_hal_device_id_t device_id,
+    iree_allocator_t allocator, iree_hal_device_t** out_device);
+
+// Creates the driver-defined "default" device. This may simply be the first
+// device enumerated.
+IREE_API_EXPORT iree_status_t iree_hal_driver_create_default_device(
+    iree_hal_driver_t* driver, iree_allocator_t allocator,
+    iree_hal_device_t** out_device);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_driver_t implementation details
+//===----------------------------------------------------------------------===//
+
+// Virtual function table implemented by each iree_hal_driver_t backend.
+// Entries mirror the public iree_hal_driver_* API above.
+typedef struct iree_hal_driver_vtable_t {
+  void(IREE_API_PTR* destroy)(iree_hal_driver_t* driver);
+
+  iree_status_t(IREE_API_PTR* query_available_devices)(
+      iree_hal_driver_t* driver, iree_allocator_t allocator,
+      iree_hal_device_info_t** out_device_infos,
+      iree_host_size_t* out_device_info_count);
+
+  iree_status_t(IREE_API_PTR* create_device)(iree_hal_driver_t* driver,
+                                             iree_hal_device_id_t device_id,
+                                             iree_allocator_t allocator,
+                                             iree_hal_device_t** out_device);
+} iree_hal_driver_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_driver_vtable_t);
+
+// Implementation detail: most callers should use iree_hal_driver_release.
+IREE_API_EXPORT void iree_hal_driver_destroy(iree_hal_driver_t* driver);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DRIVER_H_
diff --git a/runtime/src/iree/hal/driver_registry.c b/runtime/src/iree/hal/driver_registry.c
new file mode 100644
index 0000000..d949e48
--- /dev/null
+++ b/runtime/src/iree/hal/driver_registry.c
@@ -0,0 +1,361 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/driver_registry.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/internal/call_once.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_driver_registry_t
+//===----------------------------------------------------------------------===//
+
+// 8 factories is enough for anyone, right?
+// But really this is here to prevent the need for dynamically allocated memory.
+// Because it's an implementation detail it's easy to grow in the future if we
+// want to support additional factories.
+//
+// An alternative would be to keep factories in an intrusive list - that way
+// there is no storage beyond the factory itself. This is less ideal as it would
+// force all factory storage to be in writeable memory and limit the ability for
+// the same factory to be registered with multiple registries (useful when
+// isolating/sandboxing/multi-versioning).
+#define IREE_HAL_MAX_DRIVER_FACTORY_COUNT 8
+
+struct iree_hal_driver_registry_t {
+ iree_allocator_t host_allocator;
+ iree_slim_mutex_t mutex;
+
+ // Factories in registration order. As factories are unregistered the list is
+ // shifted to be kept dense.
+ iree_host_size_t factory_count;
+ const iree_hal_driver_factory_t* factories[IREE_HAL_MAX_DRIVER_FACTORY_COUNT];
+};
+
+static iree_hal_driver_registry_t iree_hal_driver_registry_default_;
+static iree_once_flag iree_hal_driver_registry_default_flag_ =
+ IREE_ONCE_FLAG_INIT;
+static void iree_hal_driver_registry_default_initialize(void) {
+ memset(&iree_hal_driver_registry_default_, 0,
+ sizeof(iree_hal_driver_registry_default_));
+ iree_slim_mutex_initialize(&iree_hal_driver_registry_default_.mutex);
+}
+
+IREE_API_EXPORT iree_hal_driver_registry_t* iree_hal_driver_registry_default(
+ void) {
+ iree_call_once(&iree_hal_driver_registry_default_flag_,
+ iree_hal_driver_registry_default_initialize);
+ return &iree_hal_driver_registry_default_;
+}
+
+IREE_API_EXPORT iree_status_t
+iree_hal_driver_registry_allocate(iree_allocator_t host_allocator,
+                                  iree_hal_driver_registry_t** out_registry) {
+  IREE_ASSERT_ARGUMENT(out_registry);
+  *out_registry = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_driver_registry_t* registry = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(host_allocator, sizeof(*registry),
+                                (void**)&registry));
+  registry->host_allocator = host_allocator;
+  iree_slim_mutex_initialize(&registry->mutex);
+
+  *out_registry = registry;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_hal_driver_registry_free(
+    iree_hal_driver_registry_t* registry) {
+  if (!registry) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_allocator_t host_allocator = registry->host_allocator;
+
+  iree_slim_mutex_deinitialize(&registry->mutex);
+  iree_allocator_free(host_allocator, registry);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_register_factory(
+    iree_hal_driver_registry_t* registry,
+    const iree_hal_driver_factory_t* factory) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_slim_mutex_lock(&registry->mutex);
+
+  // Fail if already present; not because having it in there would harm anything
+  // but because we can't then balance with unregisters if we were to skip it
+  // when present and want to keep the list small and not have callers fill it
+  // with tons of duplicate entries.
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < registry->factory_count; ++i) {
+    if (registry->factories[i] == factory) {
+      status = iree_make_status(IREE_STATUS_ALREADY_EXISTS,
+                                "factory has already been registered");
+      break;
+    }
+  }
+
+  // Note that we check the capacity limit *after* checking for dupes so that
+  // callers will find issues with duplicate registrations easier. Otherwise,
+  // they'd just get a RESOURCE_EXHAUSTED and think there were too many unique
+  // factories registered already.
+  if (iree_status_is_ok(status) &&
+      registry->factory_count + 1 >= IREE_ARRAYSIZE(registry->factories)) {
+    status = iree_make_status(
+        IREE_STATUS_RESOURCE_EXHAUSTED,
+        "the maximum number of factories (%zu) have been registered",
+        IREE_ARRAYSIZE(registry->factories));
+  }
+
+  if (iree_status_is_ok(status)) {
+    registry->factories[registry->factory_count++] = factory;
+  }
+
+  iree_slim_mutex_unlock(&registry->mutex);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_unregister_factory(
+    iree_hal_driver_registry_t* registry,
+    const iree_hal_driver_factory_t* factory) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_slim_mutex_lock(&registry->mutex);
+
+  iree_status_t status = iree_ok_status();
+  iree_host_size_t index = -1;
+  for (iree_host_size_t i = 0; i < registry->factory_count; ++i) {
+    if (registry->factories[i] != factory) continue;
+    index = i;
+    break;
+  }
+  if (index == -1) {
+    status =
+        iree_make_status(IREE_STATUS_NOT_FOUND,
+                         "factory to remove is not registered at this time");
+  }
+
+  if (iree_status_is_ok(status)) {
+    // Compact list, preserving order; size is in bytes, not elements.
+    // Cast works around C4090 bug in MSVC: https://tinyurl.com/y46hlogx
+    memmove(
+        (void*)&registry->factories[index], &registry->factories[index + 1],
+        (registry->factory_count - index - 1) * sizeof(registry->factories[0]));
+    registry->factories[--registry->factory_count] = NULL;
+  }
+
+  iree_slim_mutex_unlock(&registry->mutex);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Computes the total byte size required to store driver info strings.
+static iree_host_size_t iree_hal_driver_info_compute_storage_size(
+ const iree_hal_driver_info_t* driver_info) {
+ iree_host_size_t storage_size = 0;
+ storage_size += driver_info->driver_name.size;
+ storage_size += driver_info->full_name.size;
+ return storage_size;
+}
+
+// Copies |source_driver_info| into |target_driver_info| using |string_storage|
+// for the nested strings. Returns the total number of bytes added to
+// string_storage.
+static iree_host_size_t iree_hal_driver_info_copy(
+ const iree_hal_driver_info_t* source_driver_info,
+ iree_hal_driver_info_t* target_driver_info, char* string_storage) {
+ // Copy everything by default (primitive fields, etc).
+ memcpy(target_driver_info, source_driver_info, sizeof(*target_driver_info));
+
+ // Copy in each string field to the string storage and set the ptr.
+ iree_host_size_t storage_size = 0;
+ storage_size += iree_string_view_append_to_buffer(
+ source_driver_info->driver_name, &target_driver_info->driver_name,
+ string_storage + storage_size);
+ storage_size += iree_string_view_append_to_buffer(
+ source_driver_info->full_name, &target_driver_info->full_name,
+ string_storage + storage_size);
+ return storage_size;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_enumerate(
+    iree_hal_driver_registry_t* registry, iree_allocator_t allocator,
+    iree_hal_driver_info_t** out_driver_infos,
+    iree_host_size_t* out_driver_info_count) {
+  IREE_ASSERT_ARGUMENT(registry);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  *out_driver_info_count = 0;
+  *out_driver_infos = NULL;
+
+  iree_status_t status = iree_ok_status();
+  iree_slim_mutex_lock(&registry->mutex);
+
+  // Enumerate each factory and figure out how much memory we need to fully
+  // store all data we need to clone.
+  iree_host_size_t total_driver_info_count = 0;
+  iree_host_size_t total_storage_size = 0;
+  for (iree_host_size_t i = 0; i < registry->factory_count; ++i) {
+    const iree_hal_driver_factory_t* factory = registry->factories[i];
+    const iree_hal_driver_info_t* driver_infos = NULL;
+    iree_host_size_t driver_info_count = 0;
+    status =
+        factory->enumerate(factory->self, &driver_infos, &driver_info_count);
+    if (!iree_status_is_ok(status)) break;
+    total_driver_info_count += driver_info_count;
+    for (iree_host_size_t j = 0; j < driver_info_count; j++) {
+      total_storage_size +=
+          iree_hal_driver_info_compute_storage_size(&driver_infos[j]);
+    }
+  }
+
+  // Allocate the required memory for both the driver infos and the string
+  // storage in a single block.
+  iree_host_size_t total_driver_infos_size =
+      total_driver_info_count * sizeof(iree_hal_driver_info_t);
+  if (iree_status_is_ok(status)) {
+    status = iree_allocator_malloc(allocator,
+                                   total_driver_infos_size + total_storage_size,
+                                   (void**)out_driver_infos);
+  }
+
+  // Write driver info and associated nested resources to the output. We have
+  // to enumerate again but enumeration is expected to be immutable for a given
+  // registration and we hold the lock so we're safe.
+  if (iree_status_is_ok(status)) {
+    iree_hal_driver_info_t* driver_info_storage_ptr = *out_driver_infos;
+    char* string_storage_ptr =
+        (char*)(*out_driver_infos) + total_driver_infos_size;
+    for (iree_host_size_t i = 0; i < registry->factory_count; ++i) {
+      const iree_hal_driver_factory_t* factory = registry->factories[i];
+      const iree_hal_driver_info_t* driver_infos = NULL;
+      iree_host_size_t driver_info_count = 0;
+      status =
+          factory->enumerate(factory->self, &driver_infos, &driver_info_count);
+      if (!iree_status_is_ok(status)) break;
+      for (iree_host_size_t j = 0; j < driver_info_count; j++) {
+        string_storage_ptr += iree_hal_driver_info_copy(
+            &driver_infos[j], driver_info_storage_ptr, string_storage_ptr);
+        ++driver_info_storage_ptr;
+      }
+    }
+    *out_driver_info_count = total_driver_info_count;
+  }
+
+  iree_slim_mutex_unlock(&registry->mutex);
+
+  // Cleanup memory if we failed.
+  if (!iree_status_is_ok(status) && *out_driver_infos) {
+    iree_allocator_free(allocator, *out_driver_infos);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_try_create(
+    iree_hal_driver_registry_t* registry, iree_hal_driver_id_t driver_id,
+    iree_allocator_t allocator, iree_hal_driver_t** out_driver) {
+  IREE_ASSERT_ARGUMENT(registry);
+  if (driver_id == IREE_HAL_DRIVER_ID_INVALID) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "invalid driver id");
+  }
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, driver_id);
+
+  *out_driver = NULL;
+
+  iree_status_t status = iree_ok_status();
+  iree_slim_mutex_lock(&registry->mutex);
+
+  // TODO(benvanik): figure out a good way of lining this up. The issue is that
+  // the driver_id is something we return during enumeration but we really
+  // want it to be something dynamic. We could pack an epoch into it that is
+  // bumped each time the registry factory list is modified so we could tell
+  // when a factory was added/removed, etc. So:
+  //   driver_id = [3 byte epoch] [1 byte index into factory list] [4 byte id]
+  // Not sure which status code to return if the epoch is a mismatch, maybe
+  // IREE_STATUS_UNAVAILABLE? If you are mutating the registry from multiple
+  // threads while also enumerating, that may just be enough of a footgun to
+  // bail and force the caller to resolve :)
+  status =
+      iree_make_status(IREE_STATUS_UNIMPLEMENTED, "driver creation by id nyi");
+
+  iree_slim_mutex_unlock(&registry->mutex);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_try_create_by_name(
+    iree_hal_driver_registry_t* registry, iree_string_view_t driver_name,
+    iree_allocator_t allocator, iree_hal_driver_t** out_driver) {
+  IREE_ASSERT_ARGUMENT(registry);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_TEXT(z0, driver_name.data, driver_name.size);
+
+  *out_driver = NULL;
+
+  // NOTE: we hold the lock the entire time here so that we can avoid
+  // allocations and avoid spurious failures by outside mutation of the
+  // registry.
+  iree_status_t status = iree_ok_status();
+  iree_slim_mutex_lock(&registry->mutex);
+
+  // Enumerate each factory and scan for the requested driver.
+  // NOTE: we scan in reverse so that we prefer the first hit in the most
+  // recently registered factory.
+  const iree_hal_driver_factory_t* hit_factory = NULL;
+  iree_hal_driver_id_t hit_driver_id = IREE_HAL_DRIVER_ID_INVALID;
+  for (iree_host_size_t i = 0; i < registry->factory_count; ++i) {
+    // Reach inside and grab the internal factory data structures.
+    const iree_hal_driver_factory_t* factory =
+        registry->factories[registry->factory_count - i - 1];
+    const iree_hal_driver_info_t* driver_infos = NULL;
+    iree_host_size_t driver_info_count = 0;
+    status =
+        factory->enumerate(factory->self, &driver_infos, &driver_info_count);
+    if (!iree_status_is_ok(status)) break;
+
+    // Scan for the specific driver by name.
+    // NOTE: we scan in reverse here too so multiple drivers with the same name
+    // from the same factory prefer the later drivers in the list.
+    for (iree_host_size_t j = 0; j < driver_info_count; j++) {
+      const iree_hal_driver_info_t* driver_info =
+          &driver_infos[driver_info_count - j - 1];
+      if (iree_string_view_equal(driver_name, driver_info->driver_name)) {
+        hit_factory = factory;
+        hit_driver_id = driver_info->driver_id;
+        break;
+      }
+    }
+    // Since we are scanning in reverse we stop searching when we find the first
+    // hit (aka the most recently added driver).
+    if (hit_driver_id != IREE_HAL_DRIVER_ID_INVALID) break;
+  }
+
+  // If we found a driver during the scan try to create it now.
+  // This may block the caller (with the lock held!), and may fail if for
+  // example a delay-loaded driver cannot be created even if it was enumerated.
+  if (hit_driver_id != IREE_HAL_DRIVER_ID_INVALID) {
+    status = hit_factory->try_create(hit_factory->self, hit_driver_id,
+                                     allocator, out_driver);
+  } else {
+    status =
+        iree_make_status(IREE_STATUS_NOT_FOUND, "no driver '%.*s' registered",
+                         (int)driver_name.size, driver_name.data);
+  }
+
+  iree_slim_mutex_unlock(&registry->mutex);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/hal/driver_registry.h b/runtime/src/iree/hal/driver_registry.h
new file mode 100644
index 0000000..fad02b4
--- /dev/null
+++ b/runtime/src/iree/hal/driver_registry.h
@@ -0,0 +1,168 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DRIVER_REGISTRY_H_
+#define IREE_HAL_DRIVER_REGISTRY_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/driver.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// Factory interface used for driver enumeration and creation.
+// The factory is designed to in many cases live in rodata by not requiring any
+// real code or processing when the driver is statically known to be available.
+// When drivers may be dynamically available based on system configuration a
+// factory can discover them and provide them during enumeration.
+//
+// Delay-loaded drivers that may require non-trivial setup time (such as those
+// implemented in dynamic libraries or over RPC) can be speculatively enumerated
+// by a factory and then rely on the try_create to actually perform the slow
+// work once the user has explicitly signaled that they are willing to pay the
+// cost (and deal with the consequences).
+//
+// WARNING: this API is unstable until the HAL is fully ported. Do not use.
+typedef struct iree_hal_driver_factory_t {
+ // TODO(benvanik): version field.
+ IREE_API_UNSTABLE
+
+ // User-defined pointer passed to all functions.
+ void* self;
+
+ // Queries the list of available drivers provided by the factory, if any.
+ // |out_driver_infos| will be populated with a *reference* to factory data
+ // structures (such as the driver name) that callers may choose to clone if
+ // needed.
+ //
+ // Implementers must make their factory enumeration results immutable for the
+ // duration they are registered, though the behavior of try_create is allowed
+ // to change call-to-call. If a factory needs to mutate its set of enumerated
+ // devices then it must do so by first unregistering itself and re-registering
+ // only after the changes have been made.
+ //
+ // Called with the driver registry lock held; may be called from any thread.
+ iree_status_t(IREE_API_PTR* enumerate)(
+ void* self, const iree_hal_driver_info_t** out_driver_infos,
+ iree_host_size_t* out_driver_info_count);
+
+ // Tries to create a driver as previously queried with enumerate.
+ // |driver_id| is the opaque ID returned from enumeration; note that there may
+ // be a significant amount of time between enumeration and creation and the
+// driver registry lock may have been released in the meantime.
+ //
+ // Delay-loaded drivers may still fail here if - for example - required system
+ // resources are unavailable or permission is denied.
+ //
+ // Called with the driver registry lock held; may be called from any thread.
+ iree_status_t(IREE_API_PTR* try_create)(void* self,
+ iree_hal_driver_id_t driver_id,
+ iree_allocator_t allocator,
+ iree_hal_driver_t** out_driver);
+} iree_hal_driver_factory_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_driver_registry_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_driver_registry_t iree_hal_driver_registry_t;
+
+// Returns the default per-process driver registry.
+// In simple applications this is usually where you want to go to register and
+// create drivers. More sophisticated applications that want tighter control
+// over the visibility of drivers to certain callers such as when dealing with
+// requests from multiple users may choose to allocate their own registries and
+// manage their lifetime as desired.
+IREE_API_EXPORT iree_hal_driver_registry_t* iree_hal_driver_registry_default(
+ void);
+
+// Allocates a driver registry that can be used to register and enumerate
+// HAL drivers.
+//
+// Callers must free the registry with iree_hal_driver_registry_free when it is
+// no longer needed.
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_allocate(
+ iree_allocator_t host_allocator, iree_hal_driver_registry_t** out_registry);
+
+// Frees a driver registry.
+// All factories will be implicitly unregistered.
+IREE_API_EXPORT void iree_hal_driver_registry_free(
+ iree_hal_driver_registry_t* registry);
+
+// Registers a driver factory to serve future queries/requests for drivers.
+// See iree_hal_driver_registry_t for more information.
+//
+// Thread-safe. The factory is not retained and must be kept alive by the caller
+// until it is unregistered (or the application terminates).
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_register_factory(
+ iree_hal_driver_registry_t* registry,
+ const iree_hal_driver_factory_t* factory);
+
+// Unregisters a driver factory.
+// Unregistering a factory only prevents new drivers from being created;
+// existing drivers may remain live even after unregistering. Factories can
+// expect that no new drivers will be created via the factory after the call
+// returns.
+//
+// Thread-safe. As the factory is not retained by the registry the caller must
+// release its memory (if needed) after this call returns.
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_unregister_factory(
+ iree_hal_driver_registry_t* registry,
+ const iree_hal_driver_factory_t* factory);
+
+// Enumerates all drivers from registered factories and returns them as a list.
+// The provided |allocator| will be used to allocate the returned list and after
+// the caller is done with it |out_driver_infos| must be freed with that same
+// allocator by the caller.
+//
+// The set of drivers returned should be considered the superset of those that
+// may be available for successful creation as it's possible that delay-loaded
+// drivers may fail even if they appear in this list.
+//
+// Thread-safe. Note that the factory may be unregistered between the query
+// completing and any attempt to instantiate the driver.
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_enumerate(
+ iree_hal_driver_registry_t* registry, iree_allocator_t allocator,
+ iree_hal_driver_info_t** out_driver_infos,
+ iree_host_size_t* out_driver_info_count);
+
+// Attempts to create a driver registered with the driver registry by a specific
+// ID as returned during enumeration in iree_hal_driver_info_t::driver_id.
+// This can be used to specify the exact driver to create in cases where there
+// may be multiple factories providing drivers with the same name.
+//
+// Thread-safe. May block the caller if the driver is delay-loaded and needs to
+// perform additional loading/verification/etc before returning.
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_try_create(
+ iree_hal_driver_registry_t* registry, iree_hal_driver_id_t driver_id,
+ iree_allocator_t allocator, iree_hal_driver_t** out_driver);
+
+// Attempts to create a driver registered with the given canonical driver name.
+// Effectively enumerate + find by name + try_create if found. Factories are
+// searched in most-recently-added order such that it's possible to override
+// drivers with newer registrations when multiple factories provide the same
+// driver name.
+//
+// Thread-safe. May block the caller if the driver is delay-loaded and needs to
+// perform additional loading/verification/etc before returning.
+IREE_API_EXPORT iree_status_t iree_hal_driver_registry_try_create_by_name(
+ iree_hal_driver_registry_t* registry, iree_string_view_t driver_name,
+ iree_allocator_t allocator, iree_hal_driver_t** out_driver);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DRIVER_REGISTRY_H_
diff --git a/runtime/src/iree/hal/drivers/BUILD b/runtime/src/iree/hal/drivers/BUILD
new file mode 100644
index 0000000..cdf2b78
--- /dev/null
+++ b/runtime/src/iree/hal/drivers/BUILD
@@ -0,0 +1,77 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("@bazel_skylib//rules:common_settings.bzl", "string_list_flag")
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+ALL_DRIVERS = [
+ "dylib",
+ "dylib-sync",
+ "vmvx",
+ "vmvx-sync",
+ "vulkan",
+ "cuda",
+]
+
+string_list_flag(
+ name = "enabled_drivers",
+ build_setting_default = [
+ "dylib",
+ "dylib-sync",
+ "vmvx",
+ "vmvx-sync",
+ "vulkan",
+ ],
+)
+
+[
+ config_setting(
+ name = "{}_enabled".format(driver),
+ flag_values = {
+ ":enabled_drivers": driver,
+ },
+ )
+ for driver in ALL_DRIVERS
+]
+
+iree_runtime_cc_library(
+ name = "drivers",
+ srcs = ["init.c"],
+ hdrs = ["init.h"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ ] + select({
+ ":dylib_enabled": ["//runtime/src/iree/hal/dylib/registration"],
+ "//conditions:default": [],
+ }) +
+ select({
+ ":dylib-sync_enabled": ["//runtime/src/iree/hal/dylib/registration:sync"],
+ "//conditions:default": [],
+ }) +
+ select({
+ ":vmvx_enabled": ["//runtime/src/iree/hal/vmvx/registration"],
+ "//conditions:default": [],
+ }) +
+ select({
+ ":vmvx-sync_enabled": ["//runtime/src/iree/hal/vmvx/registration:sync"],
+ "//conditions:default": [],
+ }) +
+ select({
+ ":vulkan_enabled": ["//runtime/src/iree/hal/vulkan/registration"],
+ "//conditions:default": [],
+ }) +
+ select({
+ ":cuda_enabled": ["//runtime/src/iree/hal/cuda/registration"],
+ "//conditions:default": [],
+ }),
+)
diff --git a/runtime/src/iree/hal/drivers/CMakeLists.txt b/runtime/src/iree/hal/drivers/CMakeLists.txt
new file mode 100644
index 0000000..5dadc57
--- /dev/null
+++ b/runtime/src/iree/hal/drivers/CMakeLists.txt
@@ -0,0 +1,44 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Doesn't use bazel_to_cmake because of custom configuration vars
+
+set(IREE_HAL_DRIVER_MODULES)
+if(IREE_HAL_DRIVER_CUDA)
+ list(APPEND IREE_HAL_DRIVER_MODULES iree::hal::cuda::registration)
+endif()
+if(IREE_HAL_DRIVER_DYLIB)
+ list(APPEND IREE_HAL_DRIVER_MODULES iree::hal::dylib::registration)
+endif()
+if(IREE_HAL_DRIVER_DYLIB_SYNC)
+ list(APPEND IREE_HAL_DRIVER_MODULES iree::hal::dylib::registration::sync)
+endif()
+if(IREE_HAL_DRIVER_VMVX)
+ list(APPEND IREE_HAL_DRIVER_MODULES iree::hal::vmvx::registration)
+endif()
+if(IREE_HAL_DRIVER_VMVX_SYNC)
+ list(APPEND IREE_HAL_DRIVER_MODULES iree::hal::vmvx::registration::sync)
+endif()
+if(IREE_HAL_DRIVER_VULKAN)
+ list(APPEND IREE_HAL_DRIVER_MODULES iree::hal::vulkan::registration)
+endif()
+if(IREE_HAL_DRIVER_EXPERIMENTAL_ROCM)
+ list(APPEND IREE_HAL_DRIVER_MODULES experimental::rocm::registration)
+endif()
+
+iree_cc_library(
+ NAME
+ drivers
+ HDRS
+ "init.h"
+ SRCS
+ "init.c"
+ DEPS
+ iree::base
+ iree::base::tracing
+ ${IREE_HAL_DRIVER_MODULES}
+ PUBLIC
+)
diff --git a/runtime/src/iree/hal/drivers/init.c b/runtime/src/iree/hal/drivers/init.c
new file mode 100644
index 0000000..71f9d20
--- /dev/null
+++ b/runtime/src/iree/hal/drivers/init.c
@@ -0,0 +1,80 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/drivers/init.h"
+
+#include "iree/base/tracing.h"
+
+#if defined(IREE_HAL_HAVE_CUDA_DRIVER_MODULE)
+#include "iree/hal/cuda/registration/driver_module.h"
+#endif // IREE_HAL_HAVE_CUDA_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_DYLIB_DRIVER_MODULE)
+#include "iree/hal/dylib/registration/driver_module.h"
+#endif // IREE_HAL_HAVE_DYLIB_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_DYLIB_SYNC_DRIVER_MODULE)
+#include "iree/hal/dylib/registration/driver_module_sync.h"
+#endif // IREE_HAL_HAVE_DYLIB_SYNC_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_VMVX_DRIVER_MODULE)
+#include "iree/hal/vmvx/registration/driver_module.h"
+#endif // IREE_HAL_HAVE_VMVX_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_VMVX_SYNC_DRIVER_MODULE)
+#include "iree/hal/vmvx/registration/driver_module_sync.h"
+#endif // IREE_HAL_HAVE_VMVX_SYNC_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_VULKAN_DRIVER_MODULE)
+#include "iree/hal/vulkan/registration/driver_module.h"
+#endif // IREE_HAL_HAVE_VULKAN_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_EXPERIMENTAL_ROCM_DRIVER_MODULE)
+#include "experimental/rocm/registration/driver_module.h"
+#endif // IREE_HAL_HAVE_EXPERIMENTAL_ROCM_DRIVER_MODULE
+
+IREE_API_EXPORT iree_status_t
+iree_hal_register_all_available_drivers(iree_hal_driver_registry_t* registry) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+#if defined(IREE_HAL_HAVE_CUDA_DRIVER_MODULE)
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_cuda_driver_module_register(registry));
+#endif // IREE_HAL_HAVE_CUDA_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_DYLIB_DRIVER_MODULE)
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_dylib_driver_module_register(registry));
+#endif // IREE_HAL_HAVE_DYLIB_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_DYLIB_SYNC_DRIVER_MODULE)
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_dylib_sync_driver_module_register(registry));
+#endif // IREE_HAL_HAVE_DYLIB_SYNC_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_VMVX_DRIVER_MODULE)
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_vmvx_driver_module_register(registry));
+#endif // IREE_HAL_HAVE_VMVX_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_VMVX_SYNC_DRIVER_MODULE)
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_vmvx_sync_driver_module_register(registry));
+#endif // IREE_HAL_HAVE_VMVX_SYNC_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_VULKAN_DRIVER_MODULE)
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_vulkan_driver_module_register(registry));
+#endif // IREE_HAL_HAVE_VULKAN_DRIVER_MODULE
+
+#if defined(IREE_HAL_HAVE_EXPERIMENTAL_ROCM_DRIVER_MODULE)
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_rocm_driver_module_register(registry));
+#endif // IREE_HAL_HAVE_EXPERIMENTAL_ROCM_DRIVER_MODULE
+
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
diff --git a/runtime/src/iree/hal/drivers/init.h b/runtime/src/iree/hal/drivers/init.h
new file mode 100644
index 0000000..849816c
--- /dev/null
+++ b/runtime/src/iree/hal/drivers/init.h
@@ -0,0 +1,31 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DRIVERS_INIT_H_
+#define IREE_HAL_DRIVERS_INIT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Registers all drivers that were linked into the current binary based on the
+// build configuration. Note that there may be no drivers available.
+//
+// This only registers IREE core drivers (those under iree/hal/). User-provided
+// drivers must be directly registered or directly created, though a user could
+// create their own user_register_all_available_drivers() that calls this as
+// well as registering their drivers.
+IREE_API_EXPORT iree_status_t
+iree_hal_register_all_available_drivers(iree_hal_driver_registry_t* registry);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DRIVERS_INIT_H_
diff --git a/runtime/src/iree/hal/dylib/BUILD b/runtime/src/iree/hal/dylib/BUILD
new file mode 100644
index 0000000..236a474
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/BUILD
@@ -0,0 +1,11 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
diff --git a/runtime/src/iree/hal/dylib/CMakeLists.txt b/runtime/src/iree/hal/dylib/CMakeLists.txt
new file mode 100644
index 0000000..c6326d7
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/CMakeLists.txt
@@ -0,0 +1,13 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/dylib/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/dylib/cts/CMakeLists.txt b/runtime/src/iree/hal/dylib/cts/CMakeLists.txt
new file mode 100644
index 0000000..5bc7537
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/cts/CMakeLists.txt
@@ -0,0 +1,48 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Pick the executable format prefix matching how bytecode modules are linked:
+# "system" when the system dylib linker is forced, "embedded" otherwise.
+if(IREE_BYTECODE_MODULE_FORCE_SYSTEM_DYLIB_LINKER)
+ set(EXECUTABLE_FORMAT_PREFIX "system")
+else()
+ set(EXECUTABLE_FORMAT_PREFIX "embedded")
+endif()
+
+# Passed through to the CTS test generator; the value is a C expression
+# ("<prefix>-elf-" IREE_ARCH string concatenation) spliced into generated
+# test sources by iree_hal_cts_test_suite.
+set(EXECUTABLE_FORMAT "\"${EXECUTABLE_FORMAT_PREFIX}-elf-\" IREE_ARCH")
+
+# Full (task-system) dylib driver conformance tests.
+iree_hal_cts_test_suite(
+ DRIVER_NAME
+ dylib
+ DRIVER_REGISTRATION_HDR
+ "runtime/src/iree/hal/dylib/registration/driver_module.h"
+ DRIVER_REGISTRATION_FN
+ "iree_hal_dylib_driver_module_register"
+ COMPILER_TARGET_BACKEND
+ "dylib-llvm-aot"
+ EXECUTABLE_FORMAT
+ "${EXECUTABLE_FORMAT}"
+ DEPS
+ iree::hal::dylib::registration
+)
+
+# Synchronous dylib driver conformance tests. Tests requiring asynchronous
+# command buffer features are excluded until the sync HAL supports them.
+iree_hal_cts_test_suite(
+ DRIVER_NAME
+ dylib-sync
+ DRIVER_REGISTRATION_HDR
+ "runtime/src/iree/hal/dylib/registration/driver_module_sync.h"
+ DRIVER_REGISTRATION_FN
+ "iree_hal_dylib_sync_driver_module_register"
+ COMPILER_TARGET_BACKEND
+ "dylib-llvm-aot"
+ EXECUTABLE_FORMAT
+ "${EXECUTABLE_FORMAT}"
+ DEPS
+ iree::hal::dylib::registration::sync
+ EXCLUDED_TESTS
+ # TODO(#4680): command buffer recording so that these can run on sync HAL
+ "command_buffer"
+ "event"
+ "semaphore_submission"
+)
diff --git a/runtime/src/iree/hal/dylib/registration/BUILD b/runtime/src/iree/hal/dylib/registration/BUILD
new file mode 100644
index 0000000..44bda46
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/registration/BUILD
@@ -0,0 +1,71 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+# Opens an IREE_HAL_DRIVER_DYLIB guard in the generated CMakeLists; with
+# inline = True the content is spliced verbatim around the converted targets
+# (see the generated runtime/src/iree/hal/dylib/registration/CMakeLists.txt).
+iree_cmake_extra_content(
+ content = """
+if(${IREE_HAL_DRIVER_DYLIB})
+""",
+ inline = True,
+)
+
+# Registration shim for the full dylib driver: task-system driver backed by
+# both the embedded- and system-library executable loaders.
+iree_runtime_cc_library(
+ name = "registration",
+ srcs = ["driver_module.c"],
+ hdrs = ["driver_module.h"],
+ defines = [
+ "IREE_HAL_HAVE_DYLIB_DRIVER_MODULE=1",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base/internal:flags",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/local",
+ "//runtime/src/iree/hal/local:task_driver",
+ "//runtime/src/iree/hal/local/loaders:embedded_library_loader",
+ "//runtime/src/iree/hal/local/loaders:system_library_loader",
+ "//runtime/src/iree/task:api",
+ ],
+)
+
+# Closes the IREE_HAL_DRIVER_DYLIB guard and opens the
+# IREE_HAL_DRIVER_DYLIB_SYNC guard for the sync target below.
+iree_cmake_extra_content(
+ content = """
+endif()
+
+if(${IREE_HAL_DRIVER_DYLIB_SYNC})
+""",
+ inline = True,
+)
+
+# Registration shim for the synchronous dylib driver: sync driver backed by
+# only the embedded-library loader (no task system dependency).
+iree_runtime_cc_library(
+ name = "sync",
+ srcs = ["driver_module_sync.c"],
+ hdrs = ["driver_module_sync.h"],
+ defines = [
+ "IREE_HAL_HAVE_DYLIB_SYNC_DRIVER_MODULE=1",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/local",
+ "//runtime/src/iree/hal/local:sync_driver",
+ "//runtime/src/iree/hal/local/loaders:embedded_library_loader",
+ ],
+)
+
+# Closes the IREE_HAL_DRIVER_DYLIB_SYNC guard.
+iree_cmake_extra_content(
+ content = """
+endif()
+""",
+ inline = True,
+)
diff --git a/runtime/src/iree/hal/dylib/registration/CMakeLists.txt b/runtime/src/iree/hal/dylib/registration/CMakeLists.txt
new file mode 100644
index 0000000..edee5dc
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/registration/CMakeLists.txt
@@ -0,0 +1,60 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/dylib/registration/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+if(${IREE_HAL_DRIVER_DYLIB})
+
+iree_cc_library(
+ NAME
+ registration
+ HDRS
+ "driver_module.h"
+ SRCS
+ "driver_module.c"
+ DEPS
+ iree::base
+ iree::base::internal::flags
+ iree::hal
+ iree::hal::local
+ iree::hal::local::loaders::embedded_library_loader
+ iree::hal::local::loaders::system_library_loader
+ iree::hal::local::task_driver
+ iree::task::api
+ DEFINES
+ "IREE_HAL_HAVE_DYLIB_DRIVER_MODULE=1"
+ PUBLIC
+)
+
+endif()
+
+if(${IREE_HAL_DRIVER_DYLIB_SYNC})
+
+iree_cc_library(
+ NAME
+ sync
+ HDRS
+ "driver_module_sync.h"
+ SRCS
+ "driver_module_sync.c"
+ DEPS
+ iree::base
+ iree::hal
+ iree::hal::local
+ iree::hal::local::loaders::embedded_library_loader
+ iree::hal::local::sync_driver
+ DEFINES
+ "IREE_HAL_HAVE_DYLIB_SYNC_DRIVER_MODULE=1"
+ PUBLIC
+)
+
+endif()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/dylib/registration/driver_module.c b/runtime/src/iree/hal/dylib/registration/driver_module.c
new file mode 100644
index 0000000..836db33
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/registration/driver_module.c
@@ -0,0 +1,108 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/dylib/registration/driver_module.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/flags.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/hal/local/loaders/embedded_library_loader.h"
+#include "iree/hal/local/loaders/system_library_loader.h"
+#include "iree/hal/local/task_device.h"
+#include "iree/hal/local/task_driver.h"
+#include "iree/task/api.h"
+
+// TODO(#4298): remove this driver registration and wrapper.
+// By having a single iree/hal/local/registration that then has the loaders
+// added to it based on compilation settings we can have a single set of flags
+// for everything. We can also have API helper methods that register the driver
+// using an existing executor so that we can entirely externalize the task
+// system configuration from the HAL.
+
+#define IREE_HAL_DYLIB_DRIVER_ID 0x58444C4Cu // XDLL
+
+// Enumerates the single "dylib" driver this factory can create.
+// |self| is unused: the factory is stateless (registered with self=NULL).
+static iree_status_t iree_hal_dylib_driver_factory_enumerate(
+ void* self, const iree_hal_driver_info_t** out_driver_infos,
+ iree_host_size_t* out_driver_info_count) {
+ // Static storage: the pointer returned via |out_driver_infos| remains valid
+ // after this call without any allocation.
+ static const iree_hal_driver_info_t driver_infos[1] = {
+ {
+ .driver_id = IREE_HAL_DYLIB_DRIVER_ID,
+ .driver_name = iree_string_view_literal("dylib"),
+ .full_name =
+ iree_string_view_literal("AOT compiled dynamic libraries"),
+ },
+ };
+ *out_driver_info_count = IREE_ARRAYSIZE(driver_infos);
+ *out_driver_infos = driver_infos;
+ return iree_ok_status();
+}
+
+// Creates the "dylib" driver: a task-system HAL driver with an executor
+// configured from flags and both embedded- and system-library loaders.
+// On success ownership of the new driver transfers to the caller via
+// |out_driver|.
+static iree_status_t iree_hal_dylib_driver_factory_try_create(
+ void* self, iree_hal_driver_id_t driver_id, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver) {
+ if (driver_id != IREE_HAL_DYLIB_DRIVER_ID) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "no driver with ID %016" PRIu64
+ " is provided by this factory",
+ driver_id);
+ }
+
+ iree_hal_task_device_params_t default_params;
+ iree_hal_task_device_params_initialize(&default_params);
+
+ iree_status_t status = iree_ok_status();
+
+ // loader_count tracks attempted creations so the cleanup loop below can
+ // release whatever was produced. NOTE(review): assumes a failed *_create
+ // leaves the slot NULL and that releasing NULL is a no-op — confirm.
+ iree_hal_executable_loader_t* loaders[2] = {NULL, NULL};
+ iree_host_size_t loader_count = 0;
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_embedded_library_loader_create(
+ iree_hal_executable_import_provider_null(), host_allocator,
+ &loaders[loader_count++]);
+ }
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_system_library_loader_create(
+ iree_hal_executable_import_provider_null(), host_allocator,
+ &loaders[loader_count++]);
+ }
+
+ // Task executor configuration (worker counts/etc) comes from command-line
+ // flags (see iree/task/api.h dependency).
+ iree_task_executor_t* executor = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_task_executor_create_from_flags(host_allocator, &executor);
+ }
+
+ iree_hal_allocator_t* device_allocator = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_allocator_create_heap(iree_make_cstring_view("cpu"),
+ host_allocator, host_allocator,
+ &device_allocator);
+ }
+
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_task_driver_create(
+ iree_make_cstring_view("cpu"), &default_params, executor, loader_count,
+ loaders, device_allocator, host_allocator, out_driver);
+ }
+
+ // Local references are released unconditionally; on success the driver is
+ // presumed to have retained what it needs (TODO confirm against
+ // iree_hal_task_driver_create ownership docs).
+ iree_hal_allocator_release(device_allocator);
+ iree_task_executor_release(executor);
+ for (iree_host_size_t i = 0; i < loader_count; ++i) {
+ iree_hal_executable_loader_release(loaders[i]);
+ }
+ return status;
+}
+
+// Registers the dylib driver factory with |registry|.
+// The factory is static const so the pointer handed to the registry remains
+// valid for the lifetime of the process.
+IREE_API_EXPORT iree_status_t
+iree_hal_dylib_driver_module_register(iree_hal_driver_registry_t* registry) {
+ static const iree_hal_driver_factory_t factory = {
+ .self = NULL,
+ .enumerate = iree_hal_dylib_driver_factory_enumerate,
+ .try_create = iree_hal_dylib_driver_factory_try_create,
+ };
+ return iree_hal_driver_registry_register_factory(registry, &factory);
+}
diff --git a/runtime/src/iree/hal/dylib/registration/driver_module.h b/runtime/src/iree/hal/dylib/registration/driver_module.h
new file mode 100644
index 0000000..7c13188
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/registration/driver_module.h
@@ -0,0 +1,26 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DYLIB_REGISTRATION_DRIVER_MODULE_H_
+#define IREE_HAL_DYLIB_REGISTRATION_DRIVER_MODULE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// DEPRECATED: this entire driver will be removed soon.
+// TODO(#3580): remove this entire driver w/ iree_hal_executable_library_t.
+IREE_API_EXPORT iree_status_t
+iree_hal_dylib_driver_module_register(iree_hal_driver_registry_t* registry);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DYLIB_REGISTRATION_DRIVER_MODULE_H_
diff --git a/runtime/src/iree/hal/dylib/registration/driver_module_sync.c b/runtime/src/iree/hal/dylib/registration/driver_module_sync.c
new file mode 100644
index 0000000..29f0a69
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/registration/driver_module_sync.c
@@ -0,0 +1,86 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/dylib/registration/driver_module_sync.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/hal/local/loaders/embedded_library_loader.h"
+#include "iree/hal/local/sync_device.h"
+#include "iree/hal/local/sync_driver.h"
+
+// TODO(#4298): remove this driver registration and wrapper.
+// By having a single iree/hal/local/registration that then has the loaders
+// added to it based on compilation settings we can have a single set of flags
+// for everything.
+
+#define IREE_HAL_DYLIB_SYNC_DRIVER_ID 0x53444C4Cu // SDLL
+
+// Enumerates the single "dylib-sync" driver this factory can create.
+// |self| is unused: the factory is stateless (registered with self=NULL).
+static iree_status_t iree_hal_dylib_sync_driver_factory_enumerate(
+ void* self, const iree_hal_driver_info_t** out_driver_infos,
+ iree_host_size_t* out_driver_info_count) {
+ // Static storage: the pointer returned via |out_driver_infos| remains valid
+ // after this call without any allocation.
+ static const iree_hal_driver_info_t default_driver_info = {
+ .driver_id = IREE_HAL_DYLIB_SYNC_DRIVER_ID,
+ .driver_name = iree_string_view_literal("dylib-sync"),
+ .full_name = iree_string_view_literal(
+ "synchronous AOT compiled dynamic embedded libraries"),
+ };
+ *out_driver_info_count = 1;
+ *out_driver_infos = &default_driver_info;
+ return iree_ok_status();
+}
+
+// Creates the "dylib-sync" driver: a synchronous HAL driver backed by only
+// the embedded-library loader (no task system). On success ownership of the
+// new driver transfers to the caller via |out_driver|.
+static iree_status_t iree_hal_dylib_sync_driver_factory_try_create(
+ void* self, iree_hal_driver_id_t driver_id, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver) {
+ if (driver_id != IREE_HAL_DYLIB_SYNC_DRIVER_ID) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "no driver with ID %016" PRIu64
+ " is provided by this factory",
+ driver_id);
+ }
+
+ iree_hal_sync_device_params_t default_params;
+ iree_hal_sync_device_params_initialize(&default_params);
+
+ iree_status_t status = iree_ok_status();
+ iree_hal_executable_loader_t* loaders[1] = {NULL};
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_embedded_library_loader_create(
+ iree_hal_executable_import_provider_null(), host_allocator,
+ &loaders[0]);
+ }
+
+ iree_hal_allocator_t* device_allocator = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_allocator_create_heap(iree_make_cstring_view("cpu"),
+ host_allocator, host_allocator,
+ &device_allocator);
+ }
+
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_sync_driver_create(
+ iree_make_cstring_view("cpu"), &default_params, IREE_ARRAYSIZE(loaders),
+ loaders, device_allocator, host_allocator, out_driver);
+ }
+
+ // Local references are released unconditionally; on success the driver is
+ // presumed to have retained what it needs. NOTE(review): assumes releasing
+ // a NULL loader (when creation failed) is a no-op — confirm.
+ iree_hal_allocator_release(device_allocator);
+ iree_hal_executable_loader_release(loaders[0]);
+ return status;
+}
+
+// Registers the dylib-sync driver factory with |registry|.
+// The factory is static const so the pointer handed to the registry remains
+// valid for the lifetime of the process.
+IREE_API_EXPORT iree_status_t iree_hal_dylib_sync_driver_module_register(
+ iree_hal_driver_registry_t* registry) {
+ static const iree_hal_driver_factory_t factory = {
+ .self = NULL,
+ .enumerate = iree_hal_dylib_sync_driver_factory_enumerate,
+ .try_create = iree_hal_dylib_sync_driver_factory_try_create,
+ };
+ return iree_hal_driver_registry_register_factory(registry, &factory);
+}
diff --git a/runtime/src/iree/hal/dylib/registration/driver_module_sync.h b/runtime/src/iree/hal/dylib/registration/driver_module_sync.h
new file mode 100644
index 0000000..2f8139f
--- /dev/null
+++ b/runtime/src/iree/hal/dylib/registration/driver_module_sync.h
@@ -0,0 +1,26 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DYLIB_REGISTRATION_DRIVER_MODULE_SYNC_H_
+#define IREE_HAL_DYLIB_REGISTRATION_DRIVER_MODULE_SYNC_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// DEPRECATED: this entire driver will be removed soon.
+// TODO(#3580): remove this entire driver w/ iree_hal_executable_library_t.
+IREE_API_EXPORT iree_status_t iree_hal_dylib_sync_driver_module_register(
+ iree_hal_driver_registry_t* registry);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DYLIB_REGISTRATION_DRIVER_MODULE_SYNC_H_
diff --git a/runtime/src/iree/hal/event.c b/runtime/src/iree/hal/event.c
new file mode 100644
index 0000000..95bda1f
--- /dev/null
+++ b/runtime/src/iree/hal/event.c
@@ -0,0 +1,31 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/event.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/device.h"
+#include "iree/hal/resource.h"
+
+// Shorthand for dispatching through the iree_hal_event_t vtable.
+#define _VTABLE_DISPATCH(event, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(event, iree_hal_event, method_name)
+
+// Expands to the standard retain/release implementations for events.
+IREE_HAL_API_RETAIN_RELEASE(event);
+
+// Creates an event by dispatching to the device's create_event vtable entry.
+// |*out_event| is cleared first so callers never observe a stale pointer on
+// failure; the call is wrapped in a trace zone for profiling.
+IREE_API_EXPORT iree_status_t
+iree_hal_event_create(iree_hal_device_t* device, iree_hal_event_t** out_event) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(out_event);
+ *out_event = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = IREE_HAL_VTABLE_DISPATCH(
+ device, iree_hal_device, create_event)(device, out_event);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/hal/event.h b/runtime/src/iree/hal/event.h
new file mode 100644
index 0000000..a6ea312
--- /dev/null
+++ b/runtime/src/iree/hal/event.h
@@ -0,0 +1,64 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_EVENT_H_
+#define IREE_HAL_EVENT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_event_t
+//===----------------------------------------------------------------------===//
+
+// Events are used for defining synchronization scopes within command buffers.
+// An event only exists within a single CommandBuffer and must not be used
+// across command buffers from the same device or others.
+//
+// See iree_hal_command_buffer_signal_event and
+// iree_hal_command_buffer_wait_events for more info.
+//
+// Maps to VkEvent:
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkEvent.html
+typedef struct iree_hal_event_t iree_hal_event_t;
+
+// Creates an event for recording into command buffers.
+// The returned event object is only usable with this device and events must
+// only be used to synchronize within the same queue.
+IREE_API_EXPORT iree_status_t
+iree_hal_event_create(iree_hal_device_t* device, iree_hal_event_t** out_event);
+
+// Retains the given |event| for the caller.
+IREE_API_EXPORT void iree_hal_event_retain(iree_hal_event_t* event);
+
+// Releases the given |event| from the caller.
+IREE_API_EXPORT void iree_hal_event_release(iree_hal_event_t* event);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_event_t implementation details
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_event_vtable_t {
+ void(IREE_API_PTR* destroy)(iree_hal_event_t* event);
+} iree_hal_event_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_event_vtable_t);
+
+IREE_API_EXPORT void iree_hal_event_destroy(iree_hal_event_t* event);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_EVENT_H_
diff --git a/runtime/src/iree/hal/executable.c b/runtime/src/iree/hal/executable.c
new file mode 100644
index 0000000..00a7c9b
--- /dev/null
+++ b/runtime/src/iree/hal/executable.c
@@ -0,0 +1,15 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/executable.h"
+
+#include "iree/hal/detail.h"
+#include "iree/hal/resource.h"
+
+// Shorthand for dispatching through the iree_hal_executable_t vtable.
+#define _VTABLE_DISPATCH(executable, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(executable, iree_hal_executable, method_name)
+
+// Expands to the standard retain/release implementations for executables;
+// creation goes through iree_hal_executable_cache_t so no other methods are
+// defined here.
+IREE_HAL_API_RETAIN_RELEASE(executable);
diff --git a/runtime/src/iree/hal/executable.h b/runtime/src/iree/hal/executable.h
new file mode 100644
index 0000000..561ed3a
--- /dev/null
+++ b/runtime/src/iree/hal/executable.h
@@ -0,0 +1,65 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_EXECUTABLE_H_
+#define IREE_HAL_EXECUTABLE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_t
+//===----------------------------------------------------------------------===//
+
+// Handle to a loaded executable.
+// Loading of executables routes through an executable cache, allowing for
+// context-aware scoped caches. HAL implementations can use this to preserve
+// JIT'ed executables across processes or reuse executables across device
+// instances.
+//
+// Executables provide one or more entry points that can be dispatched via
+// iree_hal_command_buffer_dispatch. Some entry points may represent the same
+// computation but specialized in different ways such that the runtime can
+// switch strategies and choose between them per-dispatch.
+//
+// Maps (roughly) to vkShaderModule + VkPipeline[].
+typedef struct iree_hal_executable_t iree_hal_executable_t;
+
+// Retains the given |executable| for the caller.
+IREE_API_EXPORT void iree_hal_executable_retain(
+ iree_hal_executable_t* executable);
+
+// Releases the given |executable| from the caller.
+IREE_API_EXPORT void iree_hal_executable_release(
+ iree_hal_executable_t* executable);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_t implementation details
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_executable_vtable_t {
+ void(IREE_API_PTR* destroy)(iree_hal_executable_t* executable);
+} iree_hal_executable_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_executable_vtable_t);
+
+IREE_API_EXPORT void iree_hal_executable_destroy(
+ iree_hal_executable_t* executable);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_EXECUTABLE_H_
diff --git a/runtime/src/iree/hal/executable_cache.c b/runtime/src/iree/hal/executable_cache.c
new file mode 100644
index 0000000..73bd9bb
--- /dev/null
+++ b/runtime/src/iree/hal/executable_cache.c
@@ -0,0 +1,69 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/executable_cache.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/device.h"
+#include "iree/hal/resource.h"
+
+// Initializes |out_executable_params| to defaults: all fields zeroed and the
+// caching mode set to allow both persistent caching and optimization.
+// Callers override fields as needed before preparing an executable.
+void iree_hal_executable_params_initialize(
+ iree_hal_executable_params_t* out_executable_params) {
+ memset(out_executable_params, 0, sizeof(*out_executable_params));
+ out_executable_params->caching_mode =
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_PERSISTENT_CACHING |
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_OPTIMIZATION;
+}
+
+// Shorthand for dispatching through the iree_hal_executable_cache_t vtable.
+#define _VTABLE_DISPATCH(executable_cache, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(executable_cache, iree_hal_executable_cache, \
+ method_name)
+
+// Expands to the standard retain/release implementations for caches.
+IREE_HAL_API_RETAIN_RELEASE(executable_cache);
+
+// Creates an executable cache by dispatching to the device's
+// create_executable_cache vtable entry. |*out_executable_cache| is cleared
+// first so callers never observe a stale pointer on failure.
+IREE_API_EXPORT iree_status_t iree_hal_executable_cache_create(
+ iree_hal_device_t* device, iree_string_view_t identifier, iree_loop_t loop,
+ iree_hal_executable_cache_t** out_executable_cache) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(out_executable_cache);
+ *out_executable_cache = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device,
+ create_executable_cache)(
+ device, identifier, loop, out_executable_cache);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Queries the cache implementation (via its vtable) whether it can prepare
+// executables of |executable_format| under |caching_mode|. Pure dispatch;
+// no state is modified.
+IREE_API_EXPORT bool iree_hal_executable_cache_can_prepare_format(
+ iree_hal_executable_cache_t* executable_cache,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format) {
+ IREE_ASSERT_ARGUMENT(executable_cache);
+ return _VTABLE_DISPATCH(executable_cache, can_prepare_format)(
+ executable_cache, caching_mode, executable_format);
+}
+
+// Prepares (or looks up) an executable described by |executable_params| by
+// dispatching to the cache's prepare_executable vtable entry.
+// Validates that a non-zero executable_layout_count is accompanied by a
+// non-NULL executable_layouts array, and clears |*out_executable| first so
+// callers never observe a stale pointer on failure.
+IREE_API_EXPORT iree_status_t iree_hal_executable_cache_prepare_executable(
+ iree_hal_executable_cache_t* executable_cache,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable) {
+ IREE_ASSERT_ARGUMENT(executable_cache);
+ IREE_ASSERT_ARGUMENT(executable_params);
+ IREE_ASSERT_ARGUMENT(!executable_params->executable_layout_count ||
+ executable_params->executable_layouts);
+ IREE_ASSERT_ARGUMENT(out_executable);
+ *out_executable = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = _VTABLE_DISPATCH(executable_cache, prepare_executable)(
+ executable_cache, executable_params, out_executable);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/hal/executable_cache.h b/runtime/src/iree/hal/executable_cache.h
new file mode 100644
index 0000000..9fd53f0
--- /dev/null
+++ b/runtime/src/iree/hal/executable_cache.h
@@ -0,0 +1,217 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_EXECUTABLE_CACHE_H_
+#define IREE_HAL_EXECUTABLE_CACHE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/executable.h"
+#include "iree/hal/executable_layout.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+//===----------------------------------------------------------------------===//
+// Types and Enums
+//===----------------------------------------------------------------------===//
+
+// Defines how the executable cache performs preparation.
+enum iree_hal_executable_caching_mode_bits_t {
+ // Allows the cache to reference the provided executable_data after it has
+ // prepared the executable. Callers must ensure the data remains valid for the
+ // lifetime of the cache. If memory mapping constant executable data from
+ // disk this can be used to avoid copies.
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ALIAS_PROVIDED_DATA = 1u << 0,
+ // Allows the prepared executable to be cached persistently (on disk/etc).
+ // Enable for any executable that is likely to be used in future runs.
+ // Note that not all caches support persistent serialization and this is just
+ // a hint.
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_PERSISTENT_CACHING = 1u << 1,
+ // Allows the cache to optimize the executable as much as it can.
+ // This may cause preparation to take significantly longer while (hopefully)
+ // improving runtime performance. Avoid for one-shot executables.
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_OPTIMIZATION = 1u << 2,
+ // Enables Executable debugging methods if supported by the device and
+ // executable. This may disable certain optimizations or retain additional
+ // data to allow disassembly, stepping, etc.
+ //
+ // Device must support the IREE_HAL_DEVICE_FEATURE_SUPPORTS_DEBUGGING feature
+ // and executables must support the ExecutableFeature::kDebugging feature.
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_DEBUGGING = 1u << 3,
+ // Enables Executable coverage if supported by the device and executable.
+ // Depending on the optimization mode this may produce partial coverage
+ // results (for example, when certain source operations were optimized away).
+ //
+ // Device must support the IREE_HAL_DEVICE_FEATURE_SUPPORTS_COVERAGE feature
+ // and executables must support the ExecutableFeature::kCoverage feature.
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_COVERAGE = 1u << 4,
+ // Enables Executable profiling if supported by the device and executable.
+ // Depending on the optimization mode this may produce partial profiling
+ // results. Profiling attribution (whether to the entire executable or
+ // specific operations) depends on the implementation.
+ //
+ // Device must support the IREE_HAL_DEVICE_FEATURE_SUPPORTS_PROFILING feature
+ // and executables must support the ExecutableFeature::kProfiling feature.
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_PROFILING = 1u << 5,
+ // Disables verification of executable layouts and modes.
+ // This is useful when debugging with partial information but should never
+ // be enabled for real usage as the verification is the best way to catch
+ // API misuse.
+ IREE_HAL_EXECUTABLE_CACHING_MODE_DISABLE_VERIFICATION = 1u << 6,
+};
+typedef uint32_t iree_hal_executable_caching_mode_t;
+
+// Defines an executable compilation specification.
+typedef struct iree_hal_executable_params_t {
+ // Specifies what caching the executable cache is allowed to perform and
+ // (if supported) which transformations on the executable contents are
+ // allowed.
+ iree_hal_executable_caching_mode_t caching_mode;
+
+ // Indicates the format of the data in |executable_data|.
+ iree_string_view_t executable_format;
+
+ // Opaque compiler-generated executable data.
+ // By default the memory storing the executable data is owned by the caller
+ // and not guaranteed to live beyond the preparation call.
+ //
+ // Callers can indicate that they guarantee the lifetime of the memory
+ // outlives the executable that will be created from it with the
+ // IREE_HAL_EXECUTABLE_CACHING_MODE_ALIAS_PROVIDED_DATA flag, in which case
+ // the cache is allowed to retain the data for as long as there is a reference
+ // to any executable created using it still held by the caller.
+ iree_const_byte_span_t executable_data;
+
+ // A set of executable layouts for each entry point in the executable.
+ // The order matches that produced by the compiler. As multiple entry points
+ // may share the same layout some entries in this list may reference the same
+ // executable layout objects.
+ iree_host_size_t executable_layout_count;
+ iree_hal_executable_layout_t* const* executable_layouts;
+
+ // Executable-level constants table used to perform runtime specialization
+ // when information is not available statically during compilation. The
+ // compiler defines the contents of the table, how they are populated, and
+ // their usage in the executable.
+ //
+ // For targets that natively support specialization these directly map down:
+ // Metal: function constants
+ // WGSL: pipeline overrides
+ // Vulkan/SPIR-V: specialization constants
+ // Other targets may present these as constant tables or uniform buffers.
+ // Since the values cannot change after initialization targets that JIT may
+ // perform substitution during initialization to inline the values
+ // immediately (via CUDA PTX linking, etc).
+ iree_host_size_t constant_count;
+ const uint32_t* constants;
+} iree_hal_executable_params_t;
+
+// Initializes |out_executable_params| to the default values for normal
+// executables. Callers must override the fields as required.
+void iree_hal_executable_params_initialize(
+ iree_hal_executable_params_t* out_executable_params);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_cache_t
+//===----------------------------------------------------------------------===//
+
+// A cache of prepared executables for a particular device.
+// Caches may be shared across multiple devices from the same driver or specific
+// to individual devices. Caches may persist prepared executables across process
+// launches or re-prepare them each run. Callers should assume that the cache is
+// a no-op and the returned Executables only live for as long as the cache does.
+//
+// The term 'cache' here is rather optimistic - it's perfectly acceptable for
+// implementations to not cache at all and return new Executables for each
+// iree_hal_executable_cache_prepare_executable called (even for the same
+// executable). Callers should expect such behavior and try to retain the
+// results of the iree_hal_executable_cache_prepare_executable calls to reduce
+// overhead in re-preparing executables.
+//
+// Thread-safe - multiple threads may prepare executables (including the *same*
+// executable) simultaneously.
+typedef struct iree_hal_executable_cache_t iree_hal_executable_cache_t;
+
+// Creates an executable cache using the given identifier.
+// The identifier is provided to the backing cache API as way to partition
+// caches between different groups of executables (from different modules, etc).
+//
+// Any host-side work that needs to be performed will be scheduled on |loop|.
+// This enables JITs, device-specific translation, and verification to be
+// parallelized using a shared scheduler. The loop must remain valid for the
+// lifetime of the executable cache.
+IREE_API_EXPORT iree_status_t iree_hal_executable_cache_create(
+ iree_hal_device_t* device, iree_string_view_t identifier, iree_loop_t loop,
+ iree_hal_executable_cache_t** out_executable_cache);
+
+// Retains the given |executable_cache| for the caller.
+IREE_API_EXPORT void iree_hal_executable_cache_retain(
+ iree_hal_executable_cache_t* executable_cache);
+
+// Releases the given |executable_cache| from the caller.
+IREE_API_EXPORT void iree_hal_executable_cache_release(
+ iree_hal_executable_cache_t* executable_cache);
+
+// Returns true if the executable cache can prepare the given executable input
+// format. Preparation may still fail if the particular version or features
+// required by the executable are not supported.
+IREE_API_EXPORT bool iree_hal_executable_cache_can_prepare_format(
+ iree_hal_executable_cache_t* executable_cache,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format);
+
+// Prepares the executable defined by |executable_params| for use.
+// The provided |executable_data| (in a format defined by |executable_format|)
+// will be used to either lookup a previously prepared executable in the cache
+// or prepare a new one.
+//
+// Each entry point in the executable requires a corresponding value in
+// |executable_layouts| defining the layout used by the entry point. If multiple
+// entry points use the same layouts they can reuse the same values.
+//
+// Depending on the driver preparation may take a non-trivial amount of time
+// (such as when JITing/etc). As the cache is internally synchronized callers
+// can issue preparation requests from multiple threads - even for the same
+// executables - and calls will block until preparation completes.
+IREE_API_EXPORT iree_status_t iree_hal_executable_cache_prepare_executable(
+ iree_hal_executable_cache_t* executable_cache,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_cache_t implementation details
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_executable_cache_vtable_t {
+ void(IREE_API_PTR* destroy)(iree_hal_executable_cache_t* executable_cache);
+
+ bool(IREE_API_PTR* can_prepare_format)(
+ iree_hal_executable_cache_t* executable_cache,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format);
+
+ iree_status_t(IREE_API_PTR* prepare_executable)(
+ iree_hal_executable_cache_t* executable_cache,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable);
+} iree_hal_executable_cache_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_executable_cache_vtable_t);
+
+IREE_API_EXPORT void iree_hal_executable_cache_destroy(
+ iree_hal_executable_cache_t* executable_cache);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_EXECUTABLE_CACHE_H_
diff --git a/runtime/src/iree/hal/executable_layout.c b/runtime/src/iree/hal/executable_layout.c
new file mode 100644
index 0000000..5755b8a
--- /dev/null
+++ b/runtime/src/iree/hal/executable_layout.c
@@ -0,0 +1,38 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/executable_layout.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/device.h"
+#include "iree/hal/resource.h"
+
+#define _VTABLE_DISPATCH(executable_layout, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(executable_layout, iree_hal_executable_layout, \
+ method_name)
+
+IREE_HAL_API_RETAIN_RELEASE(executable_layout);
+
+IREE_API_EXPORT iree_status_t iree_hal_executable_layout_create(
+ iree_hal_device_t* device, iree_host_size_t push_constants,
+ iree_host_size_t set_layout_count,
+ iree_hal_descriptor_set_layout_t** set_layouts,
+ iree_hal_executable_layout_t** out_executable_layout) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(!set_layout_count || set_layouts);
+ IREE_ASSERT_ARGUMENT(out_executable_layout);
+ *out_executable_layout = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device,
+ create_executable_layout)(
+ device, push_constants, set_layout_count, set_layouts,
+ out_executable_layout);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/hal/executable_layout.h b/runtime/src/iree/hal/executable_layout.h
new file mode 100644
index 0000000..7fa1a21
--- /dev/null
+++ b/runtime/src/iree/hal/executable_layout.h
@@ -0,0 +1,78 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_EXECUTABLE_LAYOUT_H_
+#define IREE_HAL_EXECUTABLE_LAYOUT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/descriptor_set_layout.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_layout_t
+//===----------------------------------------------------------------------===//
+
+// Defines the resource binding layout used by an executable.
+// A "descriptor" is effectively a bound memory range and each dispatch can use
+// one or more "descriptor sets" to access their I/O memory. A "descriptor set
+// layout" defines the types and usage semantics of the descriptors that make up
+// one set. An "executable layout" defines all of the set layouts that will be
+// used when dispatching. Implementations can use this to verify program
+// correctness and accelerate reservation/allocation/computation of
+// descriptor-related operations.
+//
+// Executables can share the same layout even if they do not use all of the
+// resources referenced by descriptor sets referenced by the layout. Doing so
+// allows for more efficient binding as bound descriptor sets can be reused when
+// command buffer executable bindings change.
+//
+// Maps to VkPipelineLayout:
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPipelineLayout.html
+typedef struct iree_hal_executable_layout_t iree_hal_executable_layout_t;
+
+// Creates an executable layout composed of the given descriptor set layouts.
+// The returned executable layout can be used by multiple executables with the
+// same compatible resource binding layouts.
+IREE_API_EXPORT iree_status_t iree_hal_executable_layout_create(
+ iree_hal_device_t* device, iree_host_size_t push_constants,
+ iree_host_size_t set_layout_count,
+ iree_hal_descriptor_set_layout_t** set_layouts,
+ iree_hal_executable_layout_t** out_executable_layout);
+
+// Retains the given |executable_layout| for the caller.
+IREE_API_EXPORT void iree_hal_executable_layout_retain(
+ iree_hal_executable_layout_t* executable_layout);
+
+// Releases the given |executable_layout| from the caller.
+IREE_API_EXPORT void iree_hal_executable_layout_release(
+ iree_hal_executable_layout_t* executable_layout);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_layout_t implementation details
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_executable_layout_vtable_t {
+ void(IREE_API_PTR* destroy)(iree_hal_executable_layout_t* executable_layout);
+} iree_hal_executable_layout_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_executable_layout_vtable_t);
+
+IREE_API_EXPORT void iree_hal_executable_layout_destroy(
+ iree_hal_executable_layout_t* executable_layout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_EXECUTABLE_LAYOUT_H_
diff --git a/runtime/src/iree/hal/local/BUILD b/runtime/src/iree/hal/local/BUILD
new file mode 100644
index 0000000..c2927ef
--- /dev/null
+++ b/runtime/src/iree/hal/local/BUILD
@@ -0,0 +1,181 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Default implementations for HAL types that use the host resources.
+# These are generally just wrappers around host heap memory and host threads.
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library", "iree_runtime_cc_test")
+load("//build_tools/bazel:cc_binary_benchmark.bzl", "cc_binary_benchmark")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_runtime_cc_library(
+ name = "executable_environment",
+ srcs = ["executable_environment.c"],
+ hdrs = ["executable_environment.h"],
+ deps = [
+ ":executable_library",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal:cpu",
+ "//runtime/src/iree/hal",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "executable_library",
+ hdrs = ["executable_library.h"],
+)
+
+cc_binary_benchmark(
+ name = "executable_library_benchmark",
+ srcs = ["executable_library_benchmark.c"],
+ deps = [
+ ":executable_environment",
+ ":executable_library",
+ ":local",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal:file_io",
+ "//runtime/src/iree/base/internal:flags",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/local/loaders:embedded_library_loader",
+ "//runtime/src/iree/testing:benchmark",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "executable_library_test",
+ srcs = [
+ "executable_library_demo.c",
+ "executable_library_demo.h",
+ "executable_library_test.c",
+ ],
+ deps = [
+ ":executable_environment",
+ ":executable_library",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "local",
+ srcs = [
+ "executable_loader.c",
+ "inline_command_buffer.c",
+ "local_descriptor_set.c",
+ "local_descriptor_set_layout.c",
+ "local_executable.c",
+ "local_executable_cache.c",
+ "local_executable_layout.c",
+ ],
+ hdrs = [
+ "executable_loader.h",
+ "inline_command_buffer.h",
+ "local_descriptor_set.h",
+ "local_descriptor_set_layout.h",
+ "local_executable.h",
+ "local_executable_cache.h",
+ "local_executable_layout.h",
+ ],
+ deps = [
+ ":executable_environment",
+ ":executable_library",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:fpu_state",
+ "//runtime/src/iree/hal",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "sync_driver",
+ srcs = [
+ "sync_device.c",
+ "sync_driver.c",
+ "sync_event.c",
+ "sync_semaphore.c",
+ ],
+ hdrs = [
+ "sync_device.h",
+ "sync_driver.h",
+ "sync_event.h",
+ "sync_semaphore.h",
+ ],
+ deps = [
+ ":local",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:arena",
+ "//runtime/src/iree/base/internal:synchronization",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/utils:buffer_transfer",
+ ],
+)
+
+#===------------------------------------------------------------------------===#
+# Thread dependent packages
+#===------------------------------------------------------------------------===#
+
+iree_cmake_extra_content(
+ content = """
+# task_driver is used by asynchronous drivers.
+# TODO(scotttodd): refactor this - code depending on threading should be
+# possible to declare in the build system but conditionally link in
+if(NOT EMSCRIPTEN AND NOT (${IREE_HAL_DRIVER_DYLIB} OR ${IREE_HAL_DRIVER_VMVX}))
+ return()
+endif()
+""",
+ inline = True,
+)
+
+iree_runtime_cc_library(
+ name = "task_driver",
+ srcs = [
+ "task_command_buffer.c",
+ "task_device.c",
+ "task_driver.c",
+ "task_event.c",
+ "task_queue.c",
+ "task_queue_state.c",
+ "task_semaphore.c",
+ ],
+ hdrs = [
+ "task_command_buffer.h",
+ "task_device.h",
+ "task_driver.h",
+ "task_event.h",
+ "task_queue.h",
+ "task_queue_state.h",
+ "task_semaphore.h",
+ ],
+ deps = [
+ ":executable_environment",
+ ":executable_library",
+ ":local",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:arena",
+ "//runtime/src/iree/base/internal:event_pool",
+ "//runtime/src/iree/base/internal:synchronization",
+ "//runtime/src/iree/base/internal:wait_handle",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/utils:buffer_transfer",
+ "//runtime/src/iree/hal/utils:resource_set",
+ "//runtime/src/iree/task",
+ ],
+)
diff --git a/runtime/src/iree/hal/local/CMakeLists.txt b/runtime/src/iree/hal/local/CMakeLists.txt
new file mode 100644
index 0000000..693f722
--- /dev/null
+++ b/runtime/src/iree/hal/local/CMakeLists.txt
@@ -0,0 +1,174 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/local/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ executable_environment
+ HDRS
+ "executable_environment.h"
+ SRCS
+ "executable_environment.c"
+ DEPS
+ ::executable_library
+ iree::base
+ iree::base::internal::cpu
+ iree::base::tracing
+ iree::hal
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ executable_library
+ HDRS
+ "executable_library.h"
+ DEPS
+
+ PUBLIC
+)
+
+iree_cc_binary_benchmark(
+ NAME
+ executable_library_benchmark
+ SRCS
+ "executable_library_benchmark.c"
+ DEPS
+ ::executable_environment
+ ::executable_library
+ ::local
+ iree::base
+ iree::base::internal::file_io
+ iree::base::internal::flags
+ iree::base::tracing
+ iree::hal
+ iree::hal::local::loaders::embedded_library_loader
+ iree::testing::benchmark
+ TESTONLY
+)
+
+iree_cc_test(
+ NAME
+ executable_library_test
+ SRCS
+ "executable_library_demo.c"
+ "executable_library_demo.h"
+ "executable_library_test.c"
+ DEPS
+ ::executable_environment
+ ::executable_library
+ iree::base
+ iree::base::core_headers
+)
+
+iree_cc_library(
+ NAME
+ local
+ HDRS
+ "executable_loader.h"
+ "inline_command_buffer.h"
+ "local_descriptor_set.h"
+ "local_descriptor_set_layout.h"
+ "local_executable.h"
+ "local_executable_cache.h"
+ "local_executable_layout.h"
+ SRCS
+ "executable_loader.c"
+ "inline_command_buffer.c"
+ "local_descriptor_set.c"
+ "local_descriptor_set_layout.c"
+ "local_executable.c"
+ "local_executable_cache.c"
+ "local_executable_layout.c"
+ DEPS
+ ::executable_environment
+ ::executable_library
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::fpu_state
+ iree::base::tracing
+ iree::hal
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ sync_driver
+ HDRS
+ "sync_device.h"
+ "sync_driver.h"
+ "sync_event.h"
+ "sync_semaphore.h"
+ SRCS
+ "sync_device.c"
+ "sync_driver.c"
+ "sync_event.c"
+ "sync_semaphore.c"
+ DEPS
+ ::local
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::arena
+ iree::base::internal::synchronization
+ iree::base::tracing
+ iree::hal
+ iree::hal::utils::buffer_transfer
+ PUBLIC
+)
+
+# task_driver is used by asynchronous drivers.
+# TODO(scotttodd): refactor this - code depending on threading should be
+# possible to declare in the build system but conditionally link in
+if(NOT EMSCRIPTEN AND NOT (${IREE_HAL_DRIVER_DYLIB} OR ${IREE_HAL_DRIVER_VMVX}))
+ return()
+endif()
+
+iree_cc_library(
+ NAME
+ task_driver
+ HDRS
+ "task_command_buffer.h"
+ "task_device.h"
+ "task_driver.h"
+ "task_event.h"
+ "task_queue.h"
+ "task_queue_state.h"
+ "task_semaphore.h"
+ SRCS
+ "task_command_buffer.c"
+ "task_device.c"
+ "task_driver.c"
+ "task_event.c"
+ "task_queue.c"
+ "task_queue_state.c"
+ "task_semaphore.c"
+ DEPS
+ ::executable_environment
+ ::executable_library
+ ::local
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::arena
+ iree::base::internal::event_pool
+ iree::base::internal::synchronization
+ iree::base::internal::wait_handle
+ iree::base::tracing
+ iree::hal
+ iree::hal::utils::buffer_transfer
+ iree::hal::utils::resource_set
+ iree::task
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/local/elf/BUILD b/runtime/src/iree/hal/local/elf/BUILD
new file mode 100644
index 0000000..f5400f9
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/BUILD
@@ -0,0 +1,96 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/bazel:native_binary.bzl", "native_test")
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+#===------------------------------------------------------------------------===#
+# Runtime ELF module loader/linker
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "elf_module",
+ srcs = [
+ "elf_module.c",
+ ],
+ hdrs = [
+ "elf_module.h",
+ "elf_types.h",
+ ],
+ deps = [
+ ":arch",
+ ":platform",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+cc_binary(
+ name = "elf_module_test_binary",
+ srcs = ["elf_module_test_main.c"],
+ deps = [
+ ":elf_module",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/hal/local:executable_environment",
+ "//runtime/src/iree/hal/local:executable_library",
+ "//runtime/src/iree/hal/local/elf/testdata:elementwise_mul",
+ ],
+)
+
+native_test(
+ name = "elf_module_test",
+ src = ":elf_module_test_binary",
+)
+
+#===------------------------------------------------------------------------===#
+# Architecture and platform support
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "arch",
+ srcs = [
+ "arch/arm_32.c",
+ "arch/arm_64.c",
+ "arch/riscv.c",
+ "arch/x86_32.c",
+ "arch/x86_64.c",
+ "elf_types.h",
+ ],
+ hdrs = [
+ "arch.h",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "platform",
+ srcs = [
+ "platform/apple.c",
+ "platform/generic.c",
+ "platform/linux.c",
+ "platform/windows.c",
+ ],
+ hdrs = [
+ "platform.h",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ ],
+)
diff --git a/runtime/src/iree/hal/local/elf/CMakeLists.txt b/runtime/src/iree/hal/local/elf/CMakeLists.txt
new file mode 100644
index 0000000..67e67b2
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/CMakeLists.txt
@@ -0,0 +1,107 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/local/elf/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ elf_module
+ HDRS
+ "elf_module.h"
+ "elf_types.h"
+ SRCS
+ "elf_module.c"
+ DEPS
+ ::arch
+ ::platform
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_binary(
+ NAME
+ elf_module_test_binary
+ SRCS
+ "elf_module_test_main.c"
+ DEPS
+ ::elf_module
+ iree::base
+ iree::base::core_headers
+ iree::hal::local::elf::testdata::elementwise_mul
+ iree::hal::local::executable_environment
+ iree::hal::local::executable_library
+)
+
+iree_native_test(
+ NAME
+ "elf_module_test"
+ SRC
+ ::elf_module_test_binary
+)
+
+iree_cc_library(
+ NAME
+ arch
+ HDRS
+ "arch.h"
+ SRCS
+ "arch/arm_32.c"
+ "arch/arm_64.c"
+ "arch/riscv.c"
+ "arch/x86_32.c"
+ "arch/x86_64.c"
+ "elf_types.h"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ platform
+ HDRS
+ "platform.h"
+ SRCS
+ "platform/apple.c"
+ "platform/generic.c"
+ "platform/linux.c"
+ "platform/windows.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
+
+# TODO(*): figure out how to make this work on Bazel+Windows.
+if(${MSVC})
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+ set_source_files_properties(
+ arch/x86_64_msvc.asm
+ PROPERTIES
+ LANGUAGE ASM_MASM
+ )
+ # CMake + MASM does not work well and CMake ends up passing all our C/C++
+ # flags confusing MASM. We invoke MASM directly (ml64.exe) to keep it quiet.
+ target_sources(iree_hal_local_elf_arch PRIVATE "arch/x86_64_msvc.obj")
+ add_custom_command(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/arch/x86_64_msvc.obj
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/arch/x86_64_msvc.asm
+ COMMAND ml64 /nologo /Zi /c /Fo ${CMAKE_CURRENT_BINARY_DIR}/arch/x86_64_msvc.obj ${CMAKE_CURRENT_SOURCE_DIR}/arch/x86_64_msvc.asm
+ VERBATIM
+ )
+ endif()
+endif()
diff --git a/runtime/src/iree/hal/local/elf/arch.h b/runtime/src/iree/hal/local/elf/arch.h
new file mode 100644
index 0000000..3933c95
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/arch.h
@@ -0,0 +1,65 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_ELF_ARCH_H_
+#define IREE_HAL_LOCAL_ELF_ARCH_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/local/elf/elf_types.h"
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+// Returns true if the reported ELF machine specification is valid.
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr);
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+// State used during relocation.
+typedef struct iree_elf_relocation_state_t {
+ // Bias applied to all relative addresses (from the string table, etc) in the
+ // loaded module. This is an offset from the vaddr_base that may not be 0 if
+ // host page granularity was larger than the ELF's defined granularity.
+ uint8_t* vaddr_bias;
+
+ // PT_DYNAMIC table.
+ iree_host_size_t dyn_table_count;
+ const iree_elf_dyn_t* dyn_table;
+} iree_elf_relocation_state_t;
+
+// Applies architecture-specific relocations.
+iree_status_t iree_elf_arch_apply_relocations(
+ iree_elf_relocation_state_t* state);
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+// TODO(benvanik): add thunk functions (iree_elf_thunk_*) to be used by imports
+// for marshaling from linux ABI in the ELF to host ABI.
+
+// Host -> ELF: void(*)(void)
+void iree_elf_call_v_v(const void* symbol_ptr);
+
+// Host -> ELF: void*(*)(int)
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0);
+
+// Host -> ELF: void*(*)(int, void*)
+void* iree_elf_call_p_ip(const void* symbol_ptr, int a0, void* a1);
+
+// Host -> ELF: int(*)(void*)
+int iree_elf_call_i_p(const void* symbol_ptr, void* a0);
+
+// Host -> ELF: int(*)(void*, void*, void*)
+int iree_elf_call_i_ppp(const void* symbol_ptr, void* a0, void* a1, void* a2);
+
+// ELF -> Host: int(*)(void*)
+int iree_elf_thunk_i_p(const void* symbol_ptr, void* a0);
+
+#endif // IREE_HAL_LOCAL_ELF_ARCH_H_
diff --git a/runtime/src/iree/hal/local/elf/arch/arm_32.c b/runtime/src/iree/hal/local/elf/arch/arm_32.c
new file mode 100644
index 0000000..4044fbf
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/arch/arm_32.c
@@ -0,0 +1,152 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/arch.h"
+#include "iree/hal/local/elf/elf_types.h"
+
+#if defined(IREE_ARCH_ARM_32)
+
+// Documentation:
+// https://developer.arm.com/documentation/ihi0044/h/
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr) {
+ return ehdr->e_machine == 0x28; // EM_ARM / 40
+}
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+enum {
+ IREE_ELF_R_ARM_NONE = 0,
+ IREE_ELF_R_ARM_ABS32 = 2,
+ IREE_ELF_R_ARM_REL32 = 3,
+ IREE_ELF_R_ARM_GLOB_DAT = 21,
+ IREE_ELF_R_ARM_JUMP_SLOT = 22,
+ IREE_ELF_R_ARM_RELATIVE = 23,
+};
+
+static iree_status_t iree_elf_arch_arm_apply_rel(
+ iree_elf_relocation_state_t* state, iree_host_size_t rel_count,
+ const iree_elf_rel_t* rel_table) {
+ for (iree_host_size_t i = 0; i < rel_count; ++i) {
+ const iree_elf_rel_t* rel = &rel_table[i];
+ uint32_t type = IREE_ELF_R_TYPE(rel->r_info);
+ if (type == 0) continue;
+
+ // TODO(benvanik): support imports by resolving from the import table.
+ iree_elf_addr_t sym_addr = 0;
+ if (IREE_ELF_R_SYM(rel->r_info) != 0) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "symbol-relative relocations not implemented");
+ }
+
+ iree_elf_addr_t instr_ptr =
+ (iree_elf_addr_t)state->vaddr_bias + rel->r_offset;
+ switch (type) {
+ case IREE_ELF_R_ARM_NONE:
+ break;
+ case IREE_ELF_R_ARM_ABS32:
+ *(uint32_t*)instr_ptr += (uint32_t)sym_addr;
+ break;
+ case IREE_ELF_R_ARM_REL32:
+ *(uint32_t*)instr_ptr += (uint32_t)sym_addr - rel->r_offset;
+ break;
+ case IREE_ELF_R_ARM_GLOB_DAT:
+ case IREE_ELF_R_ARM_JUMP_SLOT:
+ *(uint32_t*)instr_ptr = (uint32_t)sym_addr;
+ break;
+ case IREE_ELF_R_ARM_RELATIVE:
+ *(uint32_t*)instr_ptr += (uint32_t)state->vaddr_bias;
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unimplemented ARM relocation type %08X", type);
+ }
+ }
+ return iree_ok_status();
+}
+
+iree_status_t iree_elf_arch_apply_relocations(
+ iree_elf_relocation_state_t* state) {
+ // Gather the relevant relocation tables.
+ iree_host_size_t rel_count = 0;
+ const iree_elf_rel_t* rel_table = NULL;
+ for (iree_host_size_t i = 0; i < state->dyn_table_count; ++i) {
+ const iree_elf_dyn_t* dyn = &state->dyn_table[i];
+ switch (dyn->d_tag) {
+ case IREE_ELF_DT_REL:
+ rel_table =
+ (const iree_elf_rel_t*)(state->vaddr_bias + dyn->d_un.d_ptr);
+ break;
+ case IREE_ELF_DT_RELSZ:
+ rel_count = dyn->d_un.d_val / sizeof(iree_elf_rel_t);
+ break;
+
+ case IREE_ELF_DT_RELA:
+ case IREE_ELF_DT_RELASZ:
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "unsupported DT_RELA relocations");
+ default:
+ // Ignored.
+ break;
+ }
+ }
+ if (!rel_table) rel_count = 0;
+
+ if (rel_count > 0) {
+ IREE_RETURN_IF_ERROR(
+ iree_elf_arch_arm_apply_rel(state, rel_count, rel_table));
+ }
+
+ return iree_ok_status();
+}
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+void iree_elf_call_v_v(const void* symbol_ptr) {
+ typedef void (*ptr_t)(void);
+ ((ptr_t)symbol_ptr)();
+}
+
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0) {
+ typedef void* (*ptr_t)(int);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+void* iree_elf_call_p_ip(const void* symbol_ptr, int a0, void* a1) {
+ typedef void* (*ptr_t)(int, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1);
+}
+
+int iree_elf_call_i_p(const void* symbol_ptr, void* a0) {
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+int iree_elf_call_i_ppp(const void* symbol_ptr, void* a0, void* a1, void* a2) {
+ typedef int (*ptr_t)(void*, void*, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1, a2);
+}
+
+int iree_elf_thunk_i_p(const void* symbol_ptr, void* a0) {
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+#endif // IREE_ARCH_ARM_32
diff --git a/runtime/src/iree/hal/local/elf/arch/arm_64.c b/runtime/src/iree/hal/local/elf/arch/arm_64.c
new file mode 100644
index 0000000..cc8398a
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/arch/arm_64.c
@@ -0,0 +1,149 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/arch.h"
+#include "iree/hal/local/elf/elf_types.h"
+
+#if defined(IREE_ARCH_ARM_64)
+
+// Documentation:
+// https://developer.arm.com/documentation/ihi0056/g/
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr) {
+ return ehdr->e_machine == 0xB7; // EM_AARCH64 / 183
+}
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+enum {
+ IREE_ELF_R_AARCH64_NONE = 0,
+ IREE_ELF_R_AARCH64_ABS64 = 257,
+ IREE_ELF_R_AARCH64_GLOB_DAT = 1025, // S + A
+ IREE_ELF_R_AARCH64_JUMP_SLOT = 1026, // S + A
+ IREE_ELF_R_AARCH64_RELATIVE = 1027, // Delta(S) + A
+};
+
+static iree_status_t iree_elf_arch_aarch64_apply_rela(
+ iree_elf_relocation_state_t* state, iree_host_size_t rela_count,
+ const iree_elf_rela_t* rela_table) {
+ for (iree_host_size_t i = 0; i < rela_count; ++i) {
+ const iree_elf_rela_t* rela = &rela_table[i];
+ uint32_t type = IREE_ELF_R_TYPE(rela->r_info);
+ if (type == 0) continue;
+
+ // TODO(benvanik): support imports by resolving from the import table.
+ iree_elf_addr_t sym_addr = 0;
+ if (IREE_ELF_R_SYM(rela->r_info) != 0) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "symbol-relative relocations not implemented");
+ }
+
+ iree_elf_addr_t instr_ptr =
+ (iree_elf_addr_t)state->vaddr_bias + rela->r_offset;
+ switch (type) {
+ case IREE_ELF_R_AARCH64_NONE:
+ break;
+ case IREE_ELF_R_AARCH64_ABS64:
+ *(uint64_t*)instr_ptr += (uint64_t)(sym_addr + rela->r_addend);
+ break;
+ case IREE_ELF_R_AARCH64_GLOB_DAT:
+ case IREE_ELF_R_AARCH64_JUMP_SLOT:
+ *(uint64_t*)instr_ptr = (uint64_t)(sym_addr + rela->r_addend);
+ break;
+ case IREE_ELF_R_AARCH64_RELATIVE:
+ *(uint64_t*)instr_ptr = (uint64_t)(state->vaddr_bias + rela->r_addend);
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unimplemented aarch64 relocation type %08X",
+ type);
+ }
+ }
+ return iree_ok_status();
+}
+
+iree_status_t iree_elf_arch_apply_relocations(
+ iree_elf_relocation_state_t* state) {
+ // Gather the relevant relocation tables.
+ iree_host_size_t rela_count = 0;
+ const iree_elf_rela_t* rela_table = NULL;
+ for (iree_host_size_t i = 0; i < state->dyn_table_count; ++i) {
+ const iree_elf_dyn_t* dyn = &state->dyn_table[i];
+ switch (dyn->d_tag) {
+ case IREE_ELF_DT_RELA:
+ rela_table =
+ (const iree_elf_rela_t*)(state->vaddr_bias + dyn->d_un.d_ptr);
+ break;
+ case IREE_ELF_DT_RELASZ:
+ rela_count = dyn->d_un.d_val / sizeof(iree_elf_rela_t);
+ break;
+
+ case IREE_ELF_DT_REL:
+ case IREE_ELF_DT_RELSZ:
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "unsupported DT_REL relocations");
+ default:
+ // Ignored.
+ break;
+ }
+ }
+ if (!rela_table) rela_count = 0;
+
+ if (rela_count > 0) {
+ IREE_RETURN_IF_ERROR(
+ iree_elf_arch_aarch64_apply_rela(state, rela_count, rela_table));
+ }
+
+ return iree_ok_status();
+}
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+void iree_elf_call_v_v(const void* symbol_ptr) {
+ typedef void (*ptr_t)(void);
+ ((ptr_t)symbol_ptr)();
+}
+
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0) {
+ typedef void* (*ptr_t)(int);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+void* iree_elf_call_p_ip(const void* symbol_ptr, int a0, void* a1) {
+ typedef void* (*ptr_t)(int, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1);
+}
+
+int iree_elf_call_i_p(const void* symbol_ptr, void* a0) {
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+int iree_elf_call_i_ppp(const void* symbol_ptr, void* a0, void* a1, void* a2) {
+ typedef int (*ptr_t)(void*, void*, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1, a2);
+}
+
+int iree_elf_thunk_i_p(const void* symbol_ptr, void* a0) {
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+#endif // IREE_ARCH_ARM_64
diff --git a/runtime/src/iree/hal/local/elf/arch/riscv.c b/runtime/src/iree/hal/local/elf/arch/riscv.c
new file mode 100644
index 0000000..807b62d
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/arch/riscv.c
@@ -0,0 +1,192 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/arch.h"
+#include "iree/hal/local/elf/elf_types.h"
+
+#if defined(IREE_ARCH_RISCV_32) || defined(IREE_ARCH_RISCV_64)
+
+// Documentation:
+// https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr) {  // machine-type check only
+ return ehdr->e_machine == 0xF3;  // EM_RISCV / 243
+}
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+enum {  // RISC-V relocation types from the riscv-elf-psabi spec linked above
+ IREE_ELF_R_RISCV_NONE = 0,  // no-op
+ IREE_ELF_R_RISCV_32 = 1,  // 32-bit absolute: S + A
+ IREE_ELF_R_RISCV_64 = 2,  // 64-bit absolute: S + A
+ IREE_ELF_R_RISCV_RELATIVE = 3,  // adjust by load bias: B + A
+ IREE_ELF_R_RISCV_COPY = 4,  // copy symbol data (unused here)
+ IREE_ELF_R_RISCV_JUMP_SLOT = 5,  // PLT slot: S
+};
+
+#if defined(IREE_ARCH_RISCV_32)
+static iree_status_t iree_elf_arch_riscv_apply_rela(  // RV32 (32-bit field) variant
+ iree_elf_relocation_state_t* state, iree_host_size_t rela_count,
+ const iree_elf_rela_t* rela_table) {
+ for (iree_host_size_t i = 0; i < rela_count; ++i) {
+ const iree_elf_rela_t* rela = &rela_table[i];
+ uint32_t type = IREE_ELF_R_TYPE(rela->r_info);
+ if (type == 0) continue;  // IREE_ELF_R_RISCV_NONE: nothing to patch
+
+ // TODO(benvanik): support imports by resolving from the import table.
+ iree_elf_addr_t sym_addr = 0;  // always 0 today; symbol relocs rejected below
+ if (IREE_ELF_R_SYM(rela->r_info) != 0) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "symbol-relative relocations not implemented");
+ }
+
+ iree_elf_addr_t instr_ptr =
+ (iree_elf_addr_t)state->vaddr_bias + rela->r_offset;  // host address to patch
+ switch (type) {
+ case IREE_ELF_R_RISCV_NONE:
+ break;
+ case IREE_ELF_R_RISCV_32:
+ *(uint32_t*)instr_ptr = (uint32_t)(sym_addr + rela->r_addend);
+ break;
+ case IREE_ELF_R_RISCV_JUMP_SLOT:
+ *(uint32_t*)instr_ptr = (uint32_t)sym_addr;
+ break;
+ case IREE_ELF_R_RISCV_RELATIVE:
+ *(uint32_t*)instr_ptr = (uint32_t)(state->vaddr_bias + rela->r_addend);  // rebase by load bias
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unimplemented riscv32 relocation type %08X",
+ type);
+ }
+ }
+ return iree_ok_status();
+}
+#else // IREE_ARCH_RISCV_64
+static iree_status_t iree_elf_arch_riscv_apply_rela(  // RV64 (64-bit field) variant
+ iree_elf_relocation_state_t* state, iree_host_size_t rela_count,
+ const iree_elf_rela_t* rela_table) {
+ for (iree_host_size_t i = 0; i < rela_count; ++i) {
+ const iree_elf_rela_t* rela = &rela_table[i];
+ uint32_t type = IREE_ELF_R_TYPE(rela->r_info);
+ if (type == 0) continue;  // IREE_ELF_R_RISCV_NONE: nothing to patch
+
+ // TODO(benvanik): support imports by resolving from the import table.
+ iree_elf_addr_t sym_addr = 0;  // always 0 today; symbol relocs rejected below
+ if (IREE_ELF_R_SYM(rela->r_info) != 0) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "symbol-relative relocations not implemented");
+ }
+
+ iree_elf_addr_t instr_ptr =
+ (iree_elf_addr_t)state->vaddr_bias + rela->r_offset;  // host address to patch
+ switch (type) {
+ case IREE_ELF_R_RISCV_NONE:
+ break;
+ case IREE_ELF_R_RISCV_32:
+ *(uint32_t*)instr_ptr = (uint32_t)(sym_addr + rela->r_addend);  // truncating 32-bit store
+ break;
+ case IREE_ELF_R_RISCV_64:
+ *(uint64_t*)instr_ptr = (uint64_t)(sym_addr + rela->r_addend);
+ break;
+ case IREE_ELF_R_RISCV_JUMP_SLOT:
+ *(uint64_t*)instr_ptr = (uint64_t)sym_addr;
+ break;
+ case IREE_ELF_R_RISCV_RELATIVE:
+ *(uint64_t*)instr_ptr = (uint64_t)(state->vaddr_bias + rela->r_addend);  // rebase by load bias
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unimplemented riscv64 relocation type %08X",
+ type);
+ }
+ }
+ return iree_ok_status();
+}
+#endif // IREE_ARCH_RISCV_*
+
+iree_status_t iree_elf_arch_apply_relocations(
+ iree_elf_relocation_state_t* state) {
+ // Gather the relevant relocation tables.
+ iree_host_size_t rela_count = 0;
+ const iree_elf_rela_t* rela_table = NULL;
+ for (iree_host_size_t i = 0; i < state->dyn_table_count; ++i) {
+ const iree_elf_dyn_t* dyn = &state->dyn_table[i];
+ switch (dyn->d_tag) {
+ case IREE_ELF_DT_RELA:
+ rela_table =
+ (const iree_elf_rela_t*)(state->vaddr_bias + dyn->d_un.d_ptr);  // rebase vaddr to host ptr
+ break;
+ case IREE_ELF_DT_RELASZ:
+ rela_count = dyn->d_un.d_val / sizeof(iree_elf_rela_t);  // byte size -> entry count
+ break;
+
+ case IREE_ELF_DT_REL:
+ case IREE_ELF_DT_RELSZ:
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "unsupported DT_REL relocations");  // only explicit-addend RELA handled
+ default:
+ // Ignored.
+ break;
+ }
+ }
+ if (!rela_table) rela_count = 0;  // size tag without a table: nothing to apply
+
+ if (rela_count > 0) {
+ IREE_RETURN_IF_ERROR(
+ iree_elf_arch_riscv_apply_rela(state, rela_count, rela_table));
+ }
+
+ return iree_ok_status();
+}
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+void iree_elf_call_v_v(const void* symbol_ptr) {  // call: void fn(void)
+ typedef void (*ptr_t)(void);
+ ((ptr_t)symbol_ptr)();  // direct cast-and-call; no ABI translation performed
+}
+
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0) {  // call: void* fn(int)
+ typedef void* (*ptr_t)(int);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+void* iree_elf_call_p_ip(const void* symbol_ptr, int a0, void* a1) {  // call: void* fn(int, void*)
+ typedef void* (*ptr_t)(int, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1);
+}
+
+int iree_elf_call_i_p(const void* symbol_ptr, void* a0) {  // call: int fn(void*)
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+int iree_elf_call_i_ppp(const void* symbol_ptr, void* a0, void* a1, void* a2) {  // call: int fn(void*, void*, void*)
+ typedef int (*ptr_t)(void*, void*, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1, a2);
+}
+
+int iree_elf_thunk_i_p(const void* symbol_ptr, void* a0) {  // same body as iree_elf_call_i_p on this arch (no translation performed)
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+#endif // IREE_ARCH_RISCV_*
diff --git a/runtime/src/iree/hal/local/elf/arch/x86_32.c b/runtime/src/iree/hal/local/elf/arch/x86_32.c
new file mode 100644
index 0000000..9d8d885
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/arch/x86_32.c
@@ -0,0 +1,175 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/arch.h"
+#include "iree/hal/local/elf/elf_types.h"
+
+#if defined(IREE_ARCH_X86_32)
+
+// Documentation:
+// https://uclibc.org/docs/psABI-i386.pdf
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr) {  // machine-type check only
+ return ehdr->e_machine == 0x03;  // EM_386 / 3
+}
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+enum {  // i386 relocation types from the psABI doc linked above
+ IREE_ELF_R_386_NONE = 0,  // no-op
+ IREE_ELF_R_386_32 = 1,  // direct 32-bit: S + A
+ IREE_ELF_R_386_PC32 = 2,  // PC-relative 32-bit: S + A - P
+ IREE_ELF_R_386_GLOB_DAT = 6,  // GOT entry: S
+ IREE_ELF_R_386_JMP_SLOT = 7,  // PLT slot: S
+ IREE_ELF_R_386_RELATIVE = 8,  // adjust by load bias: B + A
+};
+
+static iree_status_t iree_elf_arch_x86_32_apply_rel(  // i386 uses REL: addend stored in place
+ iree_elf_relocation_state_t* state, iree_host_size_t rel_count,
+ const iree_elf_rel_t* rel_table) {
+ for (iree_host_size_t i = 0; i < rel_count; ++i) {
+ const iree_elf_rel_t* rel = &rel_table[i];
+ uint32_t type = IREE_ELF_R_TYPE(rel->r_info);
+ if (type == IREE_ELF_R_386_NONE) continue;
+
+ // TODO(benvanik): support imports by resolving from the import table.
+ iree_elf_addr_t sym_addr = 0;  // always 0 today; symbol relocs rejected below
+ if (IREE_ELF_R_SYM(rel->r_info) != 0) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "symbol-relative relocations not implemented");
+ }
+
+ iree_elf_addr_t instr_ptr =
+ (iree_elf_addr_t)state->vaddr_bias + rel->r_offset;  // host address to patch
+ switch (type) {
+ // case IREE_ELF_R_386_NONE: early-exit above
+ case IREE_ELF_R_386_JMP_SLOT:
+ *(uint32_t*)instr_ptr = (uint32_t)sym_addr;
+ break;
+ case IREE_ELF_R_386_GLOB_DAT:
+ *(uint32_t*)instr_ptr = (uint32_t)sym_addr;
+ break;
+ case IREE_ELF_R_386_RELATIVE:
+ *(uint32_t*)instr_ptr += (uint32_t)state->vaddr_bias;  // += adds to the implicit in-place addend
+ break;
+ case IREE_ELF_R_386_32:
+ *(uint32_t*)instr_ptr += (uint32_t)sym_addr;
+ break;
+ case IREE_ELF_R_386_PC32:
+ *(uint32_t*)instr_ptr += (uint32_t)(sym_addr - instr_ptr);
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unimplemented x86 relocation type %08X", type);
+ }
+ }
+ return iree_ok_status();
+}
+
+iree_status_t iree_elf_arch_apply_relocations(
+ iree_elf_relocation_state_t* state) {
+ // Gather the relevant relocation tables.
+ iree_host_size_t rel_count = 0;
+ const iree_elf_rel_t* rel_table = NULL;
+ for (iree_host_size_t i = 0; i < state->dyn_table_count; ++i) {
+ const iree_elf_dyn_t* dyn = &state->dyn_table[i];
+ switch (dyn->d_tag) {
+ case IREE_ELF_DT_REL:
+ rel_table =
+ (const iree_elf_rel_t*)(state->vaddr_bias + dyn->d_un.d_ptr);  // rebase vaddr to host ptr
+ break;
+ case IREE_ELF_DT_RELSZ:
+ rel_count = dyn->d_un.d_val / sizeof(iree_elf_rel_t);  // byte size -> entry count
+ break;
+
+ case IREE_ELF_DT_RELA:
+ case IREE_ELF_DT_RELASZ:
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "unsupported DT_RELA relocations");  // i386 path handles implicit-addend REL only
+ default:
+ // Ignored.
+ break;
+ }
+ }
+ if (!rel_table) rel_count = 0;  // size tag without a table: nothing to apply
+
+ if (rel_count > 0) {
+ IREE_RETURN_IF_ERROR(
+ iree_elf_arch_x86_32_apply_rel(state, rel_count, rel_table));
+ }
+
+ return iree_ok_status();
+}
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+// System V i386 ABI (used in IREE):
+// https://uclibc.org/docs/psABI-i386.pdf
+// Arguments:
+//   all passed on the stack (pushed right-to-left; last arg furthest from ESP)
+//
+// Results:
+// EAX
+//
+// Non-volatile:
+// EBX, ESP, EBP, ESI, EDI
+//
+// Everything but Windows uses this convention (linux/bsd/mac/etc) and as such
+// we can just use nice little C thunks.
+
+#if defined(IREE_PLATFORM_WINDOWS)
+
+#error "TODO(#6554): need cdecl -> sysv ABI shims in x86_32_msvc.asm"
+
+#else
+
+void iree_elf_call_v_v(const void* symbol_ptr) {  // call: void fn(void)
+ typedef void (*ptr_t)(void);
+ ((ptr_t)symbol_ptr)();  // host and guest share the SysV i386 ABI; plain call suffices
+}
+
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0) {  // call: void* fn(int)
+ typedef void* (*ptr_t)(int);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+void* iree_elf_call_p_ip(const void* symbol_ptr, int a0, void* a1) {  // call: void* fn(int, void*)
+ typedef void* (*ptr_t)(int, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1);
+}
+
+int iree_elf_call_i_p(const void* symbol_ptr, void* a0) {  // call: int fn(void*)
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+int iree_elf_call_i_ppp(const void* symbol_ptr, void* a0, void* a1, void* a2) {  // call: int fn(void*, void*, void*)
+ typedef int (*ptr_t)(void*, void*, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1, a2);
+}
+
+int iree_elf_thunk_i_p(const void* symbol_ptr, void* a0) {  // same body as iree_elf_call_i_p on this arch (no translation performed)
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+#endif // IREE_PLATFORM_WINDOWS
+
+#endif // IREE_ARCH_X86_32
diff --git a/runtime/src/iree/hal/local/elf/arch/x86_64.c b/runtime/src/iree/hal/local/elf/arch/x86_64.c
new file mode 100644
index 0000000..1e3adfc
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/arch/x86_64.c
@@ -0,0 +1,216 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/arch.h"
+#include "iree/hal/local/elf/elf_types.h"
+
+#if defined(IREE_ARCH_X86_64)
+
+// Documentation:
+// https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr) {  // machine-type check only
+ return ehdr->e_machine == 0x3E;  // EM_X86_64 / 62
+}
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+enum {  // x86-64 relocation types from the psABI doc linked above
+ IREE_ELF_R_X86_64_NONE = 0, // No reloc
+ IREE_ELF_R_X86_64_64 = 1, // Direct 64 bit
+ IREE_ELF_R_X86_64_PC32 = 2, // PC relative 32 bit signed
+ IREE_ELF_R_X86_64_GOT32 = 3, // 32 bit GOT entry
+ IREE_ELF_R_X86_64_PLT32 = 4, // 32 bit PLT address
+ IREE_ELF_R_X86_64_COPY = 5, // Copy symbol at runtime
+ IREE_ELF_R_X86_64_GLOB_DAT = 6, // Create GOT entry
+ IREE_ELF_R_X86_64_JUMP_SLOT = 7, // Create PLT entry
+ IREE_ELF_R_X86_64_RELATIVE = 8, // Adjust by program base
+ IREE_ELF_R_X86_64_GOTPCREL = 9, // 32 bit signed pc relative offset to GOT
+ IREE_ELF_R_X86_64_32 = 10, // Direct 32 bit zero extended
+ IREE_ELF_R_X86_64_32S = 11, // Direct 32 bit sign extended
+ IREE_ELF_R_X86_64_16 = 12, // Direct 16 bit zero extended
+ IREE_ELF_R_X86_64_PC16 = 13, // 16 bit sign extended pc relative
+ IREE_ELF_R_X86_64_8 = 14, // Direct 8 bit sign extended
+ IREE_ELF_R_X86_64_PC8 = 15, // 8 bit sign extended pc relative
+ IREE_ELF_R_X86_64_PC64 = 24, // Place relative 64-bit signed
+};
+
+static iree_status_t iree_elf_arch_x86_64_apply_rela(  // applies one RELA (explicit addend) table
+ iree_elf_relocation_state_t* state, iree_host_size_t rela_count,
+ const iree_elf_rela_t* rela_table) {
+ for (iree_host_size_t i = 0; i < rela_count; ++i) {
+ const iree_elf_rela_t* rela = &rela_table[i];
+ uint32_t type = IREE_ELF_R_TYPE(rela->r_info);
+ if (type == IREE_ELF_R_X86_64_NONE) continue;
+
+ // TODO(benvanik): support imports by resolving from the import table.
+ iree_elf_addr_t sym_addr = 0;  // always 0 today; symbol relocs rejected below
+ if (IREE_ELF_R_SYM(rela->r_info) != 0) {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "symbol-relative relocations not implemented");
+ }
+
+ iree_elf_addr_t instr_ptr =
+ (iree_elf_addr_t)state->vaddr_bias + rela->r_offset;  // host address to patch
+ switch (type) {
+ // case IREE_ELF_R_X86_64_NONE: early-exit above
+ case IREE_ELF_R_X86_64_RELATIVE:
+ *(uint64_t*)instr_ptr = (uint64_t)(state->vaddr_bias + rela->r_addend);  // rebase by load bias
+ break;
+ case IREE_ELF_R_X86_64_JUMP_SLOT:
+ *(uint64_t*)instr_ptr = (uint64_t)sym_addr;
+ break;
+ case IREE_ELF_R_X86_64_GLOB_DAT:
+ *(uint64_t*)instr_ptr = (uint64_t)sym_addr;
+ break;
+ case IREE_ELF_R_X86_64_COPY:
+ *(uint64_t*)instr_ptr = (uint64_t)sym_addr;  // NOTE(review): psABI COPY copies symbol data; moot while symbol relocs are rejected above
+ break;
+ case IREE_ELF_R_X86_64_64:
+ *(uint64_t*)instr_ptr = (uint64_t)(sym_addr + rela->r_addend);
+ break;
+ case IREE_ELF_R_X86_64_32:
+ *(uint32_t*)instr_ptr = (uint32_t)(sym_addr + rela->r_addend);
+ break;
+ case IREE_ELF_R_X86_64_32S:
+ *(int32_t*)instr_ptr = (int32_t)(sym_addr + rela->r_addend);
+ break;
+ case IREE_ELF_R_X86_64_PC32:
+ *(uint32_t*)instr_ptr =
+ (uint32_t)(sym_addr + rela->r_addend - instr_ptr);  // S + A - P
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unimplemented x86_64 relocation type %08X",
+ type);
+ }
+ }
+ return iree_ok_status();
+}
+
+iree_status_t iree_elf_arch_apply_relocations(
+ iree_elf_relocation_state_t* state) {
+ // Gather the relevant relocation tables.
+ iree_host_size_t rela_count = 0;
+ const iree_elf_rela_t* rela_table = NULL;
+ iree_host_size_t plt_rela_count = 0;  // PLT (DT_JMPREL) entries tracked separately
+ const iree_elf_rela_t* plt_rela_table = NULL;
+ for (iree_host_size_t i = 0; i < state->dyn_table_count; ++i) {
+ const iree_elf_dyn_t* dyn = &state->dyn_table[i];
+ switch (dyn->d_tag) {
+ case IREE_ELF_DT_RELA:
+ rela_table =
+ (const iree_elf_rela_t*)(state->vaddr_bias + dyn->d_un.d_ptr);  // rebase vaddr to host ptr
+ break;
+ case IREE_ELF_DT_RELASZ:
+ rela_count = dyn->d_un.d_val / sizeof(iree_elf_rela_t);  // byte size -> entry count
+ break;
+
+ case IREE_ELF_DT_PLTREL:
+ // Type of reloc in PLT; we expect DT_RELA right now.
+ if (dyn->d_un.d_val != IREE_ELF_DT_RELA) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "unsupported DT_PLTREL != DT_RELA");
+ }
+ break;
+ case IREE_ELF_DT_JMPREL:
+ plt_rela_table =
+ (const iree_elf_rela_t*)(state->vaddr_bias + dyn->d_un.d_ptr);
+ break;
+ case IREE_ELF_DT_PLTRELSZ:
+ plt_rela_count = dyn->d_un.d_val / sizeof(iree_elf_rela_t);
+ break;
+
+ case IREE_ELF_DT_REL:
+ case IREE_ELF_DT_RELSZ:
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "unsupported DT_REL relocations");  // only explicit-addend RELA handled
+
+ default:
+ // Ignored.
+ break;
+ }
+ }
+ if (!rela_table) rela_count = 0;  // size tag without a table: nothing to apply
+ if (!plt_rela_table) plt_rela_count = 0;
+
+ if (rela_count > 0) {
+ IREE_RETURN_IF_ERROR(
+ iree_elf_arch_x86_64_apply_rela(state, rela_count, rela_table));
+ }
+ if (plt_rela_count > 0) {
+ IREE_RETURN_IF_ERROR(
+ iree_elf_arch_x86_64_apply_rela(state, plt_rela_count, plt_rela_table));
+ }
+
+ return iree_ok_status();
+}
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+// System V AMD64 ABI (used in IREE):
+// https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+// Arguments:
+// RDI, RSI, RDX, RCX, R8, R9, [stack]...
+// Results:
+// RAX, RDX
+//
+// Everything but Windows uses this convention (linux/bsd/mac/etc) and as such
+// we can just use nice little C thunks.
+
+#if defined(IREE_PLATFORM_WINDOWS)
+// Host is using the Microsoft x64 calling convention and we need to translate
+// to the System V AMD64 ABI conventions. Unfortunately MSVC does not support
+// inline assembly and we have to outline the calls in x86_64_msvc.asm.
+#else
+
+void iree_elf_call_v_v(const void* symbol_ptr) {  // call: void fn(void)
+ typedef void (*ptr_t)(void);
+ ((ptr_t)symbol_ptr)();  // non-Windows host shares SysV AMD64 ABI; plain call suffices
+}
+
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0) {  // call: void* fn(int)
+ typedef void* (*ptr_t)(int);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+void* iree_elf_call_p_ip(const void* symbol_ptr, int a0, void* a1) {  // call: void* fn(int, void*)
+ typedef void* (*ptr_t)(int, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1);
+}
+
+int iree_elf_call_i_p(const void* symbol_ptr, void* a0) {  // call: int fn(void*)
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+int iree_elf_call_i_ppp(const void* symbol_ptr, void* a0, void* a1, void* a2) {  // call: int fn(void*, void*, void*)
+ typedef int (*ptr_t)(void*, void*, void*);
+ return ((ptr_t)symbol_ptr)(a0, a1, a2);
+}
+
+int iree_elf_thunk_i_p(const void* symbol_ptr, void* a0) {  // same body as iree_elf_call_i_p on this arch (no translation performed)
+ typedef int (*ptr_t)(void*);
+ return ((ptr_t)symbol_ptr)(a0);
+}
+
+#endif // IREE_PLATFORM_WINDOWS
+
+#endif // IREE_ARCH_X86_64
diff --git a/runtime/src/iree/hal/local/elf/arch/x86_64_msvc.asm b/runtime/src/iree/hal/local/elf/arch/x86_64_msvc.asm
new file mode 100644
index 0000000..6e25c29
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/arch/x86_64_msvc.asm
@@ -0,0 +1,202 @@
+; Copyright 2021 The IREE Authors
+;
+; Licensed under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+; Microsoft x64 calling convention:
+; https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
+; Arguments:
+; RCX, RDX, R8, R9, [stack]...
+; Results:
+; RAX
+; Non-volatile:
+; RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-XMM15
+;
+; System V AMD64 ABI (used in IREE):
+; https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+; Arguments:
+; RDI, RSI, RDX, RCX, R8, R9, [stack]...
+; Results:
+; RAX, RDX
+
+; Total size of non-volatile XMM registers.
+_SYSV_INTEROP_STACK_SIZE = 10 * 10h
+
+; Function prolog that saves registers that we may clobber while in code
+; following the SYS-V x64 ABI.
+;
+; This also encodes unwind table information (.xdata/.pdata) that is used by
+; debuggers/backtrace/etc to be able to look through the function on the stack.
+; Though the debugger will be totally confused by the function we call into
+; (it'll be expecting the Microsoft conventions and won't find them) it'll at
+; least let us see the leaf guest function instead of just a bunch of our
+; iree_elf_call_* thunks.
+; Docs suck but we are in black magic territory so it's expected:
+; https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-160#unwind-helpers-for-masm
+_sysv_interop_prolog MACRO
+ ; Save volatile general purpose registers to the stack.
+ push rbp
+ .pushreg rbp
+ mov rbp, rsp
+ .setframe rbp, 0
+ push rbx
+ .pushreg rbx
+ push rdi ; RDI/RSI are MS-x64 non-volatile but SysV volatile; must preserve
+ .pushreg rdi
+ push rsi
+ .pushreg rsi
+ push r12
+ .pushreg r12
+ push r13
+ .pushreg r13
+ push r14
+ .pushreg r14
+ push r15
+ .pushreg r15
+
+ ; Setup stack space for storing the SIMD registers.
+ ; NOTE: we adjust this by 8 bytes to get on a 16-byte alignment so we can
+ ; use the aligned movaps instruction.
+ sub rsp, _SYSV_INTEROP_STACK_SIZE + 8
+ .allocstack _SYSV_INTEROP_STACK_SIZE + 8
+
+ ; Save volatile SIMD registers to the stack.
+ ; XMM6-XMM15 are MS-x64 non-volatile but SysV volatile; must preserve.
+ movaps [rsp + 00h], xmm6
+ .savexmm128 xmm6, 00h
+ movaps [rsp + 10h], xmm7
+ .savexmm128 xmm7, 10h
+ movaps [rsp + 20h], xmm8
+ .savexmm128 xmm8, 20h
+ movaps [rsp + 30h], xmm9
+ .savexmm128 xmm9, 30h
+ movaps [rsp + 40h], xmm10
+ .savexmm128 xmm10, 40h
+ movaps [rsp + 50h], xmm11
+ .savexmm128 xmm11, 50h
+ movaps [rsp + 60h], xmm12
+ .savexmm128 xmm12, 60h
+ movaps [rsp + 70h], xmm13
+ .savexmm128 xmm13, 70h
+ movaps [rsp + 80h], xmm14
+ .savexmm128 xmm14, 80h
+ movaps [rsp + 90h], xmm15
+ .savexmm128 xmm15, 90h
+
+ .endprolog
+ENDM
+
+; Function epilog that restores registers that we may have clobbered while in
+; code following the SYS-V x64 ABI. Must mirror _sysv_interop_prolog exactly.
+_sysv_interop_epilog MACRO
+ ; Restore volatile SIMD registers from the stack.
+ movaps xmm6, [rsp + 00h]
+ movaps xmm7, [rsp + 10h]
+ movaps xmm8, [rsp + 20h]
+ movaps xmm9, [rsp + 30h]
+ movaps xmm10, [rsp + 40h]
+ movaps xmm11, [rsp + 50h]
+ movaps xmm12, [rsp + 60h]
+ movaps xmm13, [rsp + 70h]
+ movaps xmm14, [rsp + 80h]
+ movaps xmm15, [rsp + 90h]
+ add rsp, _SYSV_INTEROP_STACK_SIZE + 8
+
+ ; Restore volatile general purpose registers from the stack.
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rsi
+ pop rdi
+ pop rbx
+ leave ; mov rsp, rbp + pop rbp (undoes the prolog frame setup)
+ENDM
+
+_TEXT SEGMENT
+ALIGN 16
+
+; void iree_elf_call_v_v(const void* symbol_ptr)
+iree_elf_call_v_v PROC FRAME
+ _sysv_interop_prolog
+
+ ; RCX = symbol_ptr (MS x64 arg0); target takes no args.
+ call rcx
+
+ _sysv_interop_epilog
+ ret
+iree_elf_call_v_v ENDP
+
+; void* iree_elf_call_p_i(const void* symbol_ptr, int a0)
+iree_elf_call_p_i PROC FRAME
+ _sysv_interop_prolog
+
+ ; RCX = symbol_ptr
+ ; RDX = a0 -> SysV arg0 is RDI.
+ mov rdi, rdx
+ call rcx
+
+ _sysv_interop_epilog
+ ret ; RAX result register is shared by both ABIs
+iree_elf_call_p_i ENDP
+
+; void* iree_elf_call_p_ip(const void* symbol_ptr, int a0, void* a1)
+iree_elf_call_p_ip PROC FRAME
+ _sysv_interop_prolog
+
+ ; RCX = symbol_ptr
+ ; RDX = a0 -> RDI (SysV arg0)
+ ; R8 = a1 -> RSI (SysV arg1)
+ mov rdi, rdx
+ mov rsi, r8
+ call rcx
+
+ _sysv_interop_epilog
+ ret
+iree_elf_call_p_ip ENDP
+
+; int iree_elf_call_i_p(const void* symbol_ptr, void* a0)
+iree_elf_call_i_p PROC FRAME
+ _sysv_interop_prolog
+
+ ; RCX = symbol_ptr
+ ; RDX = a0 -> RDI (SysV arg0)
+ mov rdi, rdx
+ call rcx
+
+ _sysv_interop_epilog
+ ret
+iree_elf_call_i_p ENDP
+
+; int iree_elf_call_i_ppp(const void* symbol_ptr, void* a0, void* a1, void* a2)
+iree_elf_call_i_ppp PROC FRAME
+ _sysv_interop_prolog
+
+ ; RCX = symbol_ptr
+ ; RDX = a0 -> RDI (SysV arg0)
+ ; R8 = a1 -> RSI (SysV arg1)
+ ; R9 = a2 -> RDX (SysV arg2); moved last so the incoming RDX is not clobbered
+ mov rdi, rdx
+ mov rsi, r8
+ mov rdx, r9
+ call rcx
+
+ _sysv_interop_epilog
+ ret
+iree_elf_call_i_ppp ENDP
+
+; int iree_elf_thunk_i_p(const void* symbol_ptr, void* a0)
+iree_elf_thunk_i_p PROC FRAME
+ _sysv_interop_prolog
+
+ ; Reverse direction: SysV caller passes RDI = symbol_ptr, RSI = a0 -> RCX.
+ ; NOTE(review): no 32-byte MS-ABI shadow space is reserved before this call -- confirm callee safety.
+ mov rcx, rsi
+ call rdi
+
+ _sysv_interop_epilog
+ ret
+iree_elf_thunk_i_p ENDP
+
+_TEXT ENDS
+END
diff --git a/runtime/src/iree/hal/local/elf/elf_module.c b/runtime/src/iree/hal/local/elf/elf_module.c
new file mode 100644
index 0000000..61f68e9
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/elf_module.c
@@ -0,0 +1,660 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/elf/elf_module.h"
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/arch.h"
+#include "iree/hal/local/elf/platform.h"
+
+//==============================================================================
+// Verification and section/info caching
+//==============================================================================
+
+// Fields taken from the ELF headers used only during verification and loading.
+// Pointers alias the caller-provided raw ELF bytes and are not retained.
+typedef struct iree_elf_module_load_state_t {
+ iree_memory_info_t memory_info; // host page/alignment info (iree_memory_query_info)
+ const iree_elf_ehdr_t* ehdr; // file header (verified before use)
+ const iree_elf_phdr_t* phdr_table; // ehdr.e_phnum has count
+ const iree_elf_shdr_t* shdr_table; // ehdr.e_shnum has count
+
+ const iree_elf_dyn_t* dyn_table; // PT_DYNAMIC
+ iree_host_size_t dyn_table_count; // entries in dyn_table
+
+ iree_elf_addr_t init; // DT_INIT
+ const iree_elf_addr_t* init_array; // DT_INIT_ARRAY
+ iree_host_size_t init_array_count; // DT_INIT_ARRAYSZ
+} iree_elf_module_load_state_t;
+
+// Verifies the ELF file header and machine class.
+// |raw_data| is the untrusted ELF file contents; returns FAILED_PRECONDITION
+// when any mandatory header property does not match what this loader supports.
+static iree_status_t iree_elf_module_verify_ehdr(
+ iree_const_byte_span_t raw_data) {
+ // Size must be larger than the header we are trying to load.
+ if (raw_data.data_length < sizeof(iree_elf_ehdr_t)) {
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "ELF data provided (%zu) is smaller than ehdr (%zu)",
+ raw_data.data_length, sizeof(iree_elf_ehdr_t));
+ }
+
+ // Check for ELF identifier.
+ const iree_elf_ehdr_t* ehdr = (const iree_elf_ehdr_t*)raw_data.data;
+ static const iree_elf_byte_t elf_magic[4] = {0x7F, 'E', 'L', 'F'};
+ if (memcmp(ehdr->e_ident, elf_magic, sizeof(elf_magic)) != 0) {
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "data provided does not contain the ELF identifier");
+ }
+
+ // Check critical identifier bytes before attempting to deal with any more of
+ // the header; the class determines the size of the header fields and the
+ // endianness determines how multi-byte fields are interpreted.
+
+#if defined(IREE_PTR_SIZE_32)
+ if (ehdr->e_ident[IREE_ELF_EI_CLASS] != IREE_ELF_ELFCLASS32) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "system/ELF class mismatch: expected 32-bit");
+ }
+#elif defined(IREE_PTR_SIZE_64)
+ if (ehdr->e_ident[IREE_ELF_EI_CLASS] != IREE_ELF_ELFCLASS64) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "system/ELF class mismatch: expected 64-bit");
+ }
+#endif // IREE_PTR_SIZE_*
+
+#if defined(IREE_ENDIANNESS_LITTLE)
+ if (ehdr->e_ident[IREE_ELF_EI_DATA] != IREE_ELF_ELFDATA2LSB) {
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "system/ELF endianness mismatch: expected little-endian");
+ }
+#else
+ if (ehdr->e_ident[IREE_ELF_EI_DATA] != IREE_ELF_ELFDATA2MSB) {
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "system/ELF endianness mismatch: expected big-endian");
+ }
+#endif // IREE_ENDIANNESS_*
+
+ // ELF version == EV_CURRENT (1) is all we handle.
+ // Check this before other fields as they could change meaning in other
+ // versions.
+ if (ehdr->e_version != 1) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "ELF version %u unsupported; expected 1", ehdr->e_version);
+ }
+
+ // Ensure we have the right architecture compiled in.
+ if (!iree_elf_arch_is_valid(ehdr)) {
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "ELF machine specification (%04X) does not match the "
+ "running architecture",
+ (uint32_t)ehdr->e_machine);
+ }
+
+ // We could probably support non-shared object types but no need today and it
+ // allows us to make assumptions about the sections that are present (all
+ // those marked as 'mandatory' in the spec).
+ if (ehdr->e_type != IREE_ELF_ET_DYN) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "only shared object ELFs are supported");
+ }
+
+ // Sanity checks on entity sizes - they can be larger than what we expect,
+ // but overlaying our structs onto them is not going to work if they are
+ // smaller. For now we aren't doing pointer walks based on dynamic sizes so
+ // we need equality, but if we ever have a reason to do so we could change all
+ // array-style accesses to scale out based on the ehdr values
+ if (ehdr->e_ehsize != sizeof(iree_elf_ehdr_t) ||
+ ehdr->e_phentsize != sizeof(iree_elf_phdr_t) ||
+ ehdr->e_shentsize != sizeof(iree_elf_shdr_t)) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "ELF entity size mismatch");
+ }
+
+ // Verify the phdr table properties. This doesn't validate each phdr but just
+ // ensures that the table is constructed correctly and within bounds.
+ if (ehdr->e_phoff == 0 || ehdr->e_phnum == 0 ||
+ (ehdr->e_phoff + ehdr->e_phnum * ehdr->e_phentsize) >
+ raw_data.data_length) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "invalid mandatory phdr table");
+ }
+
+ // Verify the shdr table properties.
+ if (ehdr->e_shoff == 0 || ehdr->e_shnum == 0 ||
+ (ehdr->e_shoff + ehdr->e_shnum * ehdr->e_shentsize) >
+ raw_data.data_length) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "invalid mandatory shdr table");
+ }
+
+ return iree_ok_status();
+}
+
+// Verifies the phdr table for supported types and in-bounds file references.
+// Only PT_LOAD segments are range-checked; other segment types are ignored.
+static iree_status_t iree_elf_module_verify_phdr_table(
+ iree_const_byte_span_t raw_data, iree_elf_module_load_state_t* load_state) {
+ for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+ const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+ if (phdr->p_type != IREE_ELF_PT_LOAD) continue;
+ // NOTE(review): p_offset + p_filesz could wrap on hostile inputs -- confirm iree_elf_addr_t width.
+ if (phdr->p_offset + phdr->p_filesz > raw_data.data_length) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "phdr reference outside of file extents: %" PRIu64
+ "-%" PRIu64 " of max %" PRIu64,
+ (uint64_t)phdr->p_offset,
+ (uint64_t)(phdr->p_offset + phdr->p_filesz),
+ (uint64_t)raw_data.data_length);
+ }
+ }
+ return iree_ok_status();
+}
+
+// Parses the ELF to populate fields used during loading and runtime and verify
+// that the ELF matches our very, very low expectations.
+// |out_load_state| receives pointers aliasing |raw_data|; raw_data must remain
+// live while the load state is in use.
+static iree_status_t iree_elf_module_parse_headers(
+ iree_const_byte_span_t raw_data,
+ iree_elf_module_load_state_t* out_load_state,
+ iree_elf_module_t* out_module) {
+ memset(out_module, 0, sizeof(*out_module));
+ memset(out_load_state, 0, sizeof(*out_load_state));
+
+ // Query the host memory information that we can use to verify we are able to
+ // meet the alignment requirements of the ELF.
+ iree_memory_query_info(&out_load_state->memory_info);
+
+ // Verify the ELF is an ELF and that it's for the current machine.
+ // NOTE: this only verifies the ehdr is as expected and nothing else: the ELF
+ // is still untrusted and may be missing mandatory sections.
+ IREE_RETURN_IF_ERROR(iree_elf_module_verify_ehdr(raw_data));
+
+ // Get the primary tables (locations verified above).
+ const iree_elf_ehdr_t* ehdr = (const iree_elf_ehdr_t*)raw_data.data;
+ const iree_elf_phdr_t* phdr_table =
+ (const iree_elf_phdr_t*)(raw_data.data + ehdr->e_phoff);
+ const iree_elf_shdr_t* shdr_table =
+ (const iree_elf_shdr_t*)(raw_data.data + ehdr->e_shoff);
+ out_load_state->ehdr = ehdr;
+ out_load_state->phdr_table = phdr_table;
+ out_load_state->shdr_table = shdr_table;
+
+ // Verify the phdr table to ensure all bounds are in range of the file.
+ IREE_RETURN_IF_ERROR(
+ iree_elf_module_verify_phdr_table(raw_data, out_load_state));
+
+ return iree_ok_status();
+}
+
+//==============================================================================
+// Allocation and layout
+//==============================================================================
+
+// Calculates the in-memory layout of the ELF module as defined by its segments.
+// Returns a byte range representing the minimum virtual address offset of any
+// segment that can be used to offset the vaddr from the host allocation and the
+// total length of the required range. The alignment will meet the requirements
+// of the ELF but is yet unadjusted for host requirements. The range will have
+// zero length if there are no segments to load (which would be weird).
+static iree_byte_range_t iree_elf_module_calculate_vaddr_range(
+ iree_elf_module_load_state_t* load_state) {
+ // Min/max virtual addresses of any allocated segment.
+ iree_elf_addr_t vaddr_min = IREE_ELF_ADDR_MAX;  // sentinel: "no segment seen"
+ iree_elf_addr_t vaddr_max = IREE_ELF_ADDR_MIN;
+ for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+ const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+ if (phdr->p_type != IREE_ELF_PT_LOAD) continue;  // only loadable segments occupy memory
+ iree_elf_addr_t p_vaddr_min =
+ iree_page_align_start(phdr->p_vaddr, phdr->p_align);
+ iree_elf_addr_t p_vaddr_max =
+ iree_page_align_end(phdr->p_vaddr + phdr->p_memsz, phdr->p_align);  // p_memsz covers .bss beyond p_filesz
+ vaddr_min = iree_min(vaddr_min, p_vaddr_min);
+ vaddr_max = iree_max(vaddr_max, p_vaddr_max);
+ }
+ if (vaddr_min == IREE_ELF_ADDR_MAX) {
+ // Did not find any segments to load.
+ vaddr_min = IREE_ELF_ADDR_MIN;
+ vaddr_max = IREE_ELF_ADDR_MIN;
+ }
+ iree_byte_range_t byte_range = {
+ .offset = (iree_host_size_t)vaddr_min,
+ .length = (iree_host_size_t)(vaddr_max - vaddr_min),
+ };
+ return byte_range;
+}
+
+// Allocates space for and loads all PT_LOAD segments into the host virtual
+// address space.
+//
+// The full vaddr range is reserved once (uncommitted) and then each PT_LOAD
+// segment commits and populates only the pages it spans. Pages are left
+// writeable on return; final protections are applied after relocation by
+// iree_elf_module_protect_segments.
+static iree_status_t iree_elf_module_load_segments(
+    iree_const_byte_span_t raw_data, iree_elf_module_load_state_t* load_state,
+    iree_elf_module_t* module) {
+  // Calculate the total internally-aligned vaddr range.
+  iree_byte_range_t vaddr_range =
+      iree_elf_module_calculate_vaddr_range(load_state);
+
+  // Reserve virtual address space in the host memory space. This memory is
+  // uncommitted by default as the ELF may only sparsely use the address space.
+  module->vaddr_size = iree_page_align_end(
+      vaddr_range.length, load_state->memory_info.normal_page_size);
+  IREE_RETURN_IF_ERROR(iree_memory_view_reserve(
+      IREE_MEMORY_VIEW_FLAG_MAY_EXECUTE, module->vaddr_size,
+      module->host_allocator, (void**)&module->vaddr_base));
+  // vaddr_bias translates ELF virtual addresses into host addresses:
+  // host_ptr = vaddr_bias + p_vaddr.
+  module->vaddr_bias = module->vaddr_base - vaddr_range.offset;
+
+  // Commit and load all of the segments.
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type != IREE_ELF_PT_LOAD) continue;
+
+    // Commit the range of pages used by this segment, initially with write
+    // access so that we can modify the pages.
+    iree_byte_range_t byte_range = {
+        .offset = phdr->p_vaddr,
+        .length = phdr->p_memsz,
+    };
+    IREE_RETURN_IF_ERROR(iree_memory_view_commit_ranges(
+        module->vaddr_bias, 1, &byte_range,
+        IREE_MEMORY_ACCESS_READ | IREE_MEMORY_ACCESS_WRITE));
+
+    // Copy data present in the file.
+    // TODO(benvanik): infra for being able to detect if the source model is in
+    // a mapped file - if it is, we can remap the page and directly reference it
+    // here for read-only segments and setup copy-on-write for writeable ones.
+    // We'd need a way to pass in the underlying mapping and some guarantees on
+    // the lifetime of it. Today we are just always committing above and copying
+    // here because it keeps this all super simple (you know, as simple as an
+    // entire custom ELF loader can be :).
+    if (phdr->p_filesz > 0) {
+      memcpy(module->vaddr_bias + phdr->p_vaddr, raw_data.data + phdr->p_offset,
+             phdr->p_filesz);
+    }
+
+    // NOTE: p_memsz may be larger than p_filesz - if so, the extra memory bytes
+    // must be zeroed. We require that the initial allocation is zeroed anyway
+    // so this is a no-op.
+
+    // NOTE: the pages are still writeable; we need to apply relocations before
+    // we can go back through and remove write access from read-only/executable
+    // pages in iree_elf_module_protect_segments.
+  }
+
+  return iree_ok_status();
+}
+
+// Applies segment memory protection attributes.
+// This will make pages read-only and must only be performed after relocation
+// (which writes to pages of all types). Executable pages will be flushed from
+// the instruction cache.
+static iree_status_t iree_elf_module_protect_segments(
+    iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+  // PT_LOAD segments (the bulk of progbits):
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type != IREE_ELF_PT_LOAD) continue;
+
+    // Interpret the access bits and widen to the implicit allowable
+    // permissions. See Table 7-37:
+    // https://docs.oracle.com/cd/E19683-01/816-1386/6m7qcoblk/index.html#chapter6-34713
+    iree_memory_access_t access = 0;
+    if (phdr->p_flags & IREE_ELF_PF_R) access |= IREE_MEMORY_ACCESS_READ;
+    if (phdr->p_flags & IREE_ELF_PF_W) access |= IREE_MEMORY_ACCESS_WRITE;
+    if (phdr->p_flags & IREE_ELF_PF_X) access |= IREE_MEMORY_ACCESS_EXECUTE;
+    // Both W and X imply R per the permission table referenced above.
+    if (access & IREE_MEMORY_ACCESS_WRITE) access |= IREE_MEMORY_ACCESS_READ;
+    if (access & IREE_MEMORY_ACCESS_EXECUTE) access |= IREE_MEMORY_ACCESS_READ;
+
+    // We only support R+X (no W): refusing W+X segments keeps the loader from
+    // ever producing writable executable pages.
+    if ((phdr->p_flags & IREE_ELF_PF_X) && (phdr->p_flags & IREE_ELF_PF_W)) {
+      return iree_make_status(IREE_STATUS_PERMISSION_DENIED,
+                              "unable to create a writable executable segment");
+    }
+
+    // Apply new access protection.
+    iree_byte_range_t byte_range = {
+        .offset = phdr->p_vaddr,
+        .length = phdr->p_memsz,
+    };
+    IREE_RETURN_IF_ERROR(iree_memory_view_protect_ranges(module->vaddr_bias, 1,
+                                                         &byte_range, access));
+
+    // Flush the instruction cache if we are going to execute these pages.
+    if (access & IREE_MEMORY_ACCESS_EXECUTE) {
+      iree_memory_view_flush_icache(module->vaddr_bias + phdr->p_vaddr,
+                                    phdr->p_memsz);
+    }
+  }
+
+  // PT_GNU_RELRO: hardening of post-relocation segments.
+  // These may alias with segments above and must be processed afterward.
+  // (Processing first would let the PT_LOAD pass re-widen aliased pages.)
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type != IREE_ELF_PT_GNU_RELRO) continue;
+    iree_byte_range_t byte_range = {
+        .offset = phdr->p_vaddr,
+        .length = phdr->p_memsz,
+    };
+    IREE_RETURN_IF_ERROR(iree_memory_view_protect_ranges(
+        module->vaddr_bias, 1, &byte_range, IREE_MEMORY_ACCESS_READ));
+  }
+
+  return iree_ok_status();
+}
+
+// Releases the host virtual address space reservation (and with it all
+// committed segment pages). Safe to call on a partially-initialized module:
+// a NULL base means nothing was ever reserved.
+static void iree_elf_module_unload_segments(iree_elf_module_t* module) {
+  uint8_t* base = module->vaddr_base;
+  if (base != NULL) {
+    iree_memory_view_release(base, module->vaddr_size, module->host_allocator);
+  }
+  module->vaddr_size = 0;
+  module->vaddr_bias = NULL;
+  module->vaddr_base = NULL;
+}
+
+//==============================================================================
+// Dynamic library handling
+//==============================================================================
+// NOTE: this happens *after* allocation and loading as the .dynsym and related
+// segments are allocated and loaded in virtual address space.
+
+// Parses, verifies, and populates dynamic symbol related tables for runtime
+// use. These tables are all in allocated memory and use fully rebased virtual
+// addresses.
+//
+// Populates |module| dynstr/dynsym (for runtime symbol lookup) and the
+// |load_state| init/init_array fields (consumed by
+// iree_elf_module_run_initializers).
+static iree_status_t iree_elf_module_parse_dynamic_tables(
+    iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+  // By the spec there must only be one PT_DYNAMIC.
+  // Note that we are getting the one in the loaded virtual address space.
+  const iree_elf_dyn_t* dyn_table = NULL;
+  iree_host_size_t dyn_table_count = 0;
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type == IREE_ELF_PT_DYNAMIC) {
+      dyn_table = (const iree_elf_dyn_t*)(module->vaddr_bias + phdr->p_vaddr);
+      dyn_table_count = phdr->p_filesz / sizeof(iree_elf_dyn_t);
+      break;
+    }
+  }
+  if (!dyn_table || !dyn_table_count) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "no PT_DYNAMIC/.dynamic segment");
+  }
+  load_state->dyn_table = dyn_table;
+  load_state->dyn_table_count = dyn_table_count;
+
+  for (iree_host_size_t i = 0; i < dyn_table_count; ++i) {
+    const iree_elf_dyn_t* dyn = &dyn_table[i];
+    switch (dyn->d_tag) {
+      case IREE_ELF_DT_STRTAB:
+        // .dynstr table for runtime symbol lookup.
+        module->dynstr = (const char*)(module->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_STRSZ:
+        module->dynstr_size = dyn->d_un.d_val;
+        break;
+
+      case IREE_ELF_DT_SYMTAB:
+        // .dynsym table for runtime symbol lookup.
+        module->dynsym =
+            (const iree_elf_sym_t*)(module->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_SYMENT:
+        if (dyn->d_un.d_val != sizeof(iree_elf_sym_t)) {
+          return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                  "DT_SYMENT size mismatch");
+        }
+        break;
+      case IREE_ELF_DT_HASH: {
+        // NOTE: we don't care about the hash table (yet), but it is the only
+        // way to get the total symbol count (nchain, the second hash word).
+        const iree_elf_word_t* hash =
+            (const iree_elf_word_t*)(module->vaddr_bias + dyn->d_un.d_ptr);
+        module->dynsym_count = hash[1];  // symbol count, obviously~
+        break;
+      }
+
+      case IREE_ELF_DT_INIT:
+        // .init initializer function (runs before .init_array).
+        load_state->init = dyn->d_un.d_ptr;
+        break;
+      case IREE_ELF_DT_INIT_ARRAY:
+        // .init_array list of initializer functions.
+        load_state->init_array =
+            (const iree_elf_addr_t*)(module->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_INIT_ARRAYSZ:
+        // DT_INIT_ARRAYSZ is the total size of .init_array in *bytes* per the
+        // System V gABI; convert to an entry count as
+        // iree_elf_module_run_initializers indexes init_array per entry.
+        load_state->init_array_count =
+            dyn->d_un.d_val / sizeof(iree_elf_addr_t);
+        break;
+
+      case IREE_ELF_DT_RELENT:
+        if (dyn->d_un.d_val != sizeof(iree_elf_rel_t)) {
+          return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                  "DT_RELENT size mismatch");
+        }
+        break;
+      case IREE_ELF_DT_RELAENT:
+        if (dyn->d_un.d_val != sizeof(iree_elf_rela_t)) {
+          return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                  "DT_RELAENT size mismatch");
+        }
+        break;
+
+      default:
+        // Ignored.
+        break;
+    }
+  }
+
+  // Must have .dynsym/.dynstr to perform lookups.
+  if (!module->dynstr || !module->dynstr_size || !module->dynsym ||
+      !module->dynsym_count) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "missing .dynsym/.dynstr in ELF .dynamic segment");
+  }
+
+  // NOTE: we could try to verify ranges here but no one seems to do that and
+  // it's somewhat annoying. You're loading untrusted code into your memory
+  // space - this is the least of your concerns :)
+
+  return iree_ok_status();
+}
+
+// Verifies that there are no dynamic imports in the module as we don't support
+// them yet.
+static iree_status_t iree_elf_module_verify_no_imports(
+ iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+ // NOTE: slot 0 is always the 0 placeholder.
+ for (iree_host_size_t i = 1; i < module->dynsym_count; ++i) {
+ const iree_elf_sym_t* sym = &module->dynsym[i];
+ if (sym->st_shndx == IREE_ELF_SHN_UNDEF) {
+ const char* symname IREE_ATTRIBUTE_UNUSED =
+ sym->st_name ? module->dynstr + sym->st_name : NULL;
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "ELF imports one or more symbols (trying "
+ "'%s'); imports are not supported in the "
+ "platform-agnostic loader",
+ symname);
+ }
+ }
+ return iree_ok_status();
+}
+
+//==============================================================================
+// Relocation
+//==============================================================================
+
+// Applies symbol and address base relocations to the loaded sections by
+// handing the rebased dynamic table off to the architecture-specific handler.
+static iree_status_t iree_elf_module_apply_relocations(
+    iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+  // Designated initialization zero-fills all remaining fields.
+  iree_elf_relocation_state_t reloc_state = {
+      .vaddr_bias = module->vaddr_bias,
+      .dyn_table = load_state->dyn_table,
+      .dyn_table_count = load_state->dyn_table_count,
+  };
+  return iree_elf_arch_apply_relocations(&reloc_state);
+}
+
+//==============================================================================
+// Initialization/finalization
+//==============================================================================
+
+// Runs initializers defined within the module, if any.
+// .init is run first and then .init_array is run in array order.
+static iree_status_t iree_elf_module_run_initializers(
+    iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+  // DT_INIT, if present, always runs before any DT_INIT_ARRAY entries.
+  if (load_state->init != IREE_ELF_ADDR_MIN) {
+    iree_elf_call_v_v((void*)(module->vaddr_bias + load_state->init));
+  }
+
+  // NOTE: entries with values of 0 or -1 must be ignored.
+  // NOTE(review): init_array_count is consumed here as an *entry* count while
+  // DT_INIT_ARRAYSZ is specified in bytes - confirm the producer converts.
+  for (iree_host_size_t i = 0; i < load_state->init_array_count; ++i) {
+    iree_elf_addr_t symbol_ptr = load_state->init_array[i];
+    if (symbol_ptr == 0 || symbol_ptr == IREE_ELF_ADDR_MAX) continue;
+    // Entries are unrelocated vaddrs and must be biased into host space.
+    iree_elf_call_v_v((void*)(module->vaddr_bias + symbol_ptr));
+  }
+
+  return iree_ok_status();
+}
+
+// Runs module finalizers. Intentionally a no-op (NOT IMPLEMENTED): Android's
+// loader skips finalizers as well, and nothing we load should need them - our
+// HAL executables are not doing IO or (hopefully) anything stateful whose
+// correctness depends on finalizers executing.
+static void iree_elf_module_run_finalizers(iree_elf_module_t* module) {}
+
+//==============================================================================
+// Symbol lookup
+//==============================================================================
+
+// Resolves a global symbol within the module by symbol name.
+//
+// Implemented as a simple linear scan: we currently have a single exported
+// symbol so hashing is not worth the bytes. If we ever grow to a few dozen
+// exports the sysv hash style is the smallest in both code size and ELF
+// binary size and can be selected with --hash-style=sysv on ld/lld. By
+// default most linkers (including lld, which is what we care about) use
+// --hash-style=both and emit both `.hash` and `.gnu.hash`, but that's silly
+// for us as ideally we'd have none. If we ever try to use this for larger
+// libraries with many exported symbols (we shouldn't!) we can add support:
+// https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
+// https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2
+static const iree_elf_sym_t* iree_elf_module_lookup_global_symbol(
+    iree_elf_module_t* module, const char* symbol_name) {
+  // symtab[0] is the reserved STN_UNDEF entry and never a match. Local
+  // symbols precede global ones in the table, so scanning in reverse reaches
+  // the globals we care about first.
+  int index = (int)module->dynsym_count;
+  while (--index > 0) {
+    const iree_elf_sym_t* sym = &module->dynsym[index];
+    switch (IREE_ELF_ST_BIND(sym->st_info)) {
+      case IREE_ELF_STB_GLOBAL:
+      case IREE_ELF_STB_WEAK:
+        break;  // candidate binding; check the name below.
+      default:
+        continue;
+    }
+    if (sym->st_name == 0) continue;
+    if (strcmp(module->dynstr + sym->st_name, symbol_name) == 0) {
+      return sym;
+    }
+  }
+  return NULL;
+}
+
+//==============================================================================
+// API
+//==============================================================================
+
+iree_status_t iree_elf_module_initialize_from_memory(
+    iree_const_byte_span_t raw_data,
+    const iree_elf_import_table_t* import_table,
+    iree_allocator_t host_allocator, iree_elf_module_t* out_module) {
+  IREE_ASSERT_ARGUMENT(raw_data.data);
+  IREE_ASSERT_ARGUMENT(out_module);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Parse the ELF headers and verify that it's something we can handle.
+  // Temporary state required during loading such as references to subtables
+  // within the ELF are tracked here on the stack while persistent fields are
+  // initialized on |out_module|.
+  iree_elf_module_load_state_t load_state;
+  iree_status_t status =
+      iree_elf_module_parse_headers(raw_data, &load_state, out_module);
+  // Assigned unconditionally (parse_headers zeroes |out_module|) so that the
+  // failure path below can release memory with the correct allocator.
+  out_module->host_allocator = host_allocator;
+
+  // Allocate and load the ELF into memory.
+  // All page commit/relocation/protection work below is bracketed by the JIT
+  // context begin/end pair.
+  iree_memory_jit_context_begin();
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_load_segments(raw_data, &load_state, out_module);
+  }
+
+  // Parse required dynamic symbol tables in loaded memory. These are used for
+  // runtime symbol resolution and relocation.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_parse_dynamic_tables(&load_state, out_module);
+  }
+
+  // TODO(benvanik): imports would happen here. For now we just ensure there are
+  // no imports as otherwise things will fail with obscure messages later on.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_verify_no_imports(&load_state, out_module);
+  }
+
+  // Apply relocations to the loaded pages.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_apply_relocations(&load_state, out_module);
+  }
+
+  // Apply final protections to the loaded pages now that relocations have been
+  // performed.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_protect_segments(&load_state, out_module);
+  }
+  iree_memory_jit_context_end();
+
+  // Run initializers prior to returning to the caller.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_run_initializers(&load_state, out_module);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    // On failure gracefully clean up the module by releasing any allocated
+    // memory during the partial initialization.
+    iree_elf_module_deinitialize(out_module);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void iree_elf_module_deinitialize(iree_elf_module_t* module) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Finalizers (currently a no-op) run before the backing pages are released.
+  iree_elf_module_run_finalizers(module);
+  iree_elf_module_unload_segments(module);
+  // Scrub the struct so accidental reuse of dangling pointers fails fast.
+  memset(module, 0, sizeof(*module));
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Resolves |symbol_name| to a host pointer within the loaded module.
+// |out_export| receives the rebased address on success and is NULL'd first so
+// callers never observe stale values on failure.
+iree_status_t iree_elf_module_lookup_export(iree_elf_module_t* module,
+                                            const char* symbol_name,
+                                            void** out_export) {
+  IREE_ASSERT_ARGUMENT(module);
+  IREE_ASSERT_ARGUMENT(out_export);
+  *out_export = NULL;
+
+  const iree_elf_sym_t* sym =
+      iree_elf_module_lookup_global_symbol(module, symbol_name);
+  if (sym != NULL) {
+    // st_value is an ELF vaddr; bias it into host address space.
+    *out_export = module->vaddr_bias + sym->st_value;
+    return iree_ok_status();
+  }
+  return iree_make_status(
+      IREE_STATUS_NOT_FOUND,
+      "exported symbol with name '%s' not found in module", symbol_name);
+}
diff --git a/runtime/src/iree/hal/local/elf/elf_module.h b/runtime/src/iree/hal/local/elf/elf_module.h
new file mode 100644
index 0000000..326673d
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/elf_module.h
@@ -0,0 +1,92 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_ELF_ELF_LINKER_H_
+#define IREE_HAL_LOCAL_ELF_ELF_LINKER_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/elf/arch.h" // IWYU pragma: export
+#include "iree/hal/local/elf/elf_types.h" // IWYU pragma: export
+
+//==============================================================================
+// ELF symbol import table
+//==============================================================================
+
+// A single import: a NUL-terminated symbol name as it appears in the ELF
+// paired with the host function pointer (thunk) the symbol should resolve to.
+typedef struct iree_elf_import_t {
+  const char* sym_name;
+  void* thunk_ptr;
+} iree_elf_import_t;
+
+// A table of symbols made available to a module during loading.
+// NOTE(review): import resolution is not implemented yet - today the loader
+// rejects modules with any undefined symbols; confirm before relying on this.
+typedef struct iree_elf_import_table_t {
+  iree_host_size_t import_count;
+  const iree_elf_import_t* imports;
+} iree_elf_import_table_t;
+
+// TODO(benvanik): add import declaration macros that setup a unique thunk like
+// IREE_ELF_DEFINE_IMPORT(foo).
+
+//==============================================================================
+// Runtime ELF module loader/linker
+//==============================================================================
+
+// An ELF module mapped directly from memory.
+// All fields are populated during iree_elf_module_initialize_from_memory and
+// remain valid until iree_elf_module_deinitialize.
+typedef struct iree_elf_module_t {
+  // Allocator used for additional dynamic memory when needed.
+  iree_allocator_t host_allocator;
+
+  // Base host virtual address the module is loaded into.
+  uint8_t* vaddr_base;
+  // Total size, in bytes, of the virtual address space reservation.
+  iree_host_size_t vaddr_size;
+
+  // Bias applied to all relative addresses (from the string table, etc) in the
+  // loaded module. This is an offset from the vaddr_base that may not be 0 if
+  // host page granularity was larger than the ELF's defined granularity.
+  uint8_t* vaddr_bias;
+
+  // Dynamic symbol string table (.dynstr).
+  const char* dynstr;            // DT_STRTAB
+  iree_host_size_t dynstr_size;  // DT_STRSZ (bytes)
+
+  // Dynamic symbol table (.dynsym).
+  const iree_elf_sym_t* dynsym;   // DT_SYMTAB
+  iree_host_size_t dynsym_count;  // DT_HASH nchain (total symbol entry count)
+} iree_elf_module_t;
+
+// Initializes an ELF module from the ELF |raw_data| in memory.
+// |raw_data| only needs to remain valid for the initialization of the module
+// and may be discarded afterward.
+//
+// An optional |import_table| may be specified to provide a set of symbols that
+// the module may import. Strong imports will not be resolved from the host
+// system and initialization will fail if any are not present in the provided
+// table.
+//
+// Upon return |out_module| is initialized and ready for use with any present
+// .init initialization functions having been executed. To release memory
+// allocated by the module during loading iree_elf_module_deinitialize must be
+// called to unload when it is safe (no more outstanding pointers into the
+// loaded module, etc).
+iree_status_t iree_elf_module_initialize_from_memory(
+ iree_const_byte_span_t raw_data,
+ const iree_elf_import_table_t* import_table,
+ iree_allocator_t host_allocator, iree_elf_module_t* out_module);
+
+// Deinitializes a |module|, releasing any allocated executable or data pages.
+// Invalidates all symbol pointers previous retrieved from the module and any
+// pointer to data that may have been in the module text or rwdata.
+//
+// NOTE: .fini finalizers will not be executed.
+void iree_elf_module_deinitialize(iree_elf_module_t* module);
+
+// Returns the host pointer of an exported symbol with the given |symbol_name|.
+iree_status_t iree_elf_module_lookup_export(iree_elf_module_t* module,
+ const char* symbol_name,
+ void** out_export);
+
+#endif // IREE_HAL_LOCAL_ELF_ELF_LINKER_H_
diff --git a/runtime/src/iree/hal/local/elf/elf_module_test_main.c b/runtime/src/iree/hal/local/elf/elf_module_test_main.c
new file mode 100644
index 0000000..1a30698
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/elf_module_test_main.c
@@ -0,0 +1,166 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/elf_module.h"
+#include "iree/hal/local/executable_environment.h"
+#include "iree/hal/local/executable_library.h"
+
+// ELF modules for various platforms embedded in the binary:
+#include "iree/hal/local/elf/testdata/elementwise_mul.h"
+
+// Selects the embedded ELF test file matching the architecture this binary
+// was compiled for. Returns NOT_FOUND if no matching file was embedded.
+// |out_file_data| references memory embedded in the binary itself and needs
+// no lifetime management.
+static iree_status_t query_arch_test_file_data(
+    iree_const_byte_span_t* out_file_data) {
+  *out_file_data = iree_make_const_byte_span(NULL, 0);
+
+  // Map the compile-time target architecture to the embedded file suffix.
+  iree_string_view_t pattern = iree_string_view_empty();
+#if defined(IREE_ARCH_ARM_32)
+  pattern = iree_make_cstring_view("*_arm_32.so");
+#elif defined(IREE_ARCH_ARM_64)
+  pattern = iree_make_cstring_view("*_arm_64.so");
+#elif defined(IREE_ARCH_RISCV_32)
+  pattern = iree_make_cstring_view("*_riscv_32.so");
+#elif defined(IREE_ARCH_RISCV_64)
+  pattern = iree_make_cstring_view("*_riscv_64.so");
+#elif defined(IREE_ARCH_X86_32)
+  pattern = iree_make_cstring_view("*_x86_32.so");
+#elif defined(IREE_ARCH_X86_64)
+  pattern = iree_make_cstring_view("*_x86_64.so");
+#else
+#warning "No architecture pattern specified; ELF linker will not be tested"
+#endif  // IREE_ARCH_*
+
+  // Scan the embedded file table for the first name matching the pattern.
+  if (!iree_string_view_is_empty(pattern)) {
+    for (size_t i = 0; i < elementwise_mul_size(); ++i) {
+      const struct iree_file_toc_t* file_toc = &elementwise_mul_create()[i];
+      if (iree_string_view_match_pattern(iree_make_cstring_view(file_toc->name),
+                                         pattern)) {
+        *out_file_data =
+            iree_make_const_byte_span(file_toc->data, file_toc->size);
+        return iree_ok_status();
+      }
+    }
+  }
+
+  return iree_make_status(IREE_STATUS_NOT_FOUND,
+                          "no architecture-specific ELF binary embedded into "
+                          "the application for the current target platform");
+}
+
+// Loads the embedded ELF for the current architecture, resolves the library
+// query export, dispatches a single workgroup of the elementwise multiply
+// kernel, and verifies the results.
+static iree_status_t run_test() {
+  iree_const_byte_span_t file_data;
+  IREE_RETURN_IF_ERROR(query_arch_test_file_data(&file_data));
+
+  // No imports provided: the test library must be fully self-contained.
+  iree_elf_import_table_t import_table;
+  memset(&import_table, 0, sizeof(import_table));
+  iree_elf_module_t module;
+  IREE_RETURN_IF_ERROR(iree_elf_module_initialize_from_memory(
+      file_data, &import_table, iree_allocator_system(), &module));
+
+  iree_hal_executable_environment_v0_t environment;
+  iree_hal_executable_environment_initialize(iree_allocator_system(),
+                                             &environment);
+
+  // Resolve the one well-known export used to query the library interface.
+  void* query_fn_ptr = NULL;
+  IREE_RETURN_IF_ERROR(iree_elf_module_lookup_export(
+      &module, IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME, &query_fn_ptr));
+
+  union {
+    const iree_hal_executable_library_header_t** header;
+    const iree_hal_executable_library_v0_t* v0;
+  } library;
+  library.header =
+      (const iree_hal_executable_library_header_t**)iree_elf_call_p_ip(
+          query_fn_ptr, IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST,
+          &environment);
+  if (library.header == NULL) {
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "library header is empty (version mismatch?)");
+  }
+
+  const iree_hal_executable_library_header_t* header = *library.header;
+  if (header->version != IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "library version error");
+  }
+
+  // NOTE(review): this compares only min(strlen(name), 2) characters and so
+  // expects the library name to be exactly "ex" - confirm against testdata.
+  if (strncmp(header->name, "ex", strlen(header->name)) != 0) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "library name mismatches");
+  }
+
+  if (library.v0->exports.count != 1) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "entry point count mismatches");
+  }
+
+  // ret0 = arg0 * arg1
+  float arg0[4] = {1.0f, 2.0f, 3.0f, 4.0f};
+  float arg1[4] = {100.0f, 200.0f, 300.0f, 400.0f};
+  float ret0[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+  const float expected[4] = {100.0f, 400.0f, 900.0f, 1600.0f};
+
+  size_t binding_lengths[3] = {
+      sizeof(arg0),
+      sizeof(arg1),
+      sizeof(ret0),
+  };
+  void* binding_ptrs[3] = {
+      arg0,
+      arg1,
+      ret0,
+  };
+  const iree_hal_executable_dispatch_state_v0_t dispatch_state = {
+      .workgroup_size_x = 1,
+      .workgroup_size_y = 1,
+      .workgroup_size_z = 1,
+      .workgroup_count_x = 1,
+      .workgroup_count_y = 1,
+      .workgroup_count_z = 1,
+      .max_concurrency = 1,
+      // All three bindings (arg0, arg1, ret0) are passed and used by the
+      // kernel; the count must cover the full binding tables above.
+      .binding_count = IREE_ARRAYSIZE(binding_ptrs),
+      .binding_lengths = binding_lengths,
+      .binding_ptrs = binding_ptrs,
+  };
+  const iree_hal_executable_workgroup_state_v0_t workgroup_state = {
+      .workgroup_id_x = 0,
+      .workgroup_id_y = 0,
+      .workgroup_id_z = 0,
+      .processor_id = iree_cpu_query_processor_id(),
+  };
+  // Dispatch through the arch-specific thunk to honor the ELF's calling
+  // convention.
+  int ret = iree_elf_call_i_ppp((const void*)library.v0->exports.ptrs[0],
+                                (void*)&environment, (void*)&dispatch_state,
+                                (void*)&workgroup_state);
+  if (ret != 0) {
+    return iree_make_status(IREE_STATUS_INTERNAL,
+                            "dispatch function returned failure: %d", ret);
+  }
+
+  // Verify each output element against the expected elementwise product.
+  iree_status_t status = iree_ok_status();
+  for (int i = 0; i < IREE_ARRAYSIZE(expected); ++i) {
+    if (ret0[i] != expected[i]) {
+      status =
+          iree_make_status(IREE_STATUS_INTERNAL,
+                           "output mismatch: ret[%d] = %.1f, expected %.1f", i,
+                           ret0[i], expected[i]);
+      break;
+    }
+  }
+
+  iree_elf_module_deinitialize(&module);
+  return status;
+}
+
+// Entry point: exit code is the iree_status_code of the test result
+// (0 == IREE_STATUS_OK).
+int main() {
+  const iree_status_t result = run_test();
+  const int exit_code = (int)iree_status_code(result);
+  if (!iree_status_is_ok(result)) {
+    // Print the full annotated failure before freeing the status storage.
+    iree_status_fprint(stderr, result);
+    iree_status_free(result);
+  }
+  return exit_code;
+}
diff --git a/runtime/src/iree/hal/local/elf/elf_types.h b/runtime/src/iree/hal/local/elf/elf_types.h
new file mode 100644
index 0000000..3952786
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/elf_types.h
@@ -0,0 +1,420 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_ELF_ELF_TYPES_H_
+#define IREE_HAL_LOCAL_ELF_ELF_TYPES_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+
+// This file contains the ELF data structures we use in our runtime linker and
+// the definitions to support them. The structure definitions are taken from
+// the System V ABI:
+// http://www.sco.com/developers/gabi/latest/contents.html
+// LLVM's BinaryFormat ELF headers:
+// third_party/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h
+// And the Linux specification:
+// https://linux.die.net/man/5/elf
+// https://refspecs.linuxbase.org/LSB_3.1.1/LSB-Core-generic/LSB-Core-generic.html
+// (among others)
+//
+// We define both 32-bit and 64-bit variants of the structures as we support
+// both; however we only ever use one at a time based on the target
+// configuration so that we are only including the code for the
+// architecture-native integer width.
+//
+// We purposefully avoid inserting a large number of enums that we never use:
+// this implementation only needs to load our own compiled HAL executables and
+// as such we control both the compiler and the linker configuration used to
+// produce the inputs we load.
+//
+// Code can generally be written using only the iree_elf_* types and IREE_ELF_*
+// macros; if used consistently then only one source code definition is required
+// and it'll get compiled into the appropriate form with no additional
+// configuration.
+
+// Primitive scalar types for ELF32 files (System V gABI "Data Representation").
+typedef uint8_t iree_elf32_byte_t;
+typedef uint32_t iree_elf32_addr_t;
+typedef uint16_t iree_elf32_half_t;
+typedef uint32_t iree_elf32_off_t;
+typedef int32_t iree_elf32_sword_t;
+typedef uint32_t iree_elf32_word_t;
+
+// Primitive scalar types for ELF64 files. Note that word/sword remain 32-bit;
+// the dedicated xword/sxword types are used where the gABI widens to 64-bit.
+typedef uint8_t iree_elf64_byte_t;
+typedef uint64_t iree_elf64_addr_t;
+typedef uint16_t iree_elf64_half_t;
+typedef uint64_t iree_elf64_off_t;
+typedef int32_t iree_elf64_sword_t;
+typedef uint32_t iree_elf64_word_t;
+typedef uint64_t iree_elf64_xword_t;
+typedef int64_t iree_elf64_sxword_t;
+
+// Byte indices into the e_ident[] identification bytes at the file start.
+enum {
+  IREE_ELF_EI_CLASS = 4,       // IREE_ELF_ELFCLASS*
+  IREE_ELF_EI_DATA = 5,        // IREE_ELF_ELFDATA*
+  IREE_ELF_EI_VERSION = 6,     // File version (1 expected)
+  IREE_ELF_EI_OSABI = 7,       // Operating system/ABI identification
+  IREE_ELF_EI_ABIVERSION = 8,  // ABI version
+  IREE_ELF_EI_PAD = 9,         // Start of padding bytes
+  IREE_ELF_EI_NIDENT = 16,     // Size of e_ident[]
+};
+
+// Values of e_ident[IREE_ELF_EI_CLASS]: the file's address-size class.
+enum {
+  IREE_ELF_ELFCLASSNONE = 0,  // Invalid class
+  IREE_ELF_ELFCLASS32 = 1,    // 32-bit objects
+  IREE_ELF_ELFCLASS64 = 2,    // 64-bit objects
+};
+
+// Values of e_ident[IREE_ELF_EI_DATA]: byte order of the file contents.
+enum {
+  IREE_ELF_ELFDATANONE = 0,  // Invalid data encoding
+  IREE_ELF_ELFDATA2LSB = 1,  // Little-endian
+  IREE_ELF_ELFDATA2MSB = 2,  // Big-endian
+};
+
+// Object file types for iree_elf*_ehdr_t::e_type.
+enum {
+  IREE_ELF_ET_NONE = 0,  // No file type
+  IREE_ELF_ET_REL = 1,   // Relocatable file
+  IREE_ELF_ET_EXEC = 2,  // Executable file
+  IREE_ELF_ET_DYN = 3,   // Shared object file
+  IREE_ELF_ET_CORE = 4,  // Core file
+};
+
+// ELF32 file header, located at offset 0 of the file.
+typedef struct {
+  iree_elf32_byte_t e_ident[IREE_ELF_EI_NIDENT];  // see IREE_ELF_EI_*
+  iree_elf32_half_t e_type;  // IREE_ELF_ET_*
+  iree_elf32_half_t e_machine;
+  iree_elf32_word_t e_version;
+  iree_elf32_addr_t e_entry;
+  iree_elf32_off_t e_phoff;   // program header table file offset
+  iree_elf32_off_t e_shoff;   // section header table file offset
+  iree_elf32_word_t e_flags;
+  iree_elf32_half_t e_ehsize;
+  iree_elf32_half_t e_phentsize;
+  iree_elf32_half_t e_phnum;  // number of program headers
+  iree_elf32_half_t e_shentsize;
+  iree_elf32_half_t e_shnum;  // number of section headers
+  iree_elf32_half_t e_shstrndx;
+} iree_elf32_ehdr_t;
+
+// ELF64 file header; same field meanings as iree_elf32_ehdr_t with widened
+// address/offset fields.
+typedef struct {
+  iree_elf64_byte_t e_ident[IREE_ELF_EI_NIDENT];  // see IREE_ELF_EI_*
+  iree_elf64_half_t e_type;  // IREE_ELF_ET_*
+  iree_elf64_half_t e_machine;
+  iree_elf64_word_t e_version;
+  iree_elf64_addr_t e_entry;
+  iree_elf64_off_t e_phoff;   // program header table file offset
+  iree_elf64_off_t e_shoff;   // section header table file offset
+  iree_elf64_word_t e_flags;
+  iree_elf64_half_t e_ehsize;
+  iree_elf64_half_t e_phentsize;
+  iree_elf64_half_t e_phnum;  // number of program headers
+  iree_elf64_half_t e_shentsize;
+  iree_elf64_half_t e_shnum;  // number of section headers
+  iree_elf64_half_t e_shstrndx;
+} iree_elf64_ehdr_t;
+
+// Program header segment types (p_type).
+enum {
+  IREE_ELF_PT_NULL = 0,
+  IREE_ELF_PT_LOAD = 1,
+  IREE_ELF_PT_DYNAMIC = 2,
+  IREE_ELF_PT_INTERP = 3,
+  IREE_ELF_PT_NOTE = 4,
+  IREE_ELF_PT_SHLIB = 5,
+  IREE_ELF_PT_PHDR = 6,
+  IREE_ELF_PT_GNU_RELRO = 0x6474e552,
+};
+
+// Program header segment permission flags (p_flags), OR-able bitmask.
+enum {
+  IREE_ELF_PF_X = 0x1,  // Execute
+  IREE_ELF_PF_W = 0x2,  // Write
+  IREE_ELF_PF_R = 0x4,  // Read
+};
+
+// ELF32 program (segment) header.
+typedef struct {
+  iree_elf32_word_t p_type;  // IREE_ELF_PT_*
+  iree_elf32_off_t p_offset;
+  iree_elf32_addr_t p_vaddr;
+  iree_elf32_addr_t p_paddr;
+  iree_elf32_word_t p_filesz;
+  iree_elf32_word_t p_memsz;
+  iree_elf32_word_t p_flags;  // IREE_ELF_PF_*
+  iree_elf32_word_t p_align;
+} iree_elf32_phdr_t;
+
+// ELF64 program (segment) header. NOTE: p_flags moves near the front relative
+// to the ELF32 layout per the gABI - the two structs are not field-for-field
+// parallel.
+typedef struct {
+  iree_elf64_word_t p_type;   // IREE_ELF_PT_*
+  iree_elf64_word_t p_flags;  // IREE_ELF_PF_*
+  iree_elf64_off_t p_offset;
+  iree_elf64_addr_t p_vaddr;
+  iree_elf64_addr_t p_paddr;
+  iree_elf64_xword_t p_filesz;
+  iree_elf64_xword_t p_memsz;
+  iree_elf64_xword_t p_align;
+} iree_elf64_phdr_t;
+
+// An undefined, missing, irrelevant, or otherwise meaningless section ref.
+#define IREE_ELF_SHN_UNDEF 0
+
+// Section header types (sh_type).
+enum {
+  IREE_ELF_SHT_NULL = 0,
+  IREE_ELF_SHT_PROGBITS = 1,
+  IREE_ELF_SHT_SYMTAB = 2,
+  IREE_ELF_SHT_STRTAB = 3,
+  IREE_ELF_SHT_RELA = 4,
+  IREE_ELF_SHT_HASH = 5,
+  IREE_ELF_SHT_DYNAMIC = 6,
+  IREE_ELF_SHT_NOTE = 7,
+  IREE_ELF_SHT_NOBITS = 8,
+  IREE_ELF_SHT_REL = 9,
+  IREE_ELF_SHT_SHLIB = 10,
+  IREE_ELF_SHT_DYNSYM = 11,
+};
+
+// Section attribute flags (sh_flags), OR-able bitmask.
+enum {
+  IREE_ELF_SHF_WRITE = 0x1,
+  IREE_ELF_SHF_ALLOC = 0x2,
+  IREE_ELF_SHF_EXECINSTR = 0x4,
+  IREE_ELF_SHF_MERGE = 0x10,
+  IREE_ELF_SHF_STRINGS = 0x20,
+  IREE_ELF_SHF_INFO_LINK = 0x40,
+  IREE_ELF_SHF_LINK_ORDER = 0x80,
+  IREE_ELF_SHF_OS_NONCONFORMING = 0x100,
+  IREE_ELF_SHF_GROUP = 0x200
+};
+
+// ELF32 section header.
+typedef struct {
+  iree_elf32_word_t sh_name;
+  iree_elf32_word_t sh_type;   // IREE_ELF_SHT_*
+  iree_elf32_word_t sh_flags;  // IREE_ELF_SHF_*
+  iree_elf32_addr_t sh_addr;
+  iree_elf32_off_t sh_offset;
+  iree_elf32_word_t sh_size;
+  iree_elf32_word_t sh_link;
+  iree_elf32_word_t sh_info;
+  iree_elf32_word_t sh_addralign;
+  iree_elf32_word_t sh_entsize;
+} iree_elf32_shdr_t;
+
+// ELF64 section header.
+typedef struct {
+  iree_elf64_word_t sh_name;
+  iree_elf64_word_t sh_type;    // IREE_ELF_SHT_*
+  iree_elf64_xword_t sh_flags;  // IREE_ELF_SHF_*
+  iree_elf64_addr_t sh_addr;
+  iree_elf64_off_t sh_offset;
+  iree_elf64_xword_t sh_size;
+  iree_elf64_word_t sh_link;
+  iree_elf64_word_t sh_info;
+  iree_elf64_xword_t sh_addralign;
+  iree_elf64_xword_t sh_entsize;
+} iree_elf64_shdr_t;
+
+// ELF32 note header, at the start of each entry in SHT_NOTE/PT_NOTE contents.
+typedef struct {
+  iree_elf32_word_t n_namesz;
+  iree_elf32_word_t n_descsz;
+  iree_elf32_word_t n_type;
+} iree_elf32_nhdr_t;
+
+// ELF64 note header.
+typedef struct {
+  iree_elf64_word_t n_namesz;
+  iree_elf64_word_t n_descsz;
+  iree_elf64_word_t n_type;
+} iree_elf64_nhdr_t;
+
+// Packs a symbol binding (IREE_ELF_STB_*) and type (IREE_ELF_STT_*) into an
+// st_info byte.
+#define IREE_ELF_ST_INFO(bind, type) (((bind) << 4) + ((type)&0xF))
+
+// Extracts the symbol type (IREE_ELF_STT_*) from an st_info byte.
+#define IREE_ELF_ST_TYPE(info) ((info)&0xF)
+enum {
+  IREE_ELF_STT_NOTYPE = 0,
+  IREE_ELF_STT_OBJECT = 1,
+  IREE_ELF_STT_FUNC = 2,
+  IREE_ELF_STT_SECTION = 3,
+  IREE_ELF_STT_FILE = 4,
+  IREE_ELF_STT_COMMON = 5,
+};
+
+// Extracts the symbol binding (IREE_ELF_STB_*) from an st_info byte.
+#define IREE_ELF_ST_BIND(info) ((info) >> 4)
+enum {
+  IREE_ELF_STB_LOCAL = 0,   // Local symbol.
+  IREE_ELF_STB_GLOBAL = 1,  // Global symbol (export).
+  IREE_ELF_STB_WEAK = 2,    // Weak symbol (somewhat like global).
+};
+
+// Extracts the symbol visibility (IREE_ELF_STV_*) from an st_other byte.
+#define IREE_ELF_ST_VISIBILITY(o) ((o)&0x3)
+enum {
+  IREE_ELF_STV_DEFAULT = 0,
+  IREE_ELF_STV_INTERNAL = 1,
+  IREE_ELF_STV_HIDDEN = 2,
+  IREE_ELF_STV_PROTECTED = 3,
+};
+
+// ELF32 symbol table entry.
+typedef struct {
+  iree_elf32_word_t st_name;
+  iree_elf32_addr_t st_value;
+  iree_elf32_word_t st_size;
+  iree_elf32_byte_t st_info;  // IREE_ELF_ST_INFO packed bind/type
+  iree_elf32_byte_t st_other;
+  iree_elf32_half_t st_shndx;
+} iree_elf32_sym_t;
+
+// ELF64 symbol table entry. NOTE: field order differs from ELF32 per the gABI.
+typedef struct {
+  iree_elf64_word_t st_name;
+  iree_elf64_byte_t st_info;  // IREE_ELF_ST_INFO packed bind/type
+  iree_elf64_byte_t st_other;
+  iree_elf64_half_t st_shndx;
+  iree_elf64_addr_t st_value;
+  iree_elf64_xword_t st_size;
+} iree_elf64_sym_t;
+
+// Dynamic table entry tags (d_tag); the trailing comment on each value notes
+// which arm of the d_un union carries its payload.
+enum {
+  IREE_ELF_DT_NULL = 0,                    // (no data)
+  IREE_ELF_DT_NEEDED = 1,                  // d_val
+  IREE_ELF_DT_PLTRELSZ = 2,                // d_val
+  IREE_ELF_DT_PLTGOT = 3,                  // d_ptr
+  IREE_ELF_DT_HASH = 4,                    // d_ptr
+  IREE_ELF_DT_STRTAB = 5,                  // d_ptr
+  IREE_ELF_DT_SYMTAB = 6,                  // d_ptr
+  IREE_ELF_DT_RELA = 7,                    // d_ptr
+  IREE_ELF_DT_RELASZ = 8,                  // d_val
+  IREE_ELF_DT_RELAENT = 9,                 // d_val
+  IREE_ELF_DT_STRSZ = 10,                  // d_val
+  IREE_ELF_DT_SYMENT = 11,                 // d_val
+  IREE_ELF_DT_INIT = 12,                   // d_ptr
+  IREE_ELF_DT_FINI = 13,                   // d_ptr
+  IREE_ELF_DT_SONAME = 14,                 // d_val
+  IREE_ELF_DT_RPATH = 15,                  // d_val
+  IREE_ELF_DT_SYMBOLIC = 16,               // (no data)
+  IREE_ELF_DT_REL = 17,                    // d_ptr
+  IREE_ELF_DT_RELSZ = 18,                  // d_val
+  IREE_ELF_DT_RELENT = 19,                 // d_val
+  IREE_ELF_DT_PLTREL = 20,                 // d_val
+  IREE_ELF_DT_TEXTREL = 22,                // (no data)
+  IREE_ELF_DT_JMPREL = 23,                 // d_ptr
+  IREE_ELF_DT_BIND_NOW = 24,               // (no data)
+  IREE_ELF_DT_INIT_ARRAY = 25,             // d_ptr
+  IREE_ELF_DT_FINI_ARRAY = 26,             // d_ptr
+  IREE_ELF_DT_INIT_ARRAYSZ = 27,           // d_val
+  IREE_ELF_DT_FINI_ARRAYSZ = 28,           // d_val
+  IREE_ELF_DT_RUNPATH = 29,                // d_val
+  IREE_ELF_DT_FLAGS = 30,                  // d_val
+  IREE_ELF_DT_SUNW_RTLDINF = 0x6000000e,   // d_ptr
+  IREE_ELF_DT_CHECKSUM = 0x6ffffdf8,       // d_val
+  IREE_ELF_DT_PLTPADSZ = 0x6ffffdf9,       // d_val
+  IREE_ELF_DT_MOVEENT = 0x6ffffdfa,        // d_val
+  IREE_ELF_DT_MOVESZ = 0x6ffffdfb,         // d_val
+  IREE_ELF_DT_FEATURE_1 = 0x6ffffdfc,      // d_val
+  IREE_ELF_DT_POSFLAG_1 = 0x6ffffdfd,      // d_val
+  IREE_ELF_DT_SYMINSZ = 0x6ffffdfe,        // d_val
+  IREE_ELF_DT_SYMINENT = 0x6ffffdff,       // d_val
+  IREE_ELF_DT_CONFIG = 0x6ffffefa,         // d_ptr
+  IREE_ELF_DT_DEPAUDIT = 0x6ffffefb,       // d_ptr
+  IREE_ELF_DT_AUDIT = 0x6ffffefc,          // d_ptr
+  IREE_ELF_DT_PLTPAD = 0x6ffffefd,         // d_ptr
+  IREE_ELF_DT_MOVETAB = 0x6ffffefe,        // d_ptr
+  IREE_ELF_DT_SYMINFO = 0x6ffffeff,        // d_ptr
+  IREE_ELF_DT_RELACOUNT = 0x6ffffff9,      // d_val
+  IREE_ELF_DT_RELCOUNT = 0x6ffffffa,       // d_val
+  IREE_ELF_DT_FLAGS_1 = 0x6ffffffb,        // d_val
+  IREE_ELF_DT_VERDEF = 0x6ffffffc,         // d_ptr
+  IREE_ELF_DT_VERDEFNUM = 0x6ffffffd,      // d_val
+  IREE_ELF_DT_VERNEED = 0x6ffffffe,        // d_ptr
+  IREE_ELF_DT_VERNEEDNUM = 0x6fffffff,     // d_val
+  IREE_ELF_DT_AUXILIARY = 0x7ffffffd,      // d_val
+  IREE_ELF_DT_USED = 0x7ffffffe,           // d_val
+};
+
+// ELF32 entry in the PT_DYNAMIC/.dynamic table.
+typedef struct {
+  iree_elf32_sword_t d_tag;  // IREE_ELF_DT_*
+  union {
+    iree_elf32_sword_t d_val;
+    iree_elf32_addr_t d_ptr;
+  } d_un;
+} iree_elf32_dyn_t;
+
+// ELF64 entry in the PT_DYNAMIC/.dynamic table.
+typedef struct {
+  iree_elf64_sxword_t d_tag;  // IREE_ELF_DT_*
+  union {
+    iree_elf64_xword_t d_val;
+    iree_elf64_addr_t d_ptr;
+  } d_un;
+} iree_elf64_dyn_t;
+
+// ELF32 relocation record without an explicit addend (SHT_REL).
+typedef struct {
+  iree_elf32_addr_t r_offset;
+  iree_elf32_word_t r_info;  // unpack with IREE_ELF_R_SYM/IREE_ELF_R_TYPE
+} iree_elf32_rel_t;
+
+// ELF64 relocation record without an explicit addend (SHT_REL).
+typedef struct {
+  iree_elf64_addr_t r_offset;
+  iree_elf64_xword_t r_info;  // unpack with IREE_ELF_R_SYM/IREE_ELF_R_TYPE
+} iree_elf64_rel_t;
+
+// ELF32 relocation record with an explicit addend (SHT_RELA).
+typedef struct {
+  iree_elf32_addr_t r_offset;
+  iree_elf32_word_t r_info;  // unpack with IREE_ELF_R_SYM/IREE_ELF_R_TYPE
+  iree_elf32_sword_t r_addend;
+} iree_elf32_rela_t;
+
+// ELF64 relocation record with an explicit addend (SHT_RELA).
+typedef struct {
+  iree_elf64_addr_t r_offset;
+  iree_elf64_xword_t r_info;  // unpack with IREE_ELF_R_SYM/IREE_ELF_R_TYPE
+  iree_elf64_sxword_t r_addend;
+} iree_elf64_rela_t;
+
+// Selects the native-width ELF aliases matching the target pointer size.
+// Code should use only the iree_elf_* aliases and IREE_ELF_* macros below so a
+// single source definition compiles for both 32-bit and 64-bit targets.
+#if defined(IREE_PTR_SIZE_32)
+
+#define IREE_ELF_ADDR_MIN 0u
+#define IREE_ELF_ADDR_MAX UINT32_MAX
+
+typedef iree_elf32_byte_t iree_elf_byte_t;
+typedef iree_elf32_addr_t iree_elf_addr_t;
+typedef iree_elf32_half_t iree_elf_half_t;
+typedef iree_elf32_off_t iree_elf_off_t;
+typedef iree_elf32_sword_t iree_elf_sword_t;
+typedef iree_elf32_word_t iree_elf_word_t;
+
+typedef iree_elf32_dyn_t iree_elf_dyn_t;
+typedef iree_elf32_rel_t iree_elf_rel_t;
+typedef iree_elf32_rela_t iree_elf_rela_t;
+typedef iree_elf32_sym_t iree_elf_sym_t;
+typedef iree_elf32_ehdr_t iree_elf_ehdr_t;
+typedef iree_elf32_phdr_t iree_elf_phdr_t;
+typedef iree_elf32_shdr_t iree_elf_shdr_t;
+typedef iree_elf32_nhdr_t iree_elf_nhdr_t;
+
+// ELF32 r_info packing: high 24 bits symbol index, low 8 bits reloc type.
+#define IREE_ELF_R_SYM(x) ((x) >> 8)
+#define IREE_ELF_R_TYPE(x) ((x)&0xFF)
+
+#elif defined(IREE_PTR_SIZE_64)
+
+#define IREE_ELF_ADDR_MIN 0ull
+#define IREE_ELF_ADDR_MAX UINT64_MAX
+
+typedef iree_elf64_byte_t iree_elf_byte_t;
+typedef iree_elf64_addr_t iree_elf_addr_t;
+typedef iree_elf64_half_t iree_elf_half_t;
+typedef iree_elf64_off_t iree_elf_off_t;
+typedef iree_elf64_sword_t iree_elf_sword_t;
+typedef iree_elf64_word_t iree_elf_word_t;
+
+typedef iree_elf64_dyn_t iree_elf_dyn_t;
+typedef iree_elf64_rel_t iree_elf_rel_t;
+typedef iree_elf64_rela_t iree_elf_rela_t;
+typedef iree_elf64_sym_t iree_elf_sym_t;
+typedef iree_elf64_ehdr_t iree_elf_ehdr_t;
+typedef iree_elf64_phdr_t iree_elf_phdr_t;
+typedef iree_elf64_shdr_t iree_elf_shdr_t;
+typedef iree_elf64_nhdr_t iree_elf_nhdr_t;
+
+// ELF64 r_info packing: high 32 bits symbol index, low 32 bits reloc type.
+#define IREE_ELF_R_SYM(i) ((i) >> 32)
+#define IREE_ELF_R_TYPE(i) ((i)&0xFFFFFFFF)
+
+#else
+#error "unsupported ELF N size (only 32/64-bits are defined)"
+#endif  // IREE_PTR_SIZE_*
diff --git a/runtime/src/iree/hal/local/elf/platform.h b/runtime/src/iree/hal/local/elf/platform.h
new file mode 100644
index 0000000..03af89b
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/platform.h
@@ -0,0 +1,177 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_ELF_PLATFORM_H_
+#define IREE_HAL_LOCAL_ELF_PLATFORM_H_
+
+#include "iree/base/api.h"
+
+// TODO(benvanik): move some of this to iree/base/internal/. A lot of this code
+// comes from an old partial implementation of memory objects that should be
+// finished. When done it will replace the need for all of these platform files.
+
+//==============================================================================
+// Alignment utilities
+//==============================================================================
+
+// Defines a range of bytes with any arbitrary alignment.
+// Most operations will adjust this range by the allocation granularity, meaning
+// that a range that straddles a page boundary will be specifying multiple pages
+// (such as offset=1, length=4096 with a page size of 4096 indicating 2 pages).
+typedef struct iree_byte_range_t {
+  iree_host_size_t offset;  // byte offset relative to some base address
+  iree_host_size_t length;  // length of the range in bytes
+} iree_byte_range_t;
+
+// Rounds |addr| down to the nearest |page_alignment| boundary.
+// |page_alignment| must be a power of two.
+static inline uintptr_t iree_page_align_start(uintptr_t addr,
+                                              iree_host_size_t page_alignment) {
+  const uintptr_t mask = (uintptr_t)page_alignment - 1;
+  return addr & ~mask;
+}
+
+// Rounds |addr| up to the nearest |page_alignment| boundary.
+// |page_alignment| must be a power of two.
+static inline uintptr_t iree_page_align_end(uintptr_t addr,
+                                            iree_host_size_t page_alignment) {
+  const uintptr_t bump = (uintptr_t)page_alignment - 1;
+  return iree_page_align_start(addr + bump, page_alignment);
+}
+
+// Computes a page-aligned range base and total length from a range.
+// This will produce a starting address <= the range offset and a length >=
+// the range length.
+static inline void iree_page_align_range(void* base_address,
+                                         iree_byte_range_t range,
+                                         iree_host_size_t page_alignment,
+                                         void** out_start_address,
+                                         iree_host_size_t* out_aligned_length) {
+  // Align the range start down and the range end up to page boundaries.
+  void* range_start = (void*)iree_page_align_start(
+      (uintptr_t)base_address + range.offset, page_alignment);
+  void* range_end = (void*)iree_page_align_end(
+      (uintptr_t)base_address + range.offset + range.length, page_alignment);
+  *out_start_address = range_start;
+  // NOTE(review): pointers are narrowed through iree_host_size_t here; a
+  // uintptr_t difference would be the conventional spelling - confirm
+  // iree_host_size_t is pointer-sized on all supported targets.
+  *out_aligned_length =
+      (iree_host_size_t)range_end - (iree_host_size_t)range_start;
+}
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+// System platform/environment information defining memory parameters.
+// These can be used to control application behavior (such as whether to enable
+// a JIT if executable pages can be allocated) and allow callers to compute
+// memory ranges based on the variable page size of the platform.
+typedef struct iree_memory_info_t {
+ // The page size and the granularity of page protection and commitment. This
+ // is the page size used by the iree_memory_view_t functions.
+ iree_host_size_t normal_page_size;
+
+ // The granularity for the starting address at which virtual memory can be
+ // allocated.
+ iree_host_size_t normal_page_granularity;
+
+ // The minimum page size and granularity for large pages or 0 if unavailable.
+ // To use large pages the size and alignment must be a multiple of this value
+ // and the IREE_MEMORY_VIEW_FLAG_LARGE_PAGES must be set.
+ iree_host_size_t large_page_granularity;
+
+ // Indicates whether executable pages may be allocated within the process.
+ // Some platforms or release environments have restrictions on whether
+ // executable pages may be allocated from user code (such as iOS).
+ bool can_allocate_executable_pages;
+} iree_memory_info_t;
+
+// Queries the system platform/environment memory information.
+// Callers should cache the results to avoid repeated queries, such as storing
+// the used fields in an allocator upon initialization to reuse during
+// allocations made via the allocator.
+void iree_memory_query_info(iree_memory_info_t* out_info);
+
+// Enter a W^X region where pages will be changed RW->RX or RX->RW and write
+// protection should be suspended. Only affects the calling thread and must be
+// paired with iree_memory_jit_context_end.
+void iree_memory_jit_context_begin(void);
+
+// Exits a W^X region previously entered with iree_memory_jit_context_begin.
+void iree_memory_jit_context_end(void);
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+// Defines which access operations are allowed on a view of memory.
+// Attempts to perform an access not originally allowed when the view was
+// defined may result in process termination/exceptions/sadness on platforms
+// with real MMUs and are generally not detectable: treat limited access as a
+// fail-safe mechanism only.
+enum iree_memory_access_bits_t {
+ // Pages in the view may be read by the process.
+ // Some platforms may not respect this value being unset meaning that reads
+ // will still succeed.
+ IREE_MEMORY_ACCESS_READ = 1u << 0,
+ // Pages in the view may be written by the process.
+ // If unset then writes will result in process termination.
+ IREE_MEMORY_ACCESS_WRITE = 1u << 1,
+ // Pages in the view can be executed as native machine code.
+ // Callers must ensure iree_memory_info_t::can_allocate_executable_pages is
+ // true prior to requesting executable memory as certain platforms or release
+ // environments may not support allocating/using executable pages.
+ IREE_MEMORY_ACCESS_EXECUTE = 1u << 2,
+};
+typedef uint32_t iree_memory_access_t;
+
+// Flags used to control the behavior of allocated memory views.
+enum iree_memory_view_flag_bits_t {
+ // TODO(benvanik): pull from memory_object.h.
+ IREE_MEMORY_VIEW_FLAG_NONE = 0u,
+
+ // Indicates that the memory may be used to execute code.
+ // May be used to ask for special privileges (like MAP_JIT on MacOS).
+ IREE_MEMORY_VIEW_FLAG_MAY_EXECUTE = 1u << 10,
+};
+typedef uint32_t iree_memory_view_flags_t;
+
+// Reserves a range of virtual address space in the host process.
+// The base alignment will be that of the page granularity as specified
+// (normal or large) in |flags| and |total_length| will be adjusted to match.
+//
+// The resulting range at |out_base_address| will be uncommitted and
+// inaccessible on systems with memory protection. Pages within the range must
+// first be committed with iree_memory_view_commit_ranges and then may have
+// their access permissions changed with iree_memory_view_protect_ranges.
+//
+// Implemented by VirtualAlloc+MEM_RESERVE/mmap+PROT_NONE.
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+ iree_host_size_t total_length,
+ iree_allocator_t allocator,
+ void** out_base_address);
+
+// Releases a range of virtual address space previously reserved with
+// iree_memory_view_reserve; |base_address| and |total_length| must match the
+// original reservation.
+void iree_memory_view_release(void* base_address, iree_host_size_t total_length,
+ iree_allocator_t allocator);
+
+// Commits pages overlapping the byte ranges defined by |byte_ranges|.
+// Ranges will be adjusted to the page granularity of the view.
+//
+// Implemented by VirtualAlloc+MEM_COMMIT/mmap+!PROT_NONE.
+iree_status_t iree_memory_view_commit_ranges(
+ void* base_address, iree_host_size_t range_count,
+ const iree_byte_range_t* ranges, iree_memory_access_t initial_access);
+
+// Changes the access protection of view byte ranges defined by |byte_ranges|.
+// Ranges will be adjusted to the page granularity of the view.
+//
+// Implemented by VirtualProtect/mprotect:
+// https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualprotect
+// https://man7.org/linux/man-pages/man2/mprotect.2.html
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+ iree_host_size_t range_count,
+ const iree_byte_range_t* ranges,
+ iree_memory_access_t new_access);
+
+// Flushes the CPU instruction cache for a given range of bytes.
+// May be a no-op depending on architecture, but must be called prior to
+// executing code from any pages that have been written during load.
+void iree_memory_view_flush_icache(void* base_address, iree_host_size_t length);
+
+#endif // IREE_HAL_LOCAL_ELF_PLATFORM_H_
diff --git a/runtime/src/iree/hal/local/elf/platform/apple.c b/runtime/src/iree/hal/local/elf/platform/apple.c
new file mode 100644
index 0000000..c6c8129
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/platform/apple.c
@@ -0,0 +1,179 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+#if defined(IREE_PLATFORM_APPLE)
+
+// NOTE: because Apple there's some hoop-jumping to get executable code.
+// https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
+// https://keith.github.io/xcode-man-pages/pthread_jit_write_protect_np.3.html
+
+#include <errno.h>
+#include <libkern/OSCacheControl.h>
+#include <mach/vm_statistics.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+// MAP_JIT and related utilities are only available on MacOS 11.0+.
+#if defined(MAC_OS_VERSION_11_0) && \
+ MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+#define IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0(expr) \
+ if (__builtin_available(macOS 11.0, *)) { \
+ expr \
+ }
+#else
+#define IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0(expr)
+#endif // MAC_OS_VERSION_11_0
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+// Queries host page-size parameters and executable-page support.
+void iree_memory_query_info(iree_memory_info_t* out_info) {
+  memset(out_info, 0, sizeof(*out_info));
+
+  const long page_size = sysconf(_SC_PAGESIZE);
+  out_info->normal_page_size = page_size;
+  out_info->normal_page_granularity = page_size;
+  out_info->large_page_granularity = 2 * 1024 * 1024;  // What V8 uses.
+
+  out_info->can_allocate_executable_pages = true;
+}
+
+// Disables JIT write protection for the calling thread so MAP_JIT pages can be
+// written; no-op before macOS 11.0 or when the hardware doesn't support it.
+void iree_memory_jit_context_begin(void) {
+  IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0({
+    if (pthread_jit_write_protect_supported_np()) {
+      pthread_jit_write_protect_np(0);  // 0 = writable (not executable)
+    }
+  });
+}
+
+// Re-enables JIT write protection for the calling thread, making MAP_JIT pages
+// executable again; pairs with iree_memory_jit_context_begin.
+void iree_memory_jit_context_end(void) {
+  IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0({
+    if (pthread_jit_write_protect_supported_np()) {
+      pthread_jit_write_protect_np(1);  // 1 = executable (not writable)
+    }
+  });
+}
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+// This user tag makes it easier to find our pages in vmmap dumps.
+#define IREE_MEMORY_MMAP_FD VM_MAKE_TAG(255)
+
+// Maps iree_memory_access_t bits to the equivalent mmap/mprotect PROT_* bits.
+static int iree_memory_access_to_prot(iree_memory_access_t access) {
+  int prot = 0;
+  prot |= (access & IREE_MEMORY_ACCESS_READ) ? PROT_READ : 0;
+  prot |= (access & IREE_MEMORY_ACCESS_WRITE) ? PROT_WRITE : 0;
+  prot |= (access & IREE_MEMORY_ACCESS_EXECUTE) ? PROT_EXEC : 0;
+  return prot;
+}
+
+// Reserves |total_length| bytes of virtual address space without committing.
+// The reservation is mapped PROT_NONE; pages must be committed with
+// iree_memory_view_commit_ranges before access. |allocator| is unused here as
+// pages come from mmap. On failure *|out_base_address| remains NULL and a
+// status derived from errno is returned.
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+                                       iree_host_size_t total_length,
+                                       iree_allocator_t allocator,
+                                       void** out_base_address) {
+  *out_base_address = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = PROT_NONE;
+  int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_NORESERVE;
+  // MAP_JIT is required on Apple Silicon to later make the pages executable
+  // and is only available on macOS 11.0+.
+  IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0({
+    if (flags & IREE_MEMORY_VIEW_FLAG_MAY_EXECUTE) {
+      mmap_flags |= MAP_JIT;
+    }
+  });
+
+  // The fd argument carries the VM tag so our pages are identifiable in vmmap.
+  void* base_address =
+      mmap(NULL, total_length, mmap_prot, mmap_flags, IREE_MEMORY_MMAP_FD, 0);
+  if (base_address == MAP_FAILED) {
+    // Fix: previously the MAP_FAILED sentinel (-1) was stored through
+    // |out_base_address| on failure; keep it NULL so callers that only check
+    // the pointer don't treat (void*)-1 as a valid base.
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(iree_status_code_from_errno(errno),
+                            "mmap reservation failed");
+  }
+
+  *out_base_address = base_address;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Unmaps an entire reservation made with iree_memory_view_reserve.
+// |allocator| is unused on this platform (pages come from mmap).
+void iree_memory_view_release(void* base_address, iree_host_size_t total_length,
+                              iree_allocator_t allocator) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // NOTE: return value ignored as this is a shutdown path.
+  munmap(base_address, total_length);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Commits the pages overlapping each of |ranges| with |initial_access|
+// protections by remapping them MAP_FIXED over the PROT_NONE reservation.
+// Stops at the first failing range.
+// NOTE(review): the recommit does not re-specify MAP_JIT - confirm executable
+// (MAY_EXECUTE) reservations survive commit on Apple Silicon.
+iree_status_t iree_memory_view_commit_ranges(
+    void* base_address, iree_host_size_t range_count,
+    const iree_byte_range_t* ranges, iree_memory_access_t initial_access) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = iree_memory_access_to_prot(initial_access);
+  int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_FIXED;
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < range_count; ++i) {
+    // Expand each byte range outward to whole pages; mmap requires
+    // page-aligned addresses/lengths.
+    void* range_start = NULL;
+    iree_host_size_t aligned_length = 0;
+    iree_page_align_range(base_address, ranges[i], getpagesize(), &range_start,
+                          &aligned_length);
+    void* result = mmap(range_start, aligned_length, mmap_prot, mmap_flags,
+                        IREE_MEMORY_MMAP_FD, 0);
+    if (result == MAP_FAILED) {
+      status = iree_make_status(iree_status_code_from_errno(errno),
+                                "mmap commit failed");
+      break;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Changes the page protections of each of |ranges| to |new_access| via
+// mprotect. Stops at the first failing range.
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+                                              iree_host_size_t range_count,
+                                              const iree_byte_range_t* ranges,
+                                              iree_memory_access_t new_access) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = iree_memory_access_to_prot(new_access);
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < range_count; ++i) {
+    // Expand each byte range outward to whole pages; mprotect requires
+    // page-aligned addresses.
+    void* range_start = NULL;
+    iree_host_size_t aligned_length = 0;
+    iree_page_align_range(base_address, ranges[i], getpagesize(), &range_start,
+                          &aligned_length);
+    int ret = mprotect(range_start, aligned_length, mmap_prot);
+    if (ret != 0) {
+      status = iree_make_status(iree_status_code_from_errno(errno),
+                                "mprotect failed");
+      break;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Also declared by libkern/OSCacheControl.h (included above); redeclared here
+// presumably for older SDKs - TODO confirm and drop if redundant.
+void sys_icache_invalidate(void* start, size_t len);
+
+// Flushes the CPU instruction cache after writing code into executable pages.
+void iree_memory_view_flush_icache(void* base_address,
+                                   iree_host_size_t length) {
+  sys_icache_invalidate(base_address, length);
+}
+
+#endif // IREE_PLATFORM_APPLE
diff --git a/runtime/src/iree/hal/local/elf/platform/generic.c b/runtime/src/iree/hal/local/elf/platform/generic.c
new file mode 100644
index 0000000..0f68592
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/platform/generic.c
@@ -0,0 +1,99 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+#if defined(IREE_PLATFORM_GENERIC)
+
+#include <malloc.h>
+#include <stdlib.h>
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+// TODO(benvanik): control with a config.h.
+#define IREE_MEMORY_PAGE_SIZE_NORMAL 4096
+#define IREE_MEMORY_PAGE_SIZE_LARGE 4096
+
+// Reports the compile-time configured page sizes; generic targets have no OS
+// facility to query them at runtime.
+void iree_memory_query_info(iree_memory_info_t* out_info) {
+  memset(out_info, 0, sizeof(*out_info));
+
+  out_info->normal_page_size = IREE_MEMORY_PAGE_SIZE_NORMAL;
+  out_info->normal_page_granularity = IREE_MEMORY_PAGE_SIZE_NORMAL;
+  out_info->large_page_granularity = IREE_MEMORY_PAGE_SIZE_LARGE;
+
+  out_info->can_allocate_executable_pages = true;
+}
+
+// No W^X write-protection toggling is required on generic targets.
+void iree_memory_jit_context_begin(void) {}
+
+void iree_memory_jit_context_end(void) {}
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+// "Reserves" memory with a heap allocation; generic targets have no virtual
+// memory system so reservation and commit are effectively the same operation.
+// NOTE(review): |flags| (including MAY_EXECUTE) is ignored and the result only
+// has malloc alignment rather than page alignment - confirm callers on
+// IREE_PLATFORM_GENERIC tolerate this.
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+                                       iree_host_size_t total_length,
+                                       iree_allocator_t allocator,
+                                       void** out_base_address) {
+  *out_base_address = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status =
+      iree_allocator_malloc(allocator, total_length, out_base_address);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees a "reservation" made by iree_memory_view_reserve (a plain heap
+// allocation on this platform); |total_length| is unused.
+void iree_memory_view_release(void* base_address, iree_host_size_t total_length,
+                              iree_allocator_t allocator) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_allocator_free(allocator, base_address);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// No-op: heap memory from iree_memory_view_reserve is already committed and
+// accessible on this platform.
+iree_status_t iree_memory_view_commit_ranges(
+    void* base_address, iree_host_size_t range_count,
+    const iree_byte_range_t* ranges, iree_memory_access_t initial_access) {
+  // No-op.
+  return iree_ok_status();
+}
+
+// No-op: there is no page protection hardware/OS support on generic targets.
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+                                              iree_host_size_t range_count,
+                                              const iree_byte_range_t* ranges,
+                                              iree_memory_access_t new_access) {
+  // No-op.
+  return iree_ok_status();
+}
+
+// IREE_ELF_CLEAR_CACHE can be defined externally to override this default
+// behavior.
+#if !defined(IREE_ELF_CLEAR_CACHE)
+// __has_builtin was added in GCC 10, so just hard-code the availability
+// for < 10, special cased here so it can be dropped once no longer needed.
+#if defined __GNUC__ && __GNUC__ < 10
+#define IREE_ELF_CLEAR_CACHE(start, end) __builtin___clear_cache(start, end)
+#elif defined __has_builtin
+#if __has_builtin(__builtin___clear_cache)
+#define IREE_ELF_CLEAR_CACHE(start, end) __builtin___clear_cache(start, end)
+#endif  // __builtin___clear_cache
+#endif  // __has_builtin
+#endif  // !defined(IREE_ELF_CLEAR_CACHE)
+
+// Fail the build early if no cache-clearing primitive is available.
+#if !defined(IREE_ELF_CLEAR_CACHE)
+#error "no instruction cache clear implementation"
+#endif  // !defined(IREE_ELF_CLEAR_CACHE)
+
+// Invalidates the instruction cache for [base_address, base_address+length).
+// NOTE(review): arithmetic on void* relies on the GCC/Clang extension treating
+// sizeof(void) as 1 - fine for the compilers gated above; confirm no other
+// toolchain reaches this path.
+void iree_memory_view_flush_icache(void* base_address,
+                                   iree_host_size_t length) {
+  IREE_ELF_CLEAR_CACHE(base_address, base_address + length);
+}
+
+#endif // IREE_PLATFORM_GENERIC
diff --git a/runtime/src/iree/hal/local/elf/platform/linux.c b/runtime/src/iree/hal/local/elf/platform/linux.c
new file mode 100644
index 0000000..4dfc1ff
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/platform/linux.c
@@ -0,0 +1,164 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_LINUX)
+
+#include <errno.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+void iree_memory_query_info(iree_memory_info_t* out_info) {
+ memset(out_info, 0, sizeof(*out_info));
+
+ int page_size = sysconf(_SC_PAGESIZE);
+ out_info->normal_page_size = page_size;
+ out_info->normal_page_granularity = page_size;
+
+ // Large pages aren't currently used so we aren't introducing the build goo
+ // to detect and use them yet.
+ // https://linux.die.net/man/3/gethugepagesizes
+ // http://manpages.ubuntu.com/manpages/bionic/man3/gethugepagesize.3.html
+ // Would be:
+ // #include <hugetlbfs.h>
+ // out_info->large_page_granularity = gethugepagesize();
+ out_info->large_page_granularity = page_size;
+
+ out_info->can_allocate_executable_pages = true;
+}
+
+void iree_memory_jit_context_begin(void) {}
+
+void iree_memory_jit_context_end(void) {}
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+static int iree_memory_access_to_prot(iree_memory_access_t access) {
+ int prot = 0;
+ if (access & IREE_MEMORY_ACCESS_READ) prot |= PROT_READ;
+ if (access & IREE_MEMORY_ACCESS_WRITE) prot |= PROT_WRITE;
+ if (access & IREE_MEMORY_ACCESS_EXECUTE) prot |= PROT_EXEC;
+ return prot;
+}
+
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+ iree_host_size_t total_length,
+ iree_allocator_t allocator,
+ void** out_base_address) {
+ *out_base_address = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ int mmap_prot = PROT_NONE;
+ int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_NORESERVE;
+
+ iree_status_t status = iree_ok_status();
+ void* base_address = mmap(NULL, total_length, mmap_prot, mmap_flags, -1, 0);
+ if (base_address == MAP_FAILED) {
+ status = iree_make_status(iree_status_code_from_errno(errno),
+ "mmap reservation failed");
+ }
+
+ *out_base_address = base_address;
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+void iree_memory_view_release(void* base_address, iree_host_size_t total_length,
+ iree_allocator_t allocator) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // NOTE: return value ignored as this is a shutdown path.
+ munmap(base_address, total_length);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_memory_view_commit_ranges(
+ void* base_address, iree_host_size_t range_count,
+ const iree_byte_range_t* ranges, iree_memory_access_t initial_access) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ int mmap_prot = iree_memory_access_to_prot(initial_access);
+ int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_FIXED;
+
+ iree_status_t status = iree_ok_status();
+ for (iree_host_size_t i = 0; i < range_count; ++i) {
+ void* range_start = NULL;
+ iree_host_size_t aligned_length = 0;
+ iree_page_align_range(base_address, ranges[i], getpagesize(), &range_start,
+ &aligned_length);
+ void* result =
+ mmap(range_start, aligned_length, mmap_prot, mmap_flags, -1, 0);
+ if (result == MAP_FAILED) {
+ status = iree_make_status(iree_status_code_from_errno(errno),
+ "mmap commit failed");
+ break;
+ }
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+ iree_host_size_t range_count,
+ const iree_byte_range_t* ranges,
+ iree_memory_access_t new_access) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ int mmap_prot = iree_memory_access_to_prot(new_access);
+
+ iree_status_t status = iree_ok_status();
+ for (iree_host_size_t i = 0; i < range_count; ++i) {
+ void* range_start = NULL;
+ iree_host_size_t aligned_length = 0;
+ iree_page_align_range(base_address, ranges[i], getpagesize(), &range_start,
+ &aligned_length);
+ int ret = mprotect(range_start, aligned_length, mmap_prot);
+ if (ret != 0) {
+ status = iree_make_status(iree_status_code_from_errno(errno),
+ "mprotect failed");
+ break;
+ }
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// IREE_ELF_CLEAR_CACHE can be defined externally to override this default
+// behavior.
+#if !defined(IREE_ELF_CLEAR_CACHE)
+// __has_builtin was added in GCC 10, so just hard-code the availability
+// for < 10, special cased here so it can be dropped once no longer needed.
+#if defined __GNUC__ && __GNUC__ < 10
+#define IREE_ELF_CLEAR_CACHE(start, end) __builtin___clear_cache(start, end)
+#elif defined __has_builtin
+#if __has_builtin(__builtin___clear_cache)
+#define IREE_ELF_CLEAR_CACHE(start, end) __builtin___clear_cache(start, end)
+#endif // __builtin___clear_cache
+#endif // __has_builtin
+#endif // !defined(IREE_ELF_CLEAR_CACHE)
+
+#if !defined(IREE_ELF_CLEAR_CACHE)
+#error "no instruction cache clear implementation"
+#endif // !defined(IREE_ELF_CLEAR_CACHE)
+
+void iree_memory_view_flush_icache(void* base_address,
+ iree_host_size_t length) {
+ IREE_ELF_CLEAR_CACHE(base_address, base_address + length);
+}
+
+#endif // IREE_PLATFORM_*
diff --git a/runtime/src/iree/hal/local/elf/platform/windows.c b/runtime/src/iree/hal/local/elf/platform/windows.c
new file mode 100644
index 0000000..7d3b313
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/platform/windows.c
@@ -0,0 +1,152 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+#if defined(IREE_PLATFORM_WINDOWS)
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+void iree_memory_query_info(iree_memory_info_t* out_info) {
+ memset(out_info, 0, sizeof(*out_info));
+
+ SYSTEM_INFO system_info;
+ GetSystemInfo(&system_info);
+ out_info->normal_page_size = system_info.dwPageSize;
+ out_info->normal_page_granularity = system_info.dwAllocationGranularity;
+
+ out_info->large_page_granularity = GetLargePageMinimum();
+
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+ out_info->can_allocate_executable_pages = true;
+#else
+ // The application can define the `codeGeneration` property to enable use of
+ // PAGE_EXECUTE but cannot use PAGE_EXECUTE_READWRITE - it's still possible to
+ // make that work but it requires aliasing views (one with READWRITE and one
+ // with EXECUTE) and I'm not sure if anyone will ever care.
+ out_info->can_allocate_executable_pages = false;
+#endif // WINAPI_PARTITION_DESKTOP
+}
+
+void iree_memory_jit_context_begin(void) {}
+
+void iree_memory_jit_context_end(void) {}
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+// https://docs.microsoft.com/en-us/windows/win32/memory/memory-protection-constants
+static DWORD iree_memory_access_to_win32_page_flags(
+ iree_memory_access_t access) {
+ DWORD protect = 0;
+ if (access & IREE_MEMORY_ACCESS_EXECUTE) {
+ if (access & IREE_MEMORY_ACCESS_WRITE) {
+ protect |= PAGE_EXECUTE_READWRITE;
+ } else if (access & IREE_MEMORY_ACCESS_READ) {
+ protect |= PAGE_EXECUTE_READ;
+ } else {
+ protect |= PAGE_EXECUTE;
+ }
+ } else if (access & IREE_MEMORY_ACCESS_WRITE) {
+ protect |= PAGE_READWRITE;
+ } else if (access & IREE_MEMORY_ACCESS_READ) {
+ protect |= PAGE_READONLY;
+ } else {
+ protect |= PAGE_NOACCESS;
+ }
+ return protect;
+}
+
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+ iree_host_size_t total_length,
+ iree_allocator_t allocator,
+ void** out_base_address) {
+ *out_base_address = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status = iree_ok_status();
+
+ void* base_address =
+ VirtualAlloc(NULL, total_length, MEM_RESERVE, PAGE_NOACCESS);
+ if (base_address == NULL) {
+ status = iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+ "VirtualAlloc failed to reserve");
+ }
+
+ *out_base_address = base_address;
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+void iree_memory_view_release(void* base_address, iree_host_size_t total_length,
+ iree_allocator_t allocator) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ // NOTE: return value ignored as this is a shutdown path.
+ VirtualFree(base_address, 0, MEM_RELEASE);
+ IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_memory_view_commit_ranges(
+ void* base_address, iree_host_size_t range_count,
+ const iree_byte_range_t* ranges, iree_memory_access_t initial_access) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ DWORD initial_protect =
+ iree_memory_access_to_win32_page_flags(initial_access);
+
+ iree_status_t status = iree_ok_status();
+ for (iree_host_size_t i = 0; i < range_count; ++i) {
+ if (!VirtualAlloc((uint8_t*)base_address + ranges[i].offset,
+ ranges[i].length, MEM_COMMIT, initial_protect)) {
+ status =
+ iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+ "VirtualAlloc failed to commit");
+ break;
+ }
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+ iree_host_size_t range_count,
+ const iree_byte_range_t* ranges,
+ iree_memory_access_t new_access) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ DWORD new_protect = iree_memory_access_to_win32_page_flags(new_access);
+
+ iree_status_t status = iree_ok_status();
+ for (iree_host_size_t i = 0; i < range_count; ++i) {
+ uint8_t* range_address = (uint8_t*)base_address + ranges[i].offset;
+ DWORD old_protect = 0;
+ BOOL ret = VirtualProtect(range_address, ranges[i].length, new_protect,
+ &old_protect);
+ if (!ret) {
+ status =
+ iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+ "VirtualProtect failed");
+ break;
+ }
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+void iree_memory_view_flush_icache(void* base_address,
+ iree_host_size_t length) {
+ FlushInstructionCache(GetCurrentProcess(), base_address, length);
+}
+
+#endif // IREE_PLATFORM_WINDOWS
diff --git a/runtime/src/iree/hal/local/elf/testdata/BUILD b/runtime/src/iree/hal/local/elf/testdata/BUILD
new file mode 100644
index 0000000..40f0124
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/BUILD
@@ -0,0 +1,21 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+c_embed_data(
+ name = "elementwise_mul",
+ srcs = glob(["elementwise_mul_*.so"]),
+ c_file_output = "elementwise_mul.c",
+ flatten = True,
+ h_file_output = "elementwise_mul.h",
+)
diff --git a/runtime/src/iree/hal/local/elf/testdata/CMakeLists.txt b/runtime/src/iree/hal/local/elf/testdata/CMakeLists.txt
new file mode 100644
index 0000000..4e53175
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/CMakeLists.txt
@@ -0,0 +1,27 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/local/elf/testdata/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+file(GLOB _GLOB_ELEMENTWISE_MUL_X_SO LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} CONFIGURE_DEPENDS elementwise_mul_*.so)
+iree_c_embed_data(
+ NAME
+ elementwise_mul
+ SRCS
+ "${_GLOB_ELEMENTWISE_MUL_X_SO}"
+ C_FILE_OUTPUT
+ "elementwise_mul.c"
+ H_FILE_OUTPUT
+ "elementwise_mul.h"
+ FLATTEN
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir
new file mode 100644
index 0000000..65bfa0f
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir
@@ -0,0 +1,74 @@
+// An elementwise multiply of two 4xf32 values:
+// %dst = arith.mulf %lhs, %rhs : tensor<4xf32>
+// This program could be that simple; however, this example demonstrates how to
+// perform workgroup-level tiling.
+//
+// Can be run with:
+// iree/hal/local/executable_library_benchmark \
+// --executable_format=EX_ELF \
+// --executable_file=iree/hal/local/elf/testdata/elementwise_mul_x86_64.so \
+// --entry_point=0 \
+// --workgroup_count_x=1 \
+// --workgroup_count_y=1 \
+// --workgroup_count_z=1 \
+// --workgroup_size_x=1 \
+// --workgroup_size_y=1 \
+// --workgroup_size_z=1 \
+// --binding=4xf32=1,2,3,4 \
+// --binding=4xf32=100,200,300,400 \
+// --binding=4xf32=0,0,0,0
+
+// lhs * rhs => dst / s0b0 * s0b1 => s0b2
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>,
+ #hal.descriptor_set.binding<2, storage_buffer>
+ ]>
+]>
+
+// A single executable source definition is allowed per translation in this mode
+// as linking and multi-executable embedding support requires our host-side IR.
+hal.executable.source public @ex {
+ // Exported functions are declared with the layout they use and may optionally
+ // contain other information - though when hand-authoring that's usually
+ // omitted.
+ //
+ // The ordinal is used to specify the entry point on command line tools and
+ // must be unique across all entry points within the same executable.
+ hal.executable.entry_point public @elementwise_mul ordinal(0) layout(#executable_layout)
+
+ // The inner module defining the executable. This may have any number of
+ // private functions and only those with declared entry points will be
+ // exported.
+ builtin.module {
+ func.func @elementwise_mul() {
+ %lhs = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(32) : !flow.dispatch.tensor<readonly:4xf32>
+ %rhs = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(32) : !flow.dispatch.tensor<readonly:4xf32>
+ %dst = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(32) : !flow.dispatch.tensor<writeonly:4xf32>
+ %workgroup_size_x = hal.interface.workgroup.size[0] : index
+ %workgroup_id_x = hal.interface.workgroup.id[0] : index
+ %workgroup_count_x = hal.interface.workgroup.count[0] : index
+ %base_i = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_x, %workgroup_size_x]
+ %step_i = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_x, %workgroup_size_x]
+ %end_i = arith.constant 4 : index
+ scf.for %i = %base_i to %end_i step %step_i {
+ %remaining = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 4)>(%i)[%workgroup_size_x]
+ %lhs_tile = flow.dispatch.tensor.load %lhs, offsets = [%i], sizes = [%remaining], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<?xf32>
+ %rhs_tile = flow.dispatch.tensor.load %rhs, offsets = [%i], sizes = [%remaining], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<?xf32>
+ %dst_init = linalg.init_tensor [%remaining] : tensor<?xf32>
+ %dst_tile = linalg.generic {
+ indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
+ iterator_types = ["parallel"]
+ } ins(%lhs_tile, %rhs_tile : tensor<?xf32>, tensor<?xf32>)
+ outs(%dst_init : tensor<?xf32>) {
+ ^bb0(%lhs_value: f32, %rhs_value: f32, %init_value: f32):
+ %dst_value = arith.mulf %lhs_value, %rhs_value : f32
+ linalg.yield %dst_value : f32
+ } -> tensor<?xf32>
+ flow.dispatch.tensor.store %dst_tile, %dst, offsets = [%i], sizes = [%remaining], strides = [1] : tensor<?xf32> -> !flow.dispatch.tensor<writeonly:4xf32>
+ }
+ return
+ }
+ }
+}
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_32.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_32.so
new file mode 100644
index 0000000..e10b64b
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_32.so
Binary files differ
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_64.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_64.so
new file mode 100644
index 0000000..50e6fb6
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_64.so
Binary files differ
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_benchmark.txt b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_benchmark.txt
new file mode 100644
index 0000000..a8f1a46
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_benchmark.txt
@@ -0,0 +1,13 @@
+--executable_format=EX_ELF
+--executable_file=iree/hal/local/elf/testdata/elementwise_mul_x86_64.so
+--entry_point=0
+--workgroup_count_x=1
+--workgroup_count_y=1
+--workgroup_count_z=1
+--workgroup_size_x=1
+--workgroup_size_y=1
+--workgroup_size_z=1
+--max_concurrency=1
+--binding=4xf32=1,2,3,4
+--binding=4xf32=100,200,300,400
+--binding=4xf32=0,0,0,0
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_riscv_32.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_riscv_32.so
new file mode 100644
index 0000000..602206c
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_riscv_32.so
Binary files differ
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_riscv_64.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_riscv_64.so
new file mode 100644
index 0000000..99631b4
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_riscv_64.so
Binary files differ
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_32.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_32.so
new file mode 100644
index 0000000..7f8d18c
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_32.so
Binary files differ
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_64.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_64.so
new file mode 100644
index 0000000..e534a22
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_64.so
Binary files differ
diff --git a/runtime/src/iree/hal/local/elf/testdata/generate.sh b/runtime/src/iree/hal/local/elf/testdata/generate.sh
new file mode 100755
index 0000000..7c8df03
--- /dev/null
+++ b/runtime/src/iree/hal/local/elf/testdata/generate.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Updates the checked-in ELF files used for testing the ELF loader.
+# In general we try not to check in binary files however these files act as a
+# test of binary compatibility for artifacts users may have produced. If a
+# build break occurs here we know that we have broken compatibility. Today this
+# happens every few months as we are not yet binary-stable but in the future
+# will be a bigger issue.
+#
+# To use, ensure iree-translate and your compiled ld.lld are on your PATH and
+# run the script:
+# $ ./iree/hal/local/elf/testdata/generate.sh
+
+# Uncomment to see the iree-translate commands issued:
+# set -x
+set -e
+
+ROOT_DIR=$(git rev-parse --show-toplevel)
+TESTDATA="${ROOT_DIR}/iree/hal/local/elf/testdata"
+
+# $1: file name ("foo_arm_32.so")
+# $2: list of iree-translate arguments for targeting
+function compile_and_extract_library() {
+ local so_name=$1
+ shift
+ local translate_args=("$@")
+
+ echo "Updating ${TESTDATA}/${so_name}"
+
+ CMD=(
+ iree-translate
+ -iree-mlir-to-hal-executable
+ ${TESTDATA}/elementwise_mul.mlir
+ -o="${TESTDATA}/${so_name}"
+
+ -iree-hal-target-backends=dylib-llvm-aot
+ -iree-llvm-debug-symbols=false
+
+ "${translate_args[@]}"
+ )
+ "${CMD[@]}"
+}
+
+ARM_32=(
+ -iree-llvm-target-triple=armv7a-pc-linux-elf
+ -iree-llvm-target-float-abi=hard
+)
+compile_and_extract_library "elementwise_mul_arm_32.so" ${ARM_32[@]}
+
+ARM_64=(
+ -iree-llvm-target-triple=aarch64-pc-linux-elf
+)
+compile_and_extract_library "elementwise_mul_arm_64.so" ${ARM_64[@]}
+
+RISCV_32=(
+ -iree-llvm-target-triple=riscv32-pc-linux-elf
+ -iree-llvm-target-cpu=generic-rv32
+ -iree-llvm-target-cpu-features=+m,+f
+ -iree-llvm-target-abi=ilp32
+)
+compile_and_extract_library "elementwise_mul_riscv_32.so" ${RISCV_32[@]}
+
+RISCV_64=(
+ -iree-llvm-target-triple=riscv64-pc-linux-elf
+ -iree-llvm-target-cpu=generic-rv64
+ -iree-llvm-target-cpu-features=+m,+a,+f,+d,+c
+ -iree-llvm-target-abi=lp64d
+)
+compile_and_extract_library "elementwise_mul_riscv_64.so" ${RISCV_64[@]}
+
+X86_32=(
+ -iree-llvm-target-triple=i686-pc-linux-elf
+)
+compile_and_extract_library "elementwise_mul_x86_32.so" ${X86_32[@]}
+
+X86_64=(
+ -iree-llvm-target-triple=x86_64-pc-linux-elf
+)
+compile_and_extract_library "elementwise_mul_x86_64.so" ${X86_64[@]}
diff --git a/runtime/src/iree/hal/local/executable_environment.c b/runtime/src/iree/hal/local/executable_environment.c
new file mode 100644
index 0000000..cebe4e2
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_environment.c
@@ -0,0 +1,40 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/executable_environment.h"
+
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_processor_*_t
+//===----------------------------------------------------------------------===//
+
+void iree_hal_processor_query(iree_allocator_t temp_allocator,
+ iree_hal_processor_v0_t* out_processor) {
+ IREE_ASSERT_ARGUMENT(out_processor);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ memset(out_processor, 0, sizeof(*out_processor));
+
+ // TODO(benvanik): define processor features we want to query for each arch.
+ // This needs to be baked into the executable library API and made consistent
+ // with the compiler side producing the executables that access it.
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_environment_*_t
+//===----------------------------------------------------------------------===//
+
+void iree_hal_executable_environment_initialize(
+ iree_allocator_t temp_allocator,
+ iree_hal_executable_environment_v0_t* out_environment) {
+ IREE_ASSERT_ARGUMENT(out_environment);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ memset(out_environment, 0, sizeof(*out_environment));
+ iree_hal_processor_query(temp_allocator, &out_environment->processor);
+ IREE_TRACE_ZONE_END(z0);
+}
diff --git a/runtime/src/iree/hal/local/executable_environment.h b/runtime/src/iree/hal/local/executable_environment.h
new file mode 100644
index 0000000..b4d23ca
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_environment.h
@@ -0,0 +1,47 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
+#define IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/cpu.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_library.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_hal_processor_*_t
+//===----------------------------------------------------------------------===//
+
+// Queries the current processor information and writes it to |out_processor|.
+// |temp_allocator| may be used for temporary allocations required while
+// querying. If the processor cannot be queried then |out_processor| will be
+// zeroed.
+void iree_hal_processor_query(iree_allocator_t temp_allocator,
+ iree_hal_processor_v0_t* out_processor);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_environment_*_t
+//===----------------------------------------------------------------------===//
+
+// Initializes |out_environment| to the default empty environment.
+// No imports will be available unless overridden during loading.
+// |temp_allocator| may be used for temporary allocations during initialization.
+void iree_hal_executable_environment_initialize(
+ iree_allocator_t temp_allocator,
+ iree_hal_executable_environment_v0_t* out_environment);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
diff --git a/runtime/src/iree/hal/local/executable_library.h b/runtime/src/iree/hal/local/executable_library.h
new file mode 100644
index 0000000..a579a6d
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_library.h
@@ -0,0 +1,446 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_H_
+#define IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_H_
+
+// NOTE: this file is designed to be a standalone header: it is embedded in the
+// compiler and must not take any dependencies on the runtime HAL code.
+// Changes here will require changes to the compiler and must be versioned as if
+// this was a schema: backwards-incompatible changes require version bumps or
+// the ability to feature-detect at runtime.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+//===----------------------------------------------------------------------===//
+// Common utilities included to reduce dependencies
+//===----------------------------------------------------------------------===//
+
+// `restrict` keyword, not supported by some older compilers.
+// We define our own macro in case dependencies use `restrict` differently.
+// (_MSC_VER 1900 == Visual Studio 2015; older MSVC gets a no-op definition.)
+#if defined(_MSC_VER) && _MSC_VER >= 1900
+#define IREE_RESTRICT __restrict
+#elif defined(_MSC_VER)
+#define IREE_RESTRICT
+#elif defined(__cplusplus)
+#define IREE_RESTRICT __restrict__
+#else
+#define IREE_RESTRICT restrict
+#endif  // _MSC_VER
+
+//===----------------------------------------------------------------------===//
+// Runtime feature support metadata
+//===----------------------------------------------------------------------===//
+
+// Defines a bitfield of features that the library requires or supports.
+enum iree_hal_executable_library_feature_bits_t {
+  IREE_HAL_EXECUTABLE_LIBRARY_FEATURE_NONE = 0u,
+  // TODO(benvanik): declare features for debugging/coverage/printf/etc.
+  // These will control which symbols are injected into the library at runtime.
+};
+typedef uint32_t iree_hal_executable_library_features_t;
+
+// Defines a set of supported sanitizers that libraries may be compiled with.
+// Loaders can use this declaration to check as to whether the library is
+// compatible with the hosting environment for cases where the sanitizer
+// requires host support.
+typedef enum iree_hal_executable_library_sanitizer_kind_e {
+  IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_NONE = 0,
+  // Indicates the library is compiled to use AddressSanitizer:
+  // https://clang.llvm.org/docs/AddressSanitizer.html
+  // Equivalent compiler flag: -fsanitize=address
+  IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_ADDRESS = 1,
+  // Indicates the library is compiled to use MemorySanitizer:
+  // https://clang.llvm.org/docs/MemorySanitizer.html
+  // Equivalent compiler flag: -fsanitize=memory
+  IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_MEMORY = 2,
+  // Indicates the library is compiled to use ThreadSanitizer:
+  // https://clang.llvm.org/docs/ThreadSanitizer.html
+  // Equivalent compiler flag: -fsanitize=thread
+  IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_THREAD = 3,
+  // Indicates the library is compiled to use UndefinedBehaviorSanitizer:
+  // https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html
+  // Equivalent compiler flag: -fsanitize=undefined
+  IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_UNDEFINED = 4,
+
+  // Forces the enum to be 32 bits wide across compilers.
+  IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_MAX_ENUM = INT32_MAX,
+} iree_hal_executable_library_sanitizer_kind_t;
+
+//===----------------------------------------------------------------------===//
+// Versioning and interface querying
+//===----------------------------------------------------------------------===//
+
+// Forward declaration; defined below in the v0 section.
+typedef struct iree_hal_executable_environment_v0_t
+    iree_hal_executable_environment_v0_t;
+
+// Version code indicating the minimum required runtime structures.
+// Runtimes cannot load executables with newer versions but may be able to load
+// older versions if backward compatibility is enabled.
+//
+// NOTE: until we hit v1 the versioning scheme here is not set in stone.
+// We may want to make this major release number, date codes (0x20220307),
+// or some semantic versioning we track in whatever spec we end up having.
+typedef uint32_t iree_hal_executable_library_version_t;
+
+#define IREE_HAL_EXECUTABLE_LIBRARY_VERSION_0_2 0x00000002u
+
+// The latest version of the library API; can be used to populate the
+// iree_hal_executable_library_header_t::version when building libraries.
+#define IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST \
+  IREE_HAL_EXECUTABLE_LIBRARY_VERSION_0_2
+
+// A header present at the top of all versions of the library API used by the
+// runtime to ensure version compatibility.
+typedef struct iree_hal_executable_library_header_t {
+  // Version of the API this library was built with, which was likely the value
+  // of IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST.
+  iree_hal_executable_library_version_t version;
+
+  // Name used for logging/diagnostics.
+  const char* name;
+
+  // Bitfield of features required/supported by this executable.
+  iree_hal_executable_library_features_t features;
+
+  // Which sanitizer the library is compiled to use, if any.
+  // Libraries meant for use with a particular sanitizer are only usable
+  // with hosting code that is using the same sanitizer.
+  iree_hal_executable_library_sanitizer_kind_t sanitizer;
+} iree_hal_executable_library_header_t;
+
+// Exported function from dynamic libraries for querying library information.
+//
+// The provided |max_version| is the maximum version the caller supports;
+// callees must return NULL if their lowest available version is greater
+// than the max version supported by the caller.
+//
+// The provided |environment| field contains information about the hosting
+// execution environment that the executable may use to specialize its
+// implementation, such as using specific imports or exporting
+// architecture-specific dispatch routines. Some environmental properties may
+// change per-invocation such as the CPU info when performing dispatches on
+// heterogeneous processors that may change over the lifetime of the program.
+typedef const iree_hal_executable_library_header_t** (
+    *iree_hal_executable_library_query_fn_t)(
+    iree_hal_executable_library_version_t max_version,
+    const iree_hal_executable_environment_v0_t* environment);
+
+// Function name exported from dynamic libraries (pass to dlsym).
+#define IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME \
+  "iree_hal_executable_library_query"
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_EXECUTABLE_LIBRARY_VERSION_0_*
+//===----------------------------------------------------------------------===//
+
+// Function signature of imported functions for use in the executable.
+// Each call takes opaque parameters as defined by the imported function.
+// Both the compiler and the runtime must agree on the parameter format
+// (including struct alignment and packing) and doing so is outside the scope
+// of this API. In general one should only pass precisely what they need
+// (pointers directly into buffers being manipulated, arguments, etc) and not
+// try to replicate the dispatch structure (workgroup information and bindings)
+// so that the imported functions can be versioned independently from this
+// specification.
+//
+// Returns 0 on success and non-zero on failure. Failures will cause device loss
+// and should only be used to communicate serious issues that should abort all
+// execution within the current device. Buffer overflows are a good example of
+// a useful failure though the HAL does not mandate that all overflows are
+// caught and only that they are not harmful - clamping byte ranges and never
+// returning a failure is sufficient.
+typedef int (*iree_hal_executable_import_v0_t)(void* import_params);
+
+// A thunk function used to call an import.
+// All imports must be called through this function by passing the import
+// function pointer as the first argument followed by the arguments of the
+// import function itself.
+typedef int (*iree_hal_executable_import_thunk_v0_t)(
+    iree_hal_executable_import_v0_t fn_ptr, void* import_params);
+
+// Declares imports available to the executable library at runtime.
+// To enable linker isolation, ABI shimming, and import multi-versioning we use
+// this import table exclusively and do not allow platform-level linking. If it
+// were allowed the deployment situation gets significantly more complex as the
+// libraries containing the imported symbols will differ on all platforms, will
+// have the platform-dependent ABI (Windows, MacOS, etc), and may not be
+// available at all (bare-metal).
+//
+// Static libraries may choose to still dynamically link against external
+// symbols without using this table as in that scenario much of the above
+// concerns do not apply: all code is being linked together into the same binary
+// and symbol availability is known during build-time linking. Static linking
+// also enables LTO to strip any import not used by any executables in contrast
+// to the dynamic style elsewhere.
+//
+// Represented as a struct-of-arrays for more efficient packing and more
+// locality during lookup. Each subarray - when not omitted and NULL - is
+// indexed by import ordinal and has up to |count| entries.
+typedef struct iree_hal_executable_import_table_v0_t {
+  // Total number of imports in the table.
+  uint32_t count;
+
+  // Import symbol name encoding the name and whether it is weak.
+  // Example: `mylib_some_fn_v2?`
+  //   `mylib_...`:
+  //     Prefix indicating the owner of the function; symbols have a global
+  //     namespace and this is used to reduce collisions.
+  //   `some_fn...`:
+  //     Name of the function used to link to the imports available in the
+  //     hosting executable.
+  //   `..._v2`:
+  //     Function-specified version number used to allow multiple versions to
+  //     be imported. For backward compatibility one could import both
+  //     `some_fn_v1?` and `some_fn_v2?` and use whichever is available.
+  //     Note that this is just a convention for the suffix and can be anything.
+  //   `?`:
+  //     Indicates when an import is optional. If the import of the specified
+  //     version is not found the table entry will be NULL. When omitted if the
+  //     import is unavailable loading will fail.
+  //
+  // The symbol table is sorted ascending alphabetical (by strcmp).
+  const char* const* symbols;
+} iree_hal_executable_import_table_v0_t;
+
+// Maximum number of data fields in iree_hal_processor_v0_t.
+#define IREE_HAL_PROCESSOR_DATA_CAPACITY_V0 8
+
+// Architecture-specific CPU information available to executables.
+// This encodes zero or more fields of opaque processor data.
+// The intent is that this structure can be put in .rodata when there are no
+// runtime features that need to be queried.
+//
+// The format of the data is architecture-specific as by construction no value
+// will ever be used in a compiled binary from another architecture. This
+// allows us to simplify this interface as we can't for example load the same
+// executable library for both aarch64 and riscv32 and don't need to normalize
+// any of the fields across them both.
+typedef struct iree_hal_processor_v0_t {
+  // Opaque architecture-specific encoding in 64-bit words.
+  // This may represent a fixed-length data structure, a series of hardware
+  // registers, or key-value pairs.
+  //
+  // The contents are opaque here as to support out-of-tree architectures. The
+  // runtime code deriving the identifier/flags and providing it here is loosely
+  // coupled with the compiler code emitting checks based on the identifier and
+  // only those two places ever need to change.
+  uint64_t data[IREE_HAL_PROCESSOR_DATA_CAPACITY_V0];
+} iree_hal_processor_v0_t;
+static_assert(sizeof(iree_hal_processor_v0_t) % sizeof(uint64_t) == 0,
+              "8-byte alignment required");
+
+// Defines the environment in which the executable is being used.
+// Executables only have access to the information in this structure and must
+// make all decisions based on it; this ensures executables are portable across
+// operating environments (Linux, Mac, bare-metal, web, etc) by not having
+// platform-specific syscalls and register query emulation.
+typedef struct iree_hal_executable_environment_v0_t {
+  // Specialization constants available to the executable, if any.
+  // Contains as many as declared in the library header.
+  const uint32_t* constants;
+
+  // Thunk function for calling imports. All calls must be made through this.
+  iree_hal_executable_import_thunk_v0_t import_thunk;
+  // Optional imported functions available for use within the executable.
+  // Contains one entry per imported function. If an import was marked as weak
+  // then the corresponding entry may be NULL.
+  const iree_hal_executable_import_v0_t* imports;
+
+  // Optional architecture-specific CPU information.
+  // In heterogeneous processors this may represent any of the subarchitecture
+  // types as it is derived from the core the calling thread is scheduled on.
+  // Will be all zeros if unavailable.
+  iree_hal_processor_v0_t processor;
+} iree_hal_executable_environment_v0_t;
+
+// Read-only per-dispatch state passed to each workgroup in a dispatch.
+//
+// We lay out the struct to try to fit everything commonly used into the first
+// cache line (on archs with 64-bit pointers; 32-bit fits in a single line).
+//
+// For workgroup dimensions we allow the full 32-bit range on X and Y as those
+// are the primary distribution dimensions. Z is the coarsest control and is
+// usually in the 1-16 range; any higher and it can pessimize scheduling. Almost
+// all GPUs also have this limitation (max Z of 65K) for the same reason.
+typedef struct iree_hal_executable_dispatch_state_v0_t {
+  // Workgroup size chosen for the dispatch. For compilation modes where the
+  // workgroup size is constant this may be ignored.
+  uint32_t workgroup_size_x;
+  uint32_t workgroup_size_y;
+  uint16_t workgroup_size_z;
+
+  // Total number of available 4 byte push constant values in |push_constants|.
+  uint16_t push_constant_count;
+
+  // Total workgroup count for the dispatch. This is sourced from either the
+  // original dispatch call (for iree_hal_command_buffer_dispatch) or the
+  // indirection buffer (for iree_hal_command_buffer_dispatch_indirect).
+  uint32_t workgroup_count_x;
+  uint32_t workgroup_count_y;
+  uint16_t workgroup_count_z;
+
+  // Estimated maximum concurrent workgroups; loosely maps to the number of
+  // processors allowed to execute the dispatch. The actual number will vary
+  // based on competing dispatches and dynamic executor configuration.
+  uint8_t max_concurrency;
+
+  // Total number of binding base pointers in |binding_ptrs| and
+  // |binding_lengths|. The set is packed densely based on which bindings are
+  // used (known at compile-time).
+  uint8_t binding_count;
+
+  // |push_constant_count| values.
+  const uint32_t* push_constants;
+  // Base pointers to each binding buffer.
+  void* const* binding_ptrs;
+  // The length of each binding in bytes, 1:1 with |binding_ptrs|.
+  const size_t* binding_lengths;
+
+  // NOTE: the above fields are frequently accessed and should be kept together
+  // to ensure cache-friendly behavior. The first instructions every dispatch
+  // executes are loads from the fields and we want to avoid a cascade of
+  // cache misses. Less-frequently used fields can follow.
+} iree_hal_executable_dispatch_state_v0_t;
+static_assert(sizeof(iree_hal_executable_dispatch_state_v0_t) <= 64,
+              "try keeping dispatch state small enough to fit in a cache line");
+
+// Read-only per-workgroup state passed to each workgroup in a dispatch.
+//
+// We lay out the struct to try to fit everything commonly used into the first
+// cache line (on archs with 64-bit pointers; 32-bit fits in a single line).
+typedef struct iree_hal_executable_workgroup_state_v0_t {
+  // Workgroup ID of the currently executing workgroup.
+  // This is in the range of 0-workgroup_count and each unique workgroup is to
+  // perform workgroup_size invocations.
+  uint32_t workgroup_id_x;
+  uint32_t workgroup_id_y;
+  uint16_t workgroup_id_z;
+
+  // Reserved for future use.
+  uint16_t reserved;
+
+  // Logical processor identifier used to index into processor info fields.
+  // Depending on the implementation this may be an ordinal, a bitfield, or an
+  // opaque unique identifier.
+  //
+  // NOTE: we could steal bits from the |processor_id| if needed; today the ID
+  // is the global ID but it really only needs to be within the current node
+  // (8-bits, or 16-bit for single-node thousand-core future proofing).
+  uint32_t processor_id;
+
+  // Scratch memory available for use by the workgroup.
+  // Requires a non-zero value to be specified for |local_memory_pages|; at
+  // least the size specified will be available. This memory is transient and
+  // exclusive to the workgroup. The provided pointer may be NULL if no
+  // workgroup local memory was requested.
+  void* local_memory;
+  // Total number of bytes available in |local_memory|. This may be larger than
+  // the requested amount.
+  uint32_t local_memory_size;
+
+  // +4 trailing bytes of free space
+} iree_hal_executable_workgroup_state_v0_t;
+static_assert(
+    sizeof(iree_hal_executable_workgroup_state_v0_t) <= 64,
+    "try keeping workgroup state small enough to fit in a cache line");
+
+// Function signature of exported executable entry points.
+// The same |environment| is passed to all dispatches.
+// The same |dispatch_state| is passed to all workgroups within a dispatch.
+// A unique |workgroup_state| is passed to every workgroup within a dispatch.
+//
+// Returns 0 on success and non-zero on failure. Failures will cause device loss
+// and should only be used to communicate serious issues that should abort all
+// execution within the current device. Buffer overflows are a good example of
+// a useful failure though the HAL does not mandate that all overflows are
+// caught and only that they are not harmful - clamping byte ranges and never
+// returning a failure is sufficient.
+typedef int (*iree_hal_executable_dispatch_v0_t)(
+    const iree_hal_executable_environment_v0_t* environment,
+    const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+    const iree_hal_executable_workgroup_state_v0_t* workgroup_state);
+
+// Bytes per page of workgroup local memory.
+// This is chosen to match the common page size of devices.
+#define IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE 4096
+
+// Attributes for exported dispatch functions defining how they are to be
+// executed. 0 defaults are well-specified and the entire attributes table may
+// be omitted if no dispatch functions require these fields.
+typedef struct iree_hal_executable_dispatch_attrs_v0_t {
+  // Number of IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE byte pages (or 0)
+  // indicating how much workgroup local memory is required for the dispatch.
+  // This is the size of the buffer referenced by the `local_memory` argument.
+  uint16_t local_memory_pages;
+  // Must be 0. May be used in the future for flags controlling the dispatch
+  // behavior/synchronization requirements.
+  uint16_t reserved;
+} iree_hal_executable_dispatch_attrs_v0_t;
+static_assert(sizeof(iree_hal_executable_dispatch_attrs_v0_t) == 4, "uint32_t");
+
+// A table of exported functions arranged as a struct-of-arrays for more
+// efficient packing and faster lookup. Each subarray - when not omitted and
+// NULL - is indexed by export ordinal and has up to |count| entries.
+typedef struct iree_hal_executable_export_table_v0_t {
+  // Total number of exports in the table.
+  uint32_t count;
+
+  // Function pointers for each exported entry point.
+  const iree_hal_executable_dispatch_v0_t* ptrs;
+
+  // Optional table of attributes 1:1 with ptrs.
+  // Omitting the table entirely means that no exports need workgroup local
+  // memory (or whatever else we pack into the attributes).
+  const iree_hal_executable_dispatch_attrs_v0_t* attrs;
+
+  // Optional table of export function entry point names 1:1 with ptrs.
+  // These names are only used for tracing/debugging and can be omitted to save
+  // binary size.
+  const char* const* names;
+
+  // Optional table of entry point tags 1:1 with ptrs.
+  // Used to describe the entry point in a human-readable format useful for
+  // verbose logging. The string values, when present, may be attached to
+  // tracing/debugging events related to the entry point.
+  const char* const* tags;
+} iree_hal_executable_export_table_v0_t;
+
+// A table declaring the executable-level constants that can be used to
+// specialize the executable behavior.
+typedef struct iree_hal_executable_constant_table_v0_t {
+  // Total number of constants in the table.
+  uint32_t count;
+  // We could add more metadata here if we wanted to enable reflection.
+} iree_hal_executable_constant_table_v0_t;
+
+// Structure used for v0 library interfaces.
+// The entire structure is designed to be read-only and able to live embedded in
+// the binary .rdata section.
+//
+// The information held within the structure is not cached by the runtime.
+// Implementations may choose to heap allocate this structure and modify its
+// members at runtime so long as they observe the thread-safety guarantees.
+// For example, a JIT may default all exports to JIT thunk functions and then
+// atomically swap them out for the translated function pointers as they are
+// available.
+typedef struct iree_hal_executable_library_v0_t {
+  // Version/metadata header.
+  // Will have a version of IREE_HAL_EXECUTABLE_LIBRARY_VERSION_*.
+  const iree_hal_executable_library_header_t* header;
+
+  // Table of imported functions available to functions in the executable.
+  iree_hal_executable_import_table_v0_t imports;
+
+  // Table of exported functions from the executable.
+  iree_hal_executable_export_table_v0_t exports;
+
+  // Table of executable-level constants.
+  iree_hal_executable_constant_table_v0_t constants;
+} iree_hal_executable_library_v0_t;
+
+#endif  // IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_H_
diff --git a/runtime/src/iree/hal/local/executable_library_benchmark.c b/runtime/src/iree/hal/local/executable_library_benchmark.c
new file mode 100644
index 0000000..b20aa2b
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_library_benchmark.c
@@ -0,0 +1,335 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/file_io.h"
+#include "iree/base/internal/flags.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/hal/local/local_descriptor_set_layout.h"
+#include "iree/hal/local/local_executable.h"
+#include "iree/hal/local/local_executable_layout.h"
+#include "iree/testing/benchmark.h"
+
+// Command-line flags selecting the executable to benchmark and the dispatch
+// grid/workgroup configuration used for each benchmark iteration.
+IREE_FLAG(string, executable_format, "",
+          "Format of the executable file being loaded.");
+IREE_FLAG(string, executable_file, "",
+          "Path to the executable library file to load.");
+
+IREE_FLAG(int32_t, entry_point, 0, "Entry point ordinal to run.");
+
+IREE_FLAG(int32_t, workgroup_count_x, 1,
+          "X dimension of the workgroup count defining the number of\n"
+          "workgroup invocations that will be run per benchmark iteration.\n"
+          "This is the fastest-changing dimension.");
+IREE_FLAG(int32_t, workgroup_count_y, 1,
+          "Y dimension of the workgroup count defining the number of\n"
+          "workgroup invocations that will be run per benchmark iteration.");
+IREE_FLAG(int32_t, workgroup_count_z, 1,
+          "Z dimension of the workgroup count defining the number of\n"
+          "workgroup invocations that will be run per benchmark iteration.\n"
+          "This is the slowest-changing dimension.");
+IREE_FLAG(int32_t, workgroup_size_x, 1,
+          "X dimension of the workgroup size passed to the executable.");
+IREE_FLAG(int32_t, workgroup_size_y, 1,
+          "Y dimension of the workgroup size passed to the executable.");
+IREE_FLAG(int32_t, workgroup_size_z, 1,
+          "Z dimension of the workgroup size passed to the executable.");
+
+IREE_FLAG(int32_t, max_concurrency, 1,
+          "Maximum available concurrency exposed to the dispatch.");
+
+// Total number of bindings we (currently) allow any executable to have.
+#define IREE_HAL_LOCAL_MAX_TOTAL_BINDING_COUNT \
+  (IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT *   \
+   IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT)
+
+// Parsed parameters from flags.
+// Used to construct the dispatch parameters for the benchmark invocation.
+// Mutable global: populated by the flag-parsing callbacks below and read by
+// the benchmark body; this tool is single-threaded at parse time.
+struct {
+  // Number of valid entries in |push_constants|.
+  int32_t push_constant_count;
+  union {
+    uint32_t ui32;
+  } push_constants[IREE_HAL_LOCAL_MAX_PUSH_CONSTANT_COUNT];
+
+  // Number of valid entries in |bindings|.
+  int32_t binding_count;
+  iree_string_view_t bindings[IREE_HAL_LOCAL_MAX_TOTAL_BINDING_COUNT];
+} dispatch_params = {
+    .push_constant_count = 0,
+    .binding_count = 0,
+};
+
+// Flag callback: parses one --push_constant_callback= value and appends it to
+// dispatch_params.push_constants as a uint32_t.
+// NOTE(review): atoi() reads value.data as a C string; this assumes the flag
+// parser hands over a NUL-terminated view — confirm against the flags impl.
+// NOTE(review): IREE_ASSERT_LE may compile to a no-op in release builds, in
+// which case an overflowing count would write out of bounds — presumably
+// acceptable for a dev/benchmark tool, but worth confirming.
+static iree_status_t parse_push_constant(iree_string_view_t flag_name,
+                                         void* storage,
+                                         iree_string_view_t value) {
+  IREE_ASSERT_LE(dispatch_params.push_constant_count + 1,
+                 IREE_ARRAYSIZE(dispatch_params.push_constants),
+                 "too many push constants");
+  dispatch_params.push_constants[dispatch_params.push_constant_count++].ui32 =
+      atoi(value.data);
+  return iree_ok_status();
+}
+// Flag callback: prints the accumulated push constants (or a usage hint when
+// none were specified) in a form that can be pasted back on a command line.
+// Note that unlike print_binding below no trailing newline is emitted after
+// the final entry.
+static void print_push_constant(iree_string_view_t flag_name, void* storage,
+                                FILE* file) {
+  if (dispatch_params.push_constant_count == 0) {
+    // No values parsed: print a commented-out usage example instead.
+    fprintf(file, "# --%.*s=[integer value]\n", (int)flag_name.size,
+            flag_name.data);
+    return;
+  }
+  for (int32_t i = 0; i < dispatch_params.push_constant_count; ++i) {
+    fprintf(file, "--%.*s=%u", (int)flag_name.size, flag_name.data,
+            dispatch_params.push_constants[i].ui32);
+    if (i < dispatch_params.push_constant_count - 1) {
+      fprintf(file, "\n");
+    }
+  }
+}
+IREE_FLAG_CALLBACK(parse_push_constant, print_push_constant, &dispatch_params,
+                   push_constant_callback,
+                   "Appends a uint32_t push constant value.\n");
+
+// Flag callback: records one --binding= specification string for later parsing
+// by iree_hal_buffer_view_parse. Only the string view is stored here; the
+// buffer itself is created when the benchmark runs.
+// NOTE(review): the stored view aliases the flag storage — assumes flag
+// strings outlive the benchmark run; confirm against the flags impl.
+static iree_status_t parse_binding(iree_string_view_t flag_name, void* storage,
+                                   iree_string_view_t value) {
+  IREE_ASSERT_LE(dispatch_params.binding_count + 1,
+                 IREE_ARRAYSIZE(dispatch_params.bindings), "too many bindings");
+  dispatch_params.bindings[dispatch_params.binding_count++] = value;
+  return iree_ok_status();
+}
+// Flag callback: prints the accumulated binding specifications (or a usage
+// hint when none were specified), one per line.
+static void print_binding(iree_string_view_t flag_name, void* storage,
+                          FILE* file) {
+  if (dispatch_params.binding_count == 0) {
+    // No values parsed: print a commented-out usage example instead.
+    fprintf(file, "# --%.*s=\"shapextype[=values]\"\n", (int)flag_name.size,
+            flag_name.data);
+    return;
+  }
+  for (int32_t i = 0; i < dispatch_params.binding_count; ++i) {
+    const iree_string_view_t binding_str = dispatch_params.bindings[i];
+    fprintf(file, "--%.*s=\"%.*s\"\n", (int)flag_name.size, flag_name.data,
+            (int)binding_str.size, binding_str.data);
+  }
+}
+IREE_FLAG_CALLBACK(
+    parse_binding, print_binding, &dispatch_params, binding,
+    "Appends a binding to the dispatch parameters.\n"
+    "Bindings are defined by their shape, element type, and their data.\n"
+    "Examples:\n"
+    "  # 16 4-byte elements zero-initialized:\n"
+    "  --binding=2x8xi32\n"
+    "  # 10000 bytes all initialized to 123:\n"
+    "  --binding=10000xi8=123\n"
+    "  # 2 4-byte floating-point values with contents [[1.4], [2.1]]:\n"
+    "  --binding=2x1xf32=1.4,2.1");
+
+// Mid-file include is deliberate: the loader header is only pulled in when the
+// embedded ELF loader is compiled into this binary.
+#if defined(IREE_HAL_HAVE_EMBEDDED_LIBRARY_LOADER)
+#include "iree/hal/local/loaders/embedded_library_loader.h"
+#endif  // IREE_HAL_HAVE_EMBEDDED_LIBRARY_LOADER
+
+// Creates an executable loader based on the given format flag.
+// On success |out_executable_loader| owns a new loader allocated from
+// |host_allocator|; returns UNAVAILABLE if no compiled-in loader handles the
+// requested --executable_format= value.
+static iree_status_t iree_hal_executable_library_create_loader(
+    iree_allocator_t host_allocator,
+    iree_hal_executable_loader_t** out_executable_loader) {
+#if defined(IREE_HAL_HAVE_EMBEDDED_LIBRARY_LOADER)
+  // "EX_ELF" selects the embedded ELF loader (no imports provided).
+  if (strcmp(FLAG_executable_format, "EX_ELF") == 0) {
+    return iree_hal_embedded_library_loader_create(
+        iree_hal_executable_import_provider_null(), host_allocator,
+        out_executable_loader);
+  }
+#endif  // IREE_HAL_HAVE_EMBEDDED_LIBRARY_LOADER
+  return iree_make_status(
+      IREE_STATUS_UNAVAILABLE,
+      "no loader available that can handle --executable_format=%s",
+      FLAG_executable_format);
+}
+
+// NOTE: error handling is here just for better diagnostics: it is not tracking
+// allocations correctly and will leak. Don't use this as an example for how to
+// write robust code.
+static iree_status_t iree_hal_executable_library_run(
+ const iree_benchmark_def_t* benchmark_def,
+ iree_benchmark_state_t* benchmark_state) {
+ iree_allocator_t host_allocator = benchmark_state->host_allocator;
+
+ // Register the loader used to load (or find) the executable.
+ iree_hal_executable_loader_t* executable_loader = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_executable_library_create_loader(
+ host_allocator, &executable_loader));
+
+ // Setup the specification used to perform the executable load.
+ // This information is normally used to select the appropriate loader but in
+ // this benchmark we only have a single one.
+ iree_hal_executable_params_t executable_params;
+ iree_hal_executable_params_initialize(&executable_params);
+ executable_params.caching_mode =
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_OPTIMIZATION |
+ IREE_HAL_EXECUTABLE_CACHING_MODE_ALIAS_PROVIDED_DATA |
+ IREE_HAL_EXECUTABLE_CACHING_MODE_DISABLE_VERIFICATION;
+ executable_params.executable_format =
+ iree_make_cstring_view(FLAG_executable_format);
+
+ // Load the executable data.
+ iree_file_contents_t* file_contents = NULL;
+ IREE_RETURN_IF_ERROR(iree_file_read_contents(FLAG_executable_file,
+ host_allocator, &file_contents));
+ executable_params.executable_data = file_contents->const_buffer;
+
+ // Setup the layouts defining how each entry point is interpreted.
+ // NOTE: we know for the embedded library loader that this is not required.
+ // Other loaders may need it in which case it'll have to be provided.
+ executable_params.executable_layout_count = 0;
+ executable_params.executable_layouts = NULL;
+
+ // Perform the load, which will fail if the executable cannot be loaded or
+ // there was an issue with the layouts.
+ iree_hal_executable_t* executable = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_executable_loader_try_load(
+ executable_loader, &executable_params, &executable));
+ iree_hal_local_executable_t* local_executable =
+ iree_hal_local_executable_cast(executable);
+
+ // Allocate workgroup-local memory that each invocation can use.
+ iree_byte_span_t local_memory = iree_make_byte_span(NULL, 0);
+ iree_host_size_t local_memory_size =
+ local_executable->dispatch_attrs
+ ? local_executable->dispatch_attrs[FLAG_entry_point]
+ .local_memory_pages *
+ IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE
+ : 0;
+ if (local_memory_size > 0) {
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+ host_allocator, local_memory_size, (void**)&local_memory.data));
+ local_memory.data_length = local_memory_size;
+ }
+
+ // Allocate storage for buffers and populate them.
+ // They only need to remain valid for the duration of the invocation and all
+ // memory accessed by the invocation will come from here.
+ iree_hal_allocator_t* heap_allocator = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_allocator_create_heap(
+ iree_make_cstring_view("benchmark"), host_allocator, host_allocator,
+ &heap_allocator));
+ iree_hal_buffer_view_t* buffer_views[IREE_HAL_LOCAL_MAX_TOTAL_BINDING_COUNT];
+ void* binding_ptrs[IREE_HAL_LOCAL_MAX_TOTAL_BINDING_COUNT];
+ size_t binding_lengths[IREE_HAL_LOCAL_MAX_TOTAL_BINDING_COUNT];
+ for (iree_host_size_t i = 0; i < dispatch_params.binding_count; ++i) {
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_view_parse(
+ dispatch_params.bindings[i], heap_allocator, &buffer_views[i]));
+ iree_hal_buffer_t* buffer = iree_hal_buffer_view_buffer(buffer_views[i]);
+ iree_device_size_t buffer_length =
+ iree_hal_buffer_view_byte_length(buffer_views[i]);
+ iree_hal_buffer_mapping_t buffer_mapping = {{0}};
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+ buffer, IREE_HAL_MAPPING_MODE_PERSISTENT,
+ IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE, 0,
+ buffer_length, &buffer_mapping));
+ binding_ptrs[i] = buffer_mapping.contents.data;
+ binding_lengths[i] = (size_t)buffer_mapping.contents.data_length;
+ }
+
+ // Setup dispatch state.
+ const iree_hal_executable_dispatch_state_v0_t dispatch_state = {
+ .workgroup_count_x = FLAG_workgroup_count_x,
+ .workgroup_count_y = FLAG_workgroup_count_y,
+ .workgroup_count_z = FLAG_workgroup_count_z,
+ .workgroup_size_x = FLAG_workgroup_size_x,
+ .workgroup_size_y = FLAG_workgroup_size_y,
+ .workgroup_size_z = FLAG_workgroup_size_z,
+ .max_concurrency = FLAG_max_concurrency,
+ .push_constant_count = dispatch_params.push_constant_count,
+ .push_constants = &dispatch_params.push_constants[0].ui32,
+ .binding_count = dispatch_params.binding_count,
+ .binding_ptrs = binding_ptrs,
+ .binding_lengths = binding_lengths,
+ };
+
+ // Benchmark the workgroup invocation.
+ // Note that each iteration runs through the whole grid as it's important that
+ // we are testing the memory access patterns: if we just ran the same single
+ // tile processing the same exact region of memory over and over we are not
+ // testing cache effects.
+ int64_t dispatch_count = 0;
+ while (iree_benchmark_keep_running(benchmark_state, /*batch_count=*/1)) {
+ IREE_RETURN_IF_ERROR(iree_hal_local_executable_issue_dispatch_inline(
+ local_executable, FLAG_entry_point, &dispatch_state, 0, local_memory));
+ ++dispatch_count;
+ }
+
+ // To get a total time per invocation we set the item count to the total
+ // invocations dispatched. That gives us both total dispatch and single
+ // invocation times in the reporter output.
+ int64_t total_invocations =
+ dispatch_count * dispatch_state.workgroup_count_x *
+ dispatch_state.workgroup_count_y * dispatch_state.workgroup_count_z;
+ iree_benchmark_set_items_processed(benchmark_state, total_invocations);
+
+ // Deallocate buffers.
+ for (iree_host_size_t i = 0; i < dispatch_params.binding_count; ++i) {
+ iree_hal_buffer_view_release(buffer_views[i]);
+ }
+ iree_hal_allocator_release(heap_allocator);
+
+ // Unload.
+ iree_hal_executable_release(executable);
+ iree_hal_executable_loader_release(executable_loader);
+ iree_file_contents_free(file_contents);
+
+ return iree_ok_status();
+}
+
+int main(int argc, char** argv) {
+ iree_flags_set_usage(
+ "executable_library_benchmark",
+ "Benchmarks a single entry point within an executable library.\n"
+ "Executable libraries can be found in your temp path when compiling\n"
+ "with `-iree-llvm-keep-linker-artifacts`. The parameters used can be\n"
+ "inferred from the entry point `hal.interface` and dispatches to it.\n"
+ "\n"
+ "Note that this tool is intentionally low level: you must specify all\n"
+ "of the push constant/binding parameters precisely as they are expected\n"
+ "by the executable. `iree-benchmark-module` is the user-friendly\n"
+ "benchmarking tool while this one favors direct access to the\n"
+ "executables (bypassing all of the IREE VM, HAL APIs, task system,\n"
+ "etc).\n"
+ "\n"
+ "Example --flagfile:\n"
+ " --executable_format=EX_ELF\n"
+ " --executable_file=iree/hal/local/elf/testdata/"
+ "elementwise_mul_x86_64.so\n"
+ " --entry_point=0\n"
+ " --workgroup_count_x=1\n"
+ " --workgroup_count_y=1\n"
+ " --workgroup_count_z=1\n"
+ " --workgroup_size_x=1\n"
+ " --workgroup_size_y=1\n"
+ " --workgroup_size_z=1\n"
+ " --binding=4xf32=1,2,3,4\n"
+ " --binding=4xf32=100,200,300,400\n"
+ " --binding=4xf32=0,0,0,0\n"
+ "\n");
+
+ iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK, &argc, &argv);
+ iree_benchmark_initialize(&argc, argv);
+
+ // TODO(benvanik): override these with our own flags.
+ iree_benchmark_def_t benchmark_def = {
+ .flags = IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME |
+ IREE_BENCHMARK_FLAG_USE_REAL_TIME,
+ .time_unit = IREE_BENCHMARK_UNIT_NANOSECOND,
+ .minimum_duration_ns = 0,
+ .iteration_count = 0,
+ .run = iree_hal_executable_library_run,
+ };
+ iree_benchmark_register(iree_make_cstring_view("dispatch"), &benchmark_def);
+
+ iree_benchmark_run_specified();
+ return 0;
+}
diff --git a/runtime/src/iree/hal/local/executable_library_benchmark.md b/runtime/src/iree/hal/local/executable_library_benchmark.md
new file mode 100644
index 0000000..e988fbd
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_library_benchmark.md
@@ -0,0 +1,223 @@
+executable_library_benchmark
+---
+
+Use `iree/hal/local/executable_library_benchmark --help` for more information.
+This tool is intended for CPU codegen developers only and cuts into the system
+at the lowest level possible: if you wish this was automated or easier to use
+then you should be looking elsewhere in the stack.
+
+The best inputs for this are those that result in a single dispatch function
+so that you don't have to look hard to figure out what all the flags are. As
+the fusion is compiler-driven this can be tricky to ensure.
+
+Keep in mind that in IREE the generated HAL executables and the functions they
+contain are an internal implementation detail of the compiler. Using this tool
+is effectively the same as taking some random assembly dump of a C program and
+trying to call one of the private functions inside of it: it's opaque,
+ever-changing, and unfriendly for a reason!
+
+---
+
+### Full example using the files checked in to the repo
+
+Start here to ensure you have a working build and see the expected output:
+
+```
+iree/hal/local/executable_library_benchmark \
+ --executable_format=EX_ELF \
+ --executable_file=iree/hal/local/elf/testdata/elementwise_mul_x86_64.so \
+ --entry_point=0 \
+ --workgroup_count_x=1 \
+ --workgroup_count_y=1 \
+ --workgroup_count_z=1 \
+ --workgroup_size_x=1 \
+ --workgroup_size_y=1 \
+ --workgroup_size_z=1 \
+ --binding=4xf32=1,2,3,4 \
+ --binding=4xf32=100,200,300,400 \
+ --binding=4xf32=0,0,0,0
+```
+
+```
+---------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations UserCounters...
+---------------------------------------------------------------------------------------------
+BM_dispatch/process_time/real_time 90.7 ns 90.9 ns 7739262 items_per_second=11.0312M/s
+```
+
+---
+
+It can be helpful to put the flags in flagfiles (newline separated):
+
+```
+iree/hal/local/executable_library_benchmark --flagfile=my_flags.txt
+```
+
+For an example, the flags for an x86-64 run of a simple element-wise multiply:
+
+```
+iree/hal/local/executable_library_benchmark --flagfile=iree/hal/local/testdata/elementwise_mul_benchmark.txt
+```
+
+---
+
+### Running standalone HAL executables
+
+This approach uses an explicitly specified HAL executable without any associated
+host code. When doing this the executable layout specifying the bindings and
+push constants is chosen by the user instead of being automatically derived by
+the compiler. The design of the layout can have performance implications and
+it's important to try to match the kind of layout the compiler would produce or
+ensure that what's being tested is relatively immune to the potential effects
+(having enough work per workgroup, etc).
+
+1. Hand-author a `hal.executable.source` op or extract a `hal.executable`
+
+See [iree/hal/local/testdata/elementwise_mul.mlir](iree/hal/local/testdata/elementwise_mul.mlir)
+for an example of the former that allows for the same source to be retargeted
+to many different formats/architectures.
+
+2. Translate the executable into the binary form consumed by the IREE loaders:
+
+```
+iree-translate \
+ -iree-mlir-to-hal-executable \
+ iree/hal/local/testdata/elementwise_mul.mlir \
+ -o=elementwise_mul.so \
+ -iree-hal-target-backends=dylib-llvm-aot \
+ -iree-llvm-debug-symbols=false \
+ -iree-llvm-target-triple=x86_64-pc-linux-elf
+```
+
+Note that the architecture and other related LLVM flags must be specified by the
+user. Some examples can be seen in [iree/hal/local/testdata/generate.sh](iree/hal/local/testdata/generate.sh).
+
+3. Setup flags
+
+Use the above example flagfile as a template or read below for details on how
+to map the parameters. You'll need to specify the executable file and entry
+point, the workgroup parameters, and any bindings and push constants used for
+I/O.
+
+---
+
+### Running executables from full user modules
+
+This approach extracts the embedded executable files contained within a full
+IREE module and allows for benchmarking of any of them by using the
+`--entry_point=` flag to select the executable. It's important to remember that
+the exact set of bindings and parameters are implementation details of the
+compiler and subject to change at any time - when using this approach one must
+inspect the IR to find the proper way to call their kernels.
+
+1. Build your module with the flags you want for your target architecture:
+
+```
+iree-compile \
+ -iree-input-type=mhlo \
+ iree/samples/simple_embedding/simple_embedding_test.mlir \
+ -o=module.vmfb \
+ -iree-hal-target-backends=dylib-llvm-aot \
+ -iree-llvm-debug-symbols=false \
+ -iree-llvm-target-triple=x86_64-pc-linux-elf \
+ -mlir-print-ir-after-all \
+ >module_dump.mlir 2>&1
+```
+
+This produces `module_dump.mlir` containing the IR at various stages.
+You'll need this to determine the flags used to invoke the dispatch.
+
+2. Extract the executable shared object from the module:
+
+```
+7z e -aoa -bb0 -y module.vmfb
+```
+
+This (today) results in a single extracted file you pass to the tool:
+
+```
+--executable_format=EX_ELF
+--executable_file=_simple_mul_dispatch_0_llvm_binary_ex_elf.so
+```
+
+3. Find `ResolveEntryPointOrdinalsPass` and look for the dispatch:
+
+```mlir
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer>
+ target(%3 : !hal.executable)[1]
+ workgroups([%c5, %c6, %c7])
+```
+
+This maps to the following flags defining the executable entry point and counts:
+
+```
+--entry_point=1
+--workgroup_count_x=5
+--workgroup_count_y=6
+--workgroup_count_z=7
+```
+
+4. Look up in the IR from that for where bindings are specified:
+
+```mlir
+ hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer>
+ layout(%0 : !hal.executable_layout)[%c0]
+ bindings([
+ %c0 = (%buffer : !hal.buffer)[%c0, %c16],
+ %c1 = (%buffer_0 : !hal.buffer)[%c0, %c16],
+ %c2 = (%buffer_1 : !hal.buffer)[%c0, %c16]
+ ])
+```
+
+This is 3 buffers of 16 bytes each, which is enough to call most things:
+
+```
+--binding=16xi8
+--binding=16xi8
+--binding=16xi8
+```
+
+If you want to provide real data then you can look for the `flow.executable`
+with the `!flow.dispatch.tensor` operands:
+
+```mlir
+ func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>,
+ %arg1: !flow.dispatch.tensor<readonly:4xf32>,
+ %arg2: !flow.dispatch.tensor<writeonly:4xf32>) {
+```
+
+Now we know each binding is 4 floats and can get more realistic test data:
+
+```
+--binding=4xf32=1,2,3,4
+--binding=4xf32=100,200,300,400
+--binding=4xf32=0,0,0,0
+```
+
+**Note that multiple tensors may alias to a single binding** - including
+tensors of differing data types. It's best to use the generic
+`[byte length]xi8` form above instead of trying to match the types in all but
+the most simple scenarios. You don't want to be using this tool to verify
+results and the only time it should matter what the values of the inputs are is
+if there is branching behavior inside the generated code itself. These are not
+good candidates for this tool.
+
+5. Look up in the IR to see the values of push constants, if required:
+
+```mlir
+ hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
+ layout(%0 : !hal.executable_layout)
+ offset(0)
+ values(%c1, %c2, %c3, %c4) : i32, i32, i32, i32
+```
+
+These are often shape dimensions but by this point they are hard to guess if
+non-constant. This microbenchmarking approach is not generally suited for
+things like this but in cases where you know the meaning you can provide values:
+
+```
+--push_constant=1
+--push_constant=2
+--push_constant=3
+--push_constant=4
+```
diff --git a/runtime/src/iree/hal/local/executable_library_demo.c b/runtime/src/iree/hal/local/executable_library_demo.c
new file mode 100644
index 0000000..af18875
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_library_demo.c
@@ -0,0 +1,120 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/executable_library_demo.h"
+
+#include <stddef.h>
+
+// An executable entry point, called one or more times based on the 3D XYZ
+// workgroup count specified during the dispatch. Each invocation gets access to
+// the dispatch state via |dispatch_state| such as workgroup parameters, push
+// constants providing small arguments, and buffer bindings.
+//
+// See the iree_hal_executable_dispatch_state_v0_t struct for more
+// information on the fields here and how they can be used.
+//
+// WARNING: these functions must not access mutable global state: read-only data
+// may be used but as each invocation may be running concurrently with any
+// number of other invocations (from any number of user sessions!) all
+// communication between invocations must use the buffer bindings for I/O.
+//
+// This is a simple scalar addition:
+// binding[1] = binding[0] + push_constant[0]
+static int dispatch_tile_a(
+ const iree_hal_executable_environment_v0_t* environment,
+ const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+ const iree_hal_executable_workgroup_state_v0_t* workgroup_state) {
+ const dispatch_tile_a_push_constants_t* push_constants =
+ (const dispatch_tile_a_push_constants_t*)dispatch_state->push_constants;
+ const float* src = ((const float*)dispatch_state->binding_ptrs[0]);
+ float* dst = ((float*)dispatch_state->binding_ptrs[1]);
+ const uint32_t x = workgroup_state->workgroup_id_x;
+ dst[x] = src[x] + push_constants->f0;
+ return 0;
+}
+
+// Just another entry point.
+static int dispatch_tile_b(
+ const iree_hal_executable_environment_v0_t* environment,
+ const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+ const iree_hal_executable_workgroup_state_v0_t* workgroup_state) {
+ return 0;
+}
+
+// Version/metadata header.
+static const iree_hal_executable_library_header_t header = {
+ // Declares what library version is present: newer runtimes may support
+ // loading older executables but newer executables cannot load on older
+ // runtimes.
+ .version = IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST,
+ // Name used for logging/diagnostics and rendezvous.
+ .name = "demo_library",
+ .features = IREE_HAL_EXECUTABLE_LIBRARY_FEATURE_NONE,
+ .sanitizer = IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_NONE,
+};
+// Table of export function entry points.
+static const iree_hal_executable_dispatch_v0_t entry_points[2] = {
+ dispatch_tile_a,
+ dispatch_tile_b,
+};
+// Optional attributes for each dispatch function used by the runtime.
+// The table can be omitted if all attributes are zero. We don't use
+// local_memory in our dispatches here and don't need to specify the sizes.
+static const iree_hal_executable_dispatch_attrs_v0_t entry_attrs[2] = {
+ {
+ .local_memory_pages = 0,
+ },
+ {
+ .local_memory_pages = 0,
+ },
+};
+// Names for each entry point.
+static const char* entry_point_names[2] = {
+ "dispatch_tile_a",
+ "dispatch_tile_b",
+};
+// User tags for debugging/logging; not used for anything but presentation.
+static const char* entry_point_tags[2] = {
+ "matmul+div",
+ "conv2d[512x512]",
+};
+static const iree_hal_executable_library_v0_t library = {
+ .header = &header,
+ .imports =
+ {
+ .count = 0,
+ .symbols = NULL,
+ },
+ .exports =
+ {
+ .count = 2,
+ .ptrs = entry_points,
+ .attrs = entry_attrs,
+ .names = entry_point_names,
+ .tags = entry_point_tags,
+ },
+ .constants =
+ {
+ .count = 0,
+ },
+};
+
+// The primary access point to the executable: in a static library this is
+// just like any other C symbol that can be called from other code (like
+// executable_library_test.c does), and in dynamic libraries this is the symbol
+// that you would be dlsym'ing.
+//
+// This is just code: if the executable wants to return different headers based
+// on the currently executing architecture or the requested version it can. For
+// example, an executable may want to swap out a few entry points to an
+// architecture-specific version.
+const iree_hal_executable_library_header_t** demo_executable_library_query(
+ iree_hal_executable_library_version_t max_version,
+ const iree_hal_executable_environment_v0_t* environment) {
+ return max_version <= IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST
+ ? (const iree_hal_executable_library_header_t**)&library
+ : NULL;
+}
diff --git a/runtime/src/iree/hal/local/executable_library_demo.h b/runtime/src/iree/hal/local/executable_library_demo.h
new file mode 100644
index 0000000..f458768
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_library_demo.h
@@ -0,0 +1,53 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_DEMO_H_
+#define IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_DEMO_H_
+
+#include <stdint.h>
+
+#include "iree/hal/local/executable_library.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Ideally we would have the IREE compiler generate a header like this so that
+// it's possible to manually call into executables. For now this is just an
+// example for the demo: the real HAL does not require this header as it
+// dlsym's the function pointer and packs the push constants itself.
+
+// Push constants used in the 'dispatch_tile_a' entry point.
+typedef union {
+ uint32_t values[1];
+ struct {
+ float f0;
+ };
+} dispatch_tile_a_push_constants_t;
+
+// Returns a simple demo library with the following structure:
+//
+// Name: 'demo_library'
+//
+// [0] 'dispatch_tile_a': matmul+div
+// push constants: 1 (dispatch_tile_a_push_constants_t)
+// bindings: 2
+// [0] = R
+// [1] = W
+//
+// [1] 'dispatch_tile_b': conv2d[512x512]
+// push constants: 0
+// bindings: 0
+//
+const iree_hal_executable_library_header_t** demo_executable_library_query(
+ iree_hal_executable_library_version_t max_version,
+ const iree_hal_executable_environment_v0_t* environment);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_DEMO_H_
diff --git a/runtime/src/iree/hal/local/executable_library_test.c b/runtime/src/iree/hal/local/executable_library_test.c
new file mode 100644
index 0000000..f925117
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_library_test.c
@@ -0,0 +1,124 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/executable_library.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_environment.h"
+#include "iree/hal/local/executable_library_demo.h"
+
+// Demonstration of the HAL-side of the iree_hal_executable_library_t ABI.
+// This is the lowest level of the system right before calling into generated
+// code.
+//
+// This shows what the various execution systems are doing (through a lot
+// of fancy means): all `inline_command_buffer.c` and `task_command_buffer.c`
+// lead up to just calling into the iree_hal_executable_dispatch_v0_t entry
+// point functions with a state structure and a workgroup XYZ.
+//
+// Below walks through acquiring the library pointer (which in this case is a
+// hand-coded example to show the codegen-side), setting up the I/O buffers and
+// state, and calling the function to do some math.
+//
+// See iree/hal/local/executable_library.h for more information.
+int main(int argc, char** argv) {
+ // Default environment.
+ iree_hal_executable_environment_v0_t environment;
+ iree_hal_executable_environment_initialize(iree_allocator_system(),
+ &environment);
+
+ // Query the library header at the requested version.
+ // The query call in this example is going into the handwritten demo code
+ // but could be targeted at generated files or runtime-loaded shared objects.
+ union {
+ const iree_hal_executable_library_header_t** header;
+ const iree_hal_executable_library_v0_t* v0;
+ } library;
+ library.header = demo_executable_library_query(
+ IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST, &environment);
+ IREE_ASSERT_NE(library.header, NULL, "version may not have matched");
+ const iree_hal_executable_library_header_t* header = *library.header;
+ IREE_ASSERT_NE(header, NULL, "version may not have matched");
+ IREE_ASSERT_LE(
+ header->version, IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST,
+ "expecting the library to have the same or older version as us");
+ IREE_ASSERT(strcmp(header->name, "demo_library") == 0,
+ "library name can be used to rendezvous in a registry");
+ IREE_ASSERT_GT(library.v0->exports.count, 0,
+ "expected at least one entry point");
+
+ // Push constants are an array of 4-byte values that are much more efficient
+ // to specify (no buffer pointer indirection) and more efficient to access
+ // (static struct offset address calculation, all fit in a few cache lines,
+ // etc). They are limited in capacity, though, so only <=64(ish) are usable.
+ dispatch_tile_a_push_constants_t push_constants;
+ memset(&push_constants, 0, sizeof(push_constants));
+ push_constants.f0 = 5.0f;
+
+ // Setup the two buffer bindings the entry point is expecting.
+ // They only need to remain valid for the duration of the invocation and all
+ // memory accessed by the invocation will come from here.
+ float arg0[4] = {1.0f, 2.0f, 3.0f, 4.0f};
+ float ret0[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+ const float ret0_expected[4] = {6.0f, 7.0f, 8.0f, 9.0f};
+ size_t binding_lengths[2] = {
+ sizeof(arg0),
+ sizeof(ret0),
+ };
+ void* binding_ptrs[2] = {
+ arg0,
+ ret0,
+ };
+
+ // Resolve the entry point by ordinal.
+ const iree_hal_executable_dispatch_v0_t entry_fn_ptr =
+ library.v0->exports.ptrs[0];
+
+ // Dispatch each workgroup with the same state.
+ const iree_hal_executable_dispatch_state_v0_t dispatch_state = {
+ .workgroup_count_x = 4,
+ .workgroup_count_y = 1,
+ .workgroup_count_z = 1,
+ .workgroup_size_x = 1,
+ .workgroup_size_y = 1,
+ .workgroup_size_z = 1,
+ .max_concurrency = 1,
+ .push_constant_count = IREE_ARRAYSIZE(push_constants.values),
+ .push_constants = push_constants.values,
+ .binding_count = IREE_ARRAYSIZE(binding_ptrs),
+ .binding_ptrs = binding_ptrs,
+ .binding_lengths = binding_lengths,
+ };
+ iree_hal_executable_workgroup_state_v0_t workgroup_state = {
+ .processor_id = iree_cpu_query_processor_id(),
+ };
+ for (uint32_t z = 0; z < dispatch_state.workgroup_count_z; ++z) {
+ workgroup_state.workgroup_id_z = z;
+ for (uint32_t y = 0; y < dispatch_state.workgroup_count_y; ++y) {
+ workgroup_state.workgroup_id_y = y;
+ for (uint32_t x = 0; x < dispatch_state.workgroup_count_x; ++x) {
+ workgroup_state.workgroup_id_x = x;
+ // Invoke the workgroup (x, y, z).
+ int ret = entry_fn_ptr(&environment, &dispatch_state, &workgroup_state);
+ IREE_ASSERT_EQ(
+ ret, 0,
+ "if we have bounds checking enabled the executable will signal "
+ "us of badness");
+ }
+ }
+ }
+
+ // Ensure it worked.
+ bool all_match = true;
+ for (size_t i = 0; i < IREE_ARRAYSIZE(ret0_expected); ++i) {
+ IREE_ASSERT_EQ(ret0[i], ret0_expected[i], "math is hard");
+ all_match = all_match && ret0[i] == ret0_expected[i];
+ }
+ return all_match ? 0 : 1;
+}
diff --git a/runtime/src/iree/hal/local/executable_loader.c b/runtime/src/iree/hal/local/executable_loader.c
new file mode 100644
index 0000000..0703a9d
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_loader.c
@@ -0,0 +1,100 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/executable_loader.h"
+
+iree_status_t iree_hal_executable_import_provider_resolve(
+ const iree_hal_executable_import_provider_t import_provider,
+ iree_string_view_t symbol_name, void** out_fn_ptr) {
+ IREE_ASSERT_ARGUMENT(out_fn_ptr);
+ *out_fn_ptr = NULL;
+
+ // A `?` suffix indicates the symbol is weakly linked and can be NULL.
+ bool is_weak = false;
+ if (iree_string_view_ends_with(symbol_name, iree_make_cstring_view("?"))) {
+ is_weak = true;
+ symbol_name = iree_string_view_substr(symbol_name, 0, symbol_name.size - 1);
+ }
+
+ // Note that it's fine for there to be no registered provider if all symbols
+ // are weak.
+ if (import_provider.resolve == NULL) {
+ if (is_weak) return iree_ok_status();
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "no import provider registered for resolving "
+ "executable imports (while trying to resolve %.*s)",
+ (int)symbol_name.size, symbol_name.data);
+ }
+
+ iree_status_t status =
+ import_provider.resolve(import_provider.self, symbol_name, out_fn_ptr);
+ if (!iree_status_is_ok(status) && is_weak) {
+ status = iree_status_ignore(status); // ok to fail on weak symbols
+ }
+
+ return status;
+}
+
+void iree_hal_executable_loader_initialize(
+ const void* vtable, iree_hal_executable_import_provider_t import_provider,
+ iree_hal_executable_loader_t* out_base_loader) {
+ iree_atomic_ref_count_init(&out_base_loader->ref_count);
+ out_base_loader->vtable = vtable;
+ out_base_loader->import_provider = import_provider;
+}
+
+void iree_hal_executable_loader_retain(
+ iree_hal_executable_loader_t* executable_loader) {
+ if (IREE_LIKELY(executable_loader)) {
+ iree_atomic_ref_count_inc(&executable_loader->ref_count);
+ }
+}
+
+void iree_hal_executable_loader_release(
+ iree_hal_executable_loader_t* executable_loader) {
+ if (IREE_LIKELY(executable_loader) &&
+ iree_atomic_ref_count_dec(&executable_loader->ref_count) == 1) {
+ executable_loader->vtable->destroy(executable_loader);
+ }
+}
+
+bool iree_hal_executable_loader_query_support(
+ iree_hal_executable_loader_t* executable_loader,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format) {
+ IREE_ASSERT_ARGUMENT(executable_loader);
+ return executable_loader->vtable->query_support(
+ executable_loader, caching_mode, executable_format);
+}
+
+bool iree_hal_query_any_executable_loader_support(
+ iree_host_size_t loader_count, iree_hal_executable_loader_t** loaders,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format) {
+ IREE_ASSERT_ARGUMENT(loaders);
+ for (iree_host_size_t i = 0; i < loader_count; ++i) {
+ if (iree_hal_executable_loader_query_support(loaders[i], caching_mode,
+ executable_format)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+iree_status_t iree_hal_executable_loader_try_load(
+ iree_hal_executable_loader_t* executable_loader,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable) {
+ IREE_ASSERT_ARGUMENT(executable_loader);
+ IREE_ASSERT_ARGUMENT(executable_params);
+ IREE_ASSERT_ARGUMENT(!executable_params->executable_layout_count ||
+ executable_params->executable_layouts);
+ IREE_ASSERT_ARGUMENT(!executable_params->executable_data.data_length ||
+ executable_params->executable_data.data);
+ IREE_ASSERT_ARGUMENT(out_executable);
+ return executable_loader->vtable->try_load(executable_loader,
+ executable_params, out_executable);
+}
diff --git a/runtime/src/iree/hal/local/executable_loader.h b/runtime/src/iree/hal/local/executable_loader.h
new file mode 100644
index 0000000..ae8f6dc
--- /dev/null
+++ b/runtime/src/iree/hal/local/executable_loader.h
@@ -0,0 +1,149 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_EXECUTABLE_LOADER_H_
+#define IREE_HAL_LOCAL_EXECUTABLE_LOADER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_import_provider_t
+//===----------------------------------------------------------------------===//
+
+// Interface used to resolve executable imports at load-time.
+// This virtualizes some external provider and does not take ownership of the
+// instance: callers must ensure that the provider remains valid for the
+// lifetime of the executable loader that it is providing for.
+typedef struct iree_hal_executable_import_provider_t {
+ // TODO(benvanik): version field.
+ IREE_API_UNSTABLE
+
+ // User-defined pointer passed to all functions.
+ void* self;
+
+ // Resolves an import symbol with the given |symbol_name| and stores a pointer
+ // to the function (or its context) in |out_fn_ptr|.
+ iree_status_t(IREE_API_PTR* resolve)(void* self,
+ iree_string_view_t symbol_name,
+ void** out_fn_ptr);
+} iree_hal_executable_import_provider_t;
+
+static inline iree_hal_executable_import_provider_t
+iree_hal_executable_import_provider_null() {
+ iree_hal_executable_import_provider_t provider = {NULL, NULL};
+ return provider;
+}
+
+// Resolves an import symbol with the given |symbol_name| and stores a pointer
+// to the function (or its context) in |out_fn_ptr|.
+//
+// A |symbol_name| ending in `?` indicates that the symbol is weak and is
+// allowed to be resolved to NULL. Such cases will always return OK.
+iree_status_t iree_hal_executable_import_provider_resolve(
+ const iree_hal_executable_import_provider_t import_provider,
+ iree_string_view_t symbol_name, void** out_fn_ptr);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_loader_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_executable_loader_vtable_t
+ iree_hal_executable_loader_vtable_t;
+
+// Interface for compiled executable loader implementations.
+// A loader may be as simple as something that resolves function pointers in the
+// local executable for statically linked executables or as complex as a custom
+// relocatable ELF loader. Loaders are registered and persist for each device
+// they are attached to and may keep internal caches or memoize resources shared
+// by multiple loaded executables.
+//
+// Thread-safe - multiple threads may load executables (including the *same*
+// executable) simultaneously.
+typedef struct iree_hal_executable_loader_t {
+ iree_atomic_ref_count_t ref_count;
+ const iree_hal_executable_loader_vtable_t* vtable;
+ iree_hal_executable_import_provider_t import_provider;
+} iree_hal_executable_loader_t;
+
+// Initializes the base iree_hal_executable_loader_t type.
+// Called by subclasses upon allocating their loader.
+void iree_hal_executable_loader_initialize(
+ const void* vtable, iree_hal_executable_import_provider_t import_provider,
+ iree_hal_executable_loader_t* out_base_loader);
+
+// Retains the given |executable_loader| for the caller.
+void iree_hal_executable_loader_retain(
+ iree_hal_executable_loader_t* executable_loader);
+
+// Releases the given |executable_loader| from the caller.
+void iree_hal_executable_loader_release(
+ iree_hal_executable_loader_t* executable_loader);
+
+// Returns true if the loader can load executables of the given
+// |executable_format|. Note that loading may still fail if the executable uses
+// features not available on the current host or runtime.
+bool iree_hal_executable_loader_query_support(
+ iree_hal_executable_loader_t* executable_loader,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format);
+
+// Returns true if any loader in the list can load executables of the given
+// |executable_format|. Note that loading may still fail if the executable uses
+// features not available on the current host or runtime.
+bool iree_hal_query_any_executable_loader_support(
+ iree_host_size_t loader_count, iree_hal_executable_loader_t** loaders,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format);
+
+// Tries loading the executable data provided in the given format.
+// May fail even if the executable is valid if it requires features not
+// supported by the current host or runtime (such as available architectures,
+// imports, etc).
+//
+// Depending on loader ability the caching_mode is used to enable certain
+// features such as instrumented profiling. Not all formats support these
+// features and cooperation of both the compiler producing the executables and
+// the runtime loader and system are required.
+//
+// Returns IREE_STATUS_CANCELLED when the loader cannot load the file in the
+// given format.
+iree_status_t iree_hal_executable_loader_try_load(
+ iree_hal_executable_loader_t* executable_loader,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_loader_t implementation details
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_executable_loader_vtable_t {
+ void(IREE_API_PTR* destroy)(iree_hal_executable_loader_t* executable_loader);
+
+ bool(IREE_API_PTR* query_support)(
+ iree_hal_executable_loader_t* executable_loader,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format);
+
+ iree_status_t(IREE_API_PTR* try_load)(
+ iree_hal_executable_loader_t* executable_loader,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable);
+} iree_hal_executable_loader_vtable_t;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_EXECUTABLE_LOADER_H_
diff --git a/runtime/src/iree/hal/local/inline_command_buffer.c b/runtime/src/iree/hal/local/inline_command_buffer.c
new file mode 100644
index 0000000..5b585d0
--- /dev/null
+++ b/runtime/src/iree/hal/local/inline_command_buffer.c
@@ -0,0 +1,553 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/inline_command_buffer.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/fpu_state.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/hal/local/local_descriptor_set_layout.h"
+#include "iree/hal/local/local_executable.h"
+#include "iree/hal/local/local_executable_layout.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_inline_command_buffer_t
+//===----------------------------------------------------------------------===//
+
+// Inline synchronous one-shot command "buffer".
+typedef struct iree_hal_inline_command_buffer_t {
+  // Abstract base; must be the first member so that base pointers returned to
+  // callers alias this struct (see iree_hal_inline_command_buffer_cast).
+  iree_hal_command_buffer_t base;
+  // Allocator the command buffer was allocated from; used to free it on
+  // destroy and for transient workgroup-local memory during dispatch.
+  iree_allocator_t host_allocator;
+
+  struct {
+    // A flattened list of all available descriptor set bindings.
+    // As descriptor sets are pushed/bound the bindings will be updated to
+    // represent the fully-translated binding data pointer.
+    //
+    // TODO(benvanik): support proper mapping semantics and track the
+    // iree_hal_buffer_mapping_t and map/unmap where appropriate.
+    void* full_bindings[IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT *
+                        IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT];
+    size_t full_binding_lengths[IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT *
+                                IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT];
+
+    // Packed bindings scratch space used during dispatch. Executable bindings
+    // are packed into a dense list with unused bindings removed.
+    void* packed_bindings[IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT *
+                          IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT];
+    size_t packed_binding_lengths[IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT *
+                                  IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT];
+
+    // All available push constants updated each time push_constants is called.
+    // Reset only with the command buffer and otherwise will maintain its values
+    // during recording to allow for partial push_constants updates.
+    uint32_t push_constants[IREE_HAL_LOCAL_MAX_PUSH_CONSTANT_COUNT];
+
+    // Cached and initialized dispatch state reused for all dispatches.
+    // Individual dispatches must populate the dynamically changing fields like
+    // push_constant_count and binding_count.
+    iree_alignas(64) iree_hal_executable_dispatch_state_v0_t dispatch_state;
+
+    // An opaque tag used to reduce the cost of processor ID queries.
+    iree_cpu_processor_tag_t processor_tag;
+    // Guess at the current processor ID.
+    iree_cpu_processor_id_t processor_id;
+  } state;
+} iree_hal_inline_command_buffer_t;
+
+static const iree_hal_command_buffer_vtable_t
+    iree_hal_inline_command_buffer_vtable;
+
+// Downcasts |base_value| to the inline command buffer type.
+// IREE_HAL_ASSERT_TYPE checks the vtable matches (assert-style; likely
+// compiled out in release builds — confirm the macro definition).
+static iree_hal_inline_command_buffer_t* iree_hal_inline_command_buffer_cast(
+    iree_hal_command_buffer_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_inline_command_buffer_vtable);
+  return (iree_hal_inline_command_buffer_t*)base_value;
+}
+
+// Clears all recorded state (bindings, push constants, processor info) and
+// re-links the cached dispatch state pointers into the zeroed storage.
+// Called on create, begin, end, and destroy.
+static void iree_hal_inline_command_buffer_reset(
+    iree_hal_inline_command_buffer_t* command_buffer) {
+  memset(&command_buffer->state, 0, sizeof(command_buffer->state));
+
+  // Setup the cached dispatch state pointers that don't change.
+  iree_hal_executable_dispatch_state_v0_t* dispatch_state =
+      &command_buffer->state.dispatch_state;
+  dispatch_state->push_constants = command_buffer->state.push_constants;
+  dispatch_state->binding_ptrs = command_buffer->state.packed_bindings;
+  dispatch_state->binding_lengths =
+      command_buffer->state.packed_binding_lengths;
+}
+
+// Public entry point; full contract is documented in inline_command_buffer.h.
+// Rejects any mode that is not one-shot + allow-inline-execution since the
+// implementation executes work at record time.
+iree_status_t iree_hal_inline_command_buffer_create(
+    iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_allocator_t host_allocator,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  IREE_ASSERT_ARGUMENT(out_command_buffer);
+  *out_command_buffer = NULL;
+  if (!iree_all_bits_set(
+          mode, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
+                    IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION)) {
+    // This implementation only supports command buffers that are allowed to
+    // execute inline. This mode is a contract with the caller that it is ok if
+    // we begin executing prior to submission.
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "inline command buffers must have a mode with ALLOW_INLINE_EXECUTION");
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_inline_command_buffer_t* command_buffer = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(*command_buffer), (void**)&command_buffer);
+  if (iree_status_is_ok(status)) {
+    iree_hal_command_buffer_initialize(
+        device, mode, command_categories, queue_affinity,
+        &iree_hal_inline_command_buffer_vtable, &command_buffer->base);
+    command_buffer->host_allocator = host_allocator;
+    // Ensure state starts zeroed with dispatch pointers wired up.
+    iree_hal_inline_command_buffer_reset(command_buffer);
+
+    *out_command_buffer = &command_buffer->base;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Vtable destroy hook: clears recorded state and returns the allocation to the
+// allocator it was created from.
+static void iree_hal_inline_command_buffer_destroy(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_inline_command_buffer_t* command_buffer =
+      iree_hal_inline_command_buffer_cast(base_command_buffer);
+  // Capture the allocator before the struct memory is freed below.
+  iree_allocator_t host_allocator = command_buffer->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_inline_command_buffer_reset(command_buffer);
+  iree_allocator_free(host_allocator, command_buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns true if |command_buffer| was created by
+// iree_hal_inline_command_buffer_create (checked via dyn_cast on the vtable).
+bool iree_hal_inline_command_buffer_isa(
+    iree_hal_command_buffer_t* command_buffer) {
+  return iree_hal_command_buffer_dyn_cast(
+      command_buffer, &iree_hal_inline_command_buffer_vtable);
+}
+
+// Vtable dyn_cast hook: returns |command_buffer| if the requested |vtable|
+// matches this implementation, otherwise NULL.
+static void* iree_hal_inline_command_buffer_dyn_cast(
+    iree_hal_command_buffer_t* command_buffer, const void* vtable) {
+  if (vtable == &iree_hal_inline_command_buffer_vtable) {
+    IREE_HAL_ASSERT_TYPE(command_buffer, vtable);
+    return command_buffer;
+  }
+  return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_inline_command_buffer_t recording
+//===----------------------------------------------------------------------===//
+
+// Updates the cached processor ID field in the command buffer.
+// Called at begin and before each dispatch so the dispatch sees a reasonably
+// fresh (if approximate) processor ID without querying on every use; the
+// cached tag amortizes the cost of the query.
+static void iree_hal_inline_command_buffer_update_processor_id(
+    iree_hal_inline_command_buffer_t* command_buffer) {
+  iree_cpu_requery_processor_id(&command_buffer->state.processor_tag,
+                                &command_buffer->state.processor_id);
+}
+
+// Vtable begin hook; since execution happens at record time this also marks
+// the start of execution. Drops any previously recorded state.
+static iree_status_t iree_hal_inline_command_buffer_begin(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_inline_command_buffer_t* command_buffer =
+      iree_hal_inline_command_buffer_cast(base_command_buffer);
+  iree_hal_inline_command_buffer_reset(command_buffer);
+
+  // Query the processor ID we start out on. We may update it during execution.
+  iree_hal_inline_command_buffer_update_processor_id(command_buffer);
+
+  return iree_ok_status();
+}
+
+// Vtable end hook; clears state so raw binding pointers recorded during
+// execution are not retained past the command buffer's use.
+static iree_status_t iree_hal_inline_command_buffer_end(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_inline_command_buffer_t* command_buffer =
+      iree_hal_inline_command_buffer_cast(base_command_buffer);
+  iree_hal_inline_command_buffer_reset(command_buffer);
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_inline_command_buffer_t debug utilities
+//===----------------------------------------------------------------------===//
+
+// No-op today: |label|, |label_color|, and |location| are ignored until the
+// tracing integration below lands.
+static void iree_hal_inline_command_buffer_begin_debug_group(
+    iree_hal_command_buffer_t* base_command_buffer, iree_string_view_t label,
+    iree_hal_label_color_t label_color,
+    const iree_hal_label_location_t* location) {
+  // TODO(benvanik): tracy event stack.
+}
+
+// No-op today; pairs with begin_debug_group above.
+static void iree_hal_inline_command_buffer_end_debug_group(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  // TODO(benvanik): tracy event stack.
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_execution_barrier
+//===----------------------------------------------------------------------===//
+
+// Each command completes before the next is recorded, so every barrier is
+// trivially satisfied.
+static iree_status_t iree_hal_inline_command_buffer_execution_barrier(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_hal_execution_barrier_flags_t flags,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  // No-op; we execute synchronously.
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_signal_event
+//===----------------------------------------------------------------------===//
+
+// Events are trivially reached under synchronous execution; nothing to signal.
+static iree_status_t iree_hal_inline_command_buffer_signal_event(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  // No-op; we execute synchronously.
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_reset_event
+//===----------------------------------------------------------------------===//
+
+// Events carry no state here (see signal_event above); nothing to reset.
+static iree_status_t iree_hal_inline_command_buffer_reset_event(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  // No-op; we execute synchronously.
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_wait_events
+//===----------------------------------------------------------------------===//
+
+// All prior commands have already executed by the time this is recorded, so
+// any event wait is trivially satisfied.
+static iree_status_t iree_hal_inline_command_buffer_wait_events(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_host_size_t event_count, const iree_hal_event_t** events,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  // No-op; we execute synchronously.
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_discard_buffer
+//===----------------------------------------------------------------------===//
+
+// No-op today; the discard hint is currently unused by this implementation.
+static iree_status_t iree_hal_inline_command_buffer_discard_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_t* buffer) {
+  // Could be treated as a cache invalidation as it indicates we won't be using
+  // the existing buffer contents again.
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_fill_buffer
+//===----------------------------------------------------------------------===//
+
+// Executes immediately by filling through a host mapping of |target_buffer|.
+static iree_status_t iree_hal_inline_command_buffer_fill_buffer(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, const void* pattern,
+    iree_host_size_t pattern_length) {
+  return iree_hal_buffer_map_fill(target_buffer, target_offset, length, pattern,
+                                  pattern_length);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_update_buffer
+//===----------------------------------------------------------------------===//
+
+// Executes immediately: writes |length| bytes from host memory at
+// |source_buffer|+|source_offset| into the target buffer via a host mapping.
+static iree_status_t iree_hal_inline_command_buffer_update_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
+    iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+    iree_device_size_t target_offset, iree_device_size_t length) {
+  return iree_hal_buffer_map_write(
+      target_buffer, target_offset,
+      (const uint8_t*)source_buffer + source_offset, length);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_copy_buffer
+//===----------------------------------------------------------------------===//
+
+// Executes immediately via host mappings of both source and target buffers.
+static iree_status_t iree_hal_inline_command_buffer_copy_buffer(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length) {
+  return iree_hal_buffer_map_copy(source_buffer, source_offset, target_buffer,
+                                  target_offset, length);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_push_constants
+//===----------------------------------------------------------------------===//
+// NOTE: command buffer state change only; enqueues no tasks.
+
+// Copies |values_length| bytes into push constant storage at byte |offset|.
+// State change only; enqueues no work. Values persist across dispatches until
+// the command buffer is reset, allowing partial updates.
+static iree_status_t iree_hal_inline_command_buffer_push_constants(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+    const void* values, iree_host_size_t values_length) {
+  iree_hal_inline_command_buffer_t* command_buffer =
+      iree_hal_inline_command_buffer_cast(base_command_buffer);
+
+  // A write ending exactly at sizeof(push_constants) is valid so the check
+  // must be exclusive (>): using >= would incorrectly reject an update of the
+  // full push constant range.
+  if (IREE_UNLIKELY(offset + values_length >
+                    sizeof(command_buffer->state.push_constants))) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "push constant range %zu (length=%zu) out of range",
+                            offset, values_length);
+  }
+
+  memcpy((uint8_t*)&command_buffer->state.push_constants + offset, values,
+         values_length);
+
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_push_descriptor_set
+//===----------------------------------------------------------------------===//
+// NOTE: command buffer state change only; enqueues no tasks.
+
+// Resolves each binding to a host pointer and records it in the flattened
+// full_bindings table at index set * MAX_BINDING_COUNT + binding.
+// State change only; enqueues no work.
+//
+// NOTE(review): mappings are acquired with PERSISTENT mode and never unmapped
+// here — relies on persistent-mapping semantics; see the struct-level TODO on
+// tracking iree_hal_buffer_mapping_t.
+static iree_status_t iree_hal_inline_command_buffer_push_descriptor_set(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings) {
+  iree_hal_inline_command_buffer_t* command_buffer =
+      iree_hal_inline_command_buffer_cast(base_command_buffer);
+
+  if (IREE_UNLIKELY(set >= IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "set %u out of bounds", set);
+  }
+
+  iree_host_size_t binding_base =
+      set * IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT;
+  for (iree_host_size_t i = 0; i < binding_count; ++i) {
+    if (IREE_UNLIKELY(bindings[i].binding >=
+                      IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT)) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "buffer binding index out of bounds");
+    }
+    iree_host_size_t binding_ordinal = binding_base + bindings[i].binding;
+
+    // TODO(benvanik): track mapping so we can properly map/unmap/flush/etc.
+    iree_hal_buffer_mapping_t buffer_mapping = {{0}};
+    IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+        bindings[i].buffer, IREE_HAL_MAPPING_MODE_PERSISTENT,
+        IREE_HAL_MEMORY_ACCESS_ANY, bindings[i].offset, bindings[i].length,
+        &buffer_mapping));
+    command_buffer->state.full_bindings[binding_ordinal] =
+        buffer_mapping.contents.data;
+    command_buffer->state.full_binding_lengths[binding_ordinal] =
+        buffer_mapping.contents.data_length;
+  }
+
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_bind_descriptor_set
+//===----------------------------------------------------------------------===//
+// NOTE: command buffer state change only; enqueues no tasks.
+
+// Unimplemented: only push-style descriptor sets (above) are supported today.
+static iree_status_t iree_hal_inline_command_buffer_bind_descriptor_set(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_hal_descriptor_set_t* descriptor_set,
+    iree_host_size_t dynamic_offset_count,
+    const iree_device_size_t* dynamic_offsets) {
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "descriptor set binding not yet implemented");
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_dispatch
+//===----------------------------------------------------------------------===//
+
+// Executes the dispatch immediately on the calling thread, single-threaded
+// across all workgroups.
+static iree_status_t iree_hal_inline_command_buffer_dispatch(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z) {
+  iree_hal_inline_command_buffer_t* command_buffer =
+      iree_hal_inline_command_buffer_cast(base_command_buffer);
+
+  iree_hal_local_executable_t* local_executable =
+      iree_hal_local_executable_cast(executable);
+  // NOTE(review): |entry_point| is not bounds-checked here; assumed validated
+  // upstream — confirm against the HAL validation layer.
+  iree_hal_local_executable_layout_t* local_layout =
+      local_executable->executable_layouts[entry_point];
+  iree_host_size_t local_memory_size =
+      local_executable->dispatch_attrs
+          ? local_executable->dispatch_attrs[entry_point].local_memory_pages *
+                IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE
+          : 0;
+
+  // Update the ID of the processor we are running on.
+  // We don't know how much time has passed since we last updated as we are
+  // running inline with the user program; if we knew we were going to be
+  // handling a batch of dispatches we could reduce the amount of times we call
+  // this - but that's what the task system is for.
+  iree_hal_inline_command_buffer_update_processor_id(command_buffer);
+
+  iree_hal_executable_dispatch_state_v0_t* dispatch_state =
+      &command_buffer->state.dispatch_state;
+
+  // TODO(benvanik): expose on API or keep fixed on executable.
+  dispatch_state->workgroup_size_x = 1;
+  dispatch_state->workgroup_size_y = 1;
+  dispatch_state->workgroup_size_z = 1;
+  dispatch_state->workgroup_count_x = workgroup_x;
+  dispatch_state->workgroup_count_y = workgroup_y;
+  dispatch_state->workgroup_count_z = workgroup_z;
+
+  // Single-threaded.
+  dispatch_state->max_concurrency = 1;
+
+  // Push constants are pulled directly from the command buffer state, but we
+  // only allow the dispatch to read what we know is initialized based on the
+  // layout.
+  dispatch_state->push_constant_count = local_layout->push_constants;
+
+  // Produce the dense binding list based on the declared bindings used.
+  // This allows us to change the descriptor sets and bindings counts supported
+  // in the HAL independent of any executable as each executable just gets the
+  // flat dense list and doesn't care about our descriptor set stuff.
+  //
+  // Note that we are just directly setting the binding data pointers here with
+  // no ownership/retaining/etc - it's part of the HAL contract that buffers are
+  // kept valid for the duration they may be in use.
+  iree_hal_local_binding_mask_t used_binding_mask = local_layout->used_bindings;
+  iree_host_size_t used_binding_count =
+      iree_math_count_ones_u64(used_binding_mask);
+  dispatch_state->binding_count = used_binding_count;
+  void** binding_ptrs = (void**)dispatch_state->binding_ptrs;
+  size_t* binding_lengths = (size_t*)dispatch_state->binding_lengths;
+  // Walk set bits of the mask: each trailing-zero count gives the distance to
+  // the next used binding ordinal in the flattened table.
+  iree_host_size_t binding_base = 0;
+  for (iree_host_size_t i = 0; i < used_binding_count; ++i) {
+    int mask_offset = iree_math_count_trailing_zeros_u64(used_binding_mask);
+    int binding_ordinal = binding_base + mask_offset;
+    binding_base += mask_offset + 1;
+    used_binding_mask = iree_shr(used_binding_mask, mask_offset + 1);
+    binding_ptrs[i] = command_buffer->state.full_bindings[binding_ordinal];
+    if (!binding_ptrs[i]) {
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "(flat) binding %d is NULL", binding_ordinal);
+    }
+    binding_lengths[i] =
+        command_buffer->state.full_binding_lengths[binding_ordinal];
+  }
+
+  // TODO(benvanik): plumb through an arena or fixed-size reservation to use.
+  // For now when deploying to devices where you want something like the
+  // inline command buffer you probably don't want 256KB of transient memory
+  // getting allocated and retained implicitly - this should be a compiler
+  // option. For now we just malloc here to make things work and strongly
+  // encourage the kind of user who wants synchronous inline execution to not
+  // also want tons of scratch memory.
+  iree_byte_span_t local_memory = iree_make_byte_span(NULL, local_memory_size);
+  if (local_memory_size > 0) {
+    IREE_RETURN_IF_ERROR(iree_allocator_malloc(command_buffer->host_allocator,
+                                               local_memory_size,
+                                               (void**)&local_memory.data));
+  }
+
+  // Since we are running on a borrowed thread, we know nothing about the
+  // floating point state. Reset it.
+  iree_fpu_state_t fpu_state =
+      iree_fpu_state_push(IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO);
+  iree_status_t status = iree_hal_local_executable_issue_dispatch_inline(
+      local_executable, entry_point, dispatch_state,
+      command_buffer->state.processor_id, local_memory);
+  iree_fpu_state_pop(fpu_state);
+
+  // Local memory is released even on dispatch failure.
+  if (local_memory.data) {
+    iree_allocator_free(command_buffer->host_allocator, local_memory.data);
+  }
+  return status;
+}
+
+// XYZ workgroup counts as laid out in an indirect dispatch buffer; the union
+// allows both named (.x/.y/.z) and indexed (.value[i]) access to the same
+// three uint32 values.
+typedef union iree_hal_vec3_t {
+  struct {
+    uint32_t x;
+    uint32_t y;
+    uint32_t z;
+  };
+  uint32_t value[3];
+} iree_hal_vec3_t;
+
+// Reads the workgroup count (3 x uint32_t) from |workgroups_buffer| at
+// |workgroups_offset| via a host mapping and forwards to the direct dispatch
+// above. NOTE(review): the mapping is never unmapped — see TODO below.
+static iree_status_t iree_hal_inline_command_buffer_dispatch_indirect(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    iree_hal_buffer_t* workgroups_buffer,
+    iree_device_size_t workgroups_offset) {
+  // TODO(benvanik): track mapping so we can properly map/unmap/flush/etc.
+  iree_hal_buffer_mapping_t buffer_mapping = {{0}};
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+      workgroups_buffer, IREE_HAL_MAPPING_MODE_PERSISTENT,
+      IREE_HAL_MEMORY_ACCESS_READ, workgroups_offset, 3 * sizeof(uint32_t),
+      &buffer_mapping));
+  iree_hal_vec3_t workgroup_count =
+      *(const iree_hal_vec3_t*)buffer_mapping.contents.data;
+  return iree_hal_inline_command_buffer_dispatch(
+      base_command_buffer, executable, entry_point, workgroup_count.x,
+      workgroup_count.y, workgroup_count.z);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_vtable_t
+//===----------------------------------------------------------------------===//
+
+// Routes the public HAL command buffer API to the inline implementation above.
+static const iree_hal_command_buffer_vtable_t
+    iree_hal_inline_command_buffer_vtable = {
+        .destroy = iree_hal_inline_command_buffer_destroy,
+        .dyn_cast = iree_hal_inline_command_buffer_dyn_cast,
+        .begin = iree_hal_inline_command_buffer_begin,
+        .end = iree_hal_inline_command_buffer_end,
+        .begin_debug_group = iree_hal_inline_command_buffer_begin_debug_group,
+        .end_debug_group = iree_hal_inline_command_buffer_end_debug_group,
+        .execution_barrier = iree_hal_inline_command_buffer_execution_barrier,
+        .signal_event = iree_hal_inline_command_buffer_signal_event,
+        .reset_event = iree_hal_inline_command_buffer_reset_event,
+        .wait_events = iree_hal_inline_command_buffer_wait_events,
+        .discard_buffer = iree_hal_inline_command_buffer_discard_buffer,
+        .fill_buffer = iree_hal_inline_command_buffer_fill_buffer,
+        .update_buffer = iree_hal_inline_command_buffer_update_buffer,
+        .copy_buffer = iree_hal_inline_command_buffer_copy_buffer,
+        .push_constants = iree_hal_inline_command_buffer_push_constants,
+        .push_descriptor_set =
+            iree_hal_inline_command_buffer_push_descriptor_set,
+        .bind_descriptor_set =
+            iree_hal_inline_command_buffer_bind_descriptor_set,
+        .dispatch = iree_hal_inline_command_buffer_dispatch,
+        .dispatch_indirect = iree_hal_inline_command_buffer_dispatch_indirect,
+};
diff --git a/runtime/src/iree/hal/local/inline_command_buffer.h b/runtime/src/iree/hal/local/inline_command_buffer.h
new file mode 100644
index 0000000..b98f5e2
--- /dev/null
+++ b/runtime/src/iree/hal/local/inline_command_buffer.h
@@ -0,0 +1,40 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_INLINE_COMMAND_BUFFER_H_
+#define IREE_HAL_LOCAL_INLINE_COMMAND_BUFFER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates an inline synchronous one-shot single-threaded command "buffer".
+// This is designed for ultra-low latency situations where we know the command
+// buffer is going to be submitted with no wait semaphores indicating that it
+// can begin execution immediately. No inter-command-buffer scheduling will be
+// performed and all barriers and events are ignored.
+//
+// Executes all work on the calling thread synchronously (today).
+//
+// Must have IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION set.
+iree_status_t iree_hal_inline_command_buffer_create(
+    iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_allocator_t host_allocator,
+    iree_hal_command_buffer_t** out_command_buffer);
+
+// Returns true if |command_buffer| is an inline command buffer as created by
+// iree_hal_inline_command_buffer_create.
+bool iree_hal_inline_command_buffer_isa(
+    iree_hal_command_buffer_t* command_buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_INLINE_COMMAND_BUFFER_H_
diff --git a/runtime/src/iree/hal/local/loaders/BUILD b/runtime/src/iree/hal/local/loaders/BUILD
new file mode 100644
index 0000000..ac48f01
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/BUILD
@@ -0,0 +1,102 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Default implementations for HAL types that use the host resources.
+# These are generally just wrappers around host heap memory and host threads.
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+# Loads executables in IREE's embedded ELF format via the in-tree ELF module
+# loader (//runtime/src/iree/hal/local/elf).
+iree_runtime_cc_library(
+    name = "embedded_library_loader",
+    srcs = ["embedded_library_loader.c"],
+    hdrs = ["embedded_library_loader.h"],
+    defines = [
+        "IREE_HAL_HAVE_EMBEDDED_LIBRARY_LOADER=1",
+    ],
+    deps = [
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:core_headers",
+        "//runtime/src/iree/base:tracing",
+        "//runtime/src/iree/hal",
+        "//runtime/src/iree/hal/local",
+        "//runtime/src/iree/hal/local:executable_library",
+        "//runtime/src/iree/hal/local/elf:elf_module",
+    ],
+)
+
+# Loader for executable libraries linked directly into the hosting binary
+# (no dynamic loading at runtime).
+iree_runtime_cc_library(
+    name = "static_library_loader",
+    srcs = ["static_library_loader.c"],
+    hdrs = ["static_library_loader.h"],
+    defines = [
+        "IREE_HAL_HAVE_STATIC_LIBRARY_LOADER=1",
+    ],
+    deps = [
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:tracing",
+        "//runtime/src/iree/hal",
+        "//runtime/src/iree/hal/local",
+        "//runtime/src/iree/hal/local:executable_environment",
+        "//runtime/src/iree/hal/local:executable_library",
+    ],
+)
+
+# Loads executables as system dynamic libraries via
+# iree/base/internal/dynamic_library.
+iree_runtime_cc_library(
+    name = "system_library_loader",
+    srcs = ["system_library_loader.c"],
+    hdrs = ["system_library_loader.h"],
+    defines = [
+        "IREE_HAL_HAVE_SYSTEM_LIBRARY_LOADER=1",
+    ],
+    deps = [
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:core_headers",
+        "//runtime/src/iree/base:tracing",
+        "//runtime/src/iree/base/internal:dynamic_library",
+        "//runtime/src/iree/hal",
+        "//runtime/src/iree/hal/local",
+        "//runtime/src/iree/hal/local:executable_library",
+    ],
+)
+
+# CMake-only guard: the VMVX loader below is emitted inside this conditional by
+# bazel_to_cmake so it is only built when a VMVX driver is enabled.
+iree_cmake_extra_content(
+    content = """
+if(${IREE_HAL_DRIVER_VMVX} OR ${IREE_HAL_DRIVER_VMVX_SYNC})
+""",
+    inline = True,
+)
+
+# Executes VMVX module executables through the IREE VM (bytecode modules).
+iree_runtime_cc_library(
+    name = "vmvx_module_loader",
+    srcs = ["vmvx_module_loader.c"],
+    hdrs = ["vmvx_module_loader.h"],
+    defines = [
+        "IREE_HAL_HAVE_VMVX_MODULE_LOADER=1",
+    ],
+    deps = [
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:tracing",
+        "//runtime/src/iree/hal",
+        "//runtime/src/iree/hal/local",
+        "//runtime/src/iree/hal/local:executable_library",
+        "//runtime/src/iree/modules/vmvx",
+        "//runtime/src/iree/vm",
+        "//runtime/src/iree/vm:bytecode_module",
+    ],
+)
+
+# Closes the CMake-only VMVX conditional opened above.
+iree_cmake_extra_content(
+    content = """
+endif()
+""",
+    inline = True,
+)
diff --git a/runtime/src/iree/hal/local/loaders/CMakeLists.txt b/runtime/src/iree/hal/local/loaders/CMakeLists.txt
new file mode 100644
index 0000000..71c8b03
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/CMakeLists.txt
@@ -0,0 +1,97 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/local/loaders/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ embedded_library_loader
+ HDRS
+ "embedded_library_loader.h"
+ SRCS
+ "embedded_library_loader.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ iree::base::tracing
+ iree::hal
+ iree::hal::local
+ iree::hal::local::elf::elf_module
+ iree::hal::local::executable_library
+ DEFINES
+ "IREE_HAL_HAVE_EMBEDDED_LIBRARY_LOADER=1"
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ static_library_loader
+ HDRS
+ "static_library_loader.h"
+ SRCS
+ "static_library_loader.c"
+ DEPS
+ iree::base
+ iree::base::tracing
+ iree::hal
+ iree::hal::local
+ iree::hal::local::executable_environment
+ iree::hal::local::executable_library
+ DEFINES
+ "IREE_HAL_HAVE_STATIC_LIBRARY_LOADER=1"
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ system_library_loader
+ HDRS
+ "system_library_loader.h"
+ SRCS
+ "system_library_loader.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ iree::base::internal::dynamic_library
+ iree::base::tracing
+ iree::hal
+ iree::hal::local
+ iree::hal::local::executable_library
+ DEFINES
+ "IREE_HAL_HAVE_SYSTEM_LIBRARY_LOADER=1"
+ PUBLIC
+)
+
+if(${IREE_HAL_DRIVER_VMVX} OR ${IREE_HAL_DRIVER_VMVX_SYNC})
+
+iree_cc_library(
+ NAME
+ vmvx_module_loader
+ HDRS
+ "vmvx_module_loader.h"
+ SRCS
+ "vmvx_module_loader.c"
+ DEPS
+ iree::base
+ iree::base::tracing
+ iree::hal
+ iree::hal::local
+ iree::hal::local::executable_library
+ iree::modules::vmvx
+ iree::vm
+ iree::vm::bytecode_module
+ DEFINES
+ "IREE_HAL_HAVE_VMVX_MODULE_LOADER=1"
+ PUBLIC
+)
+
+endif()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/local/loaders/embedded_library_loader.c b/runtime/src/iree/hal/local/loaders/embedded_library_loader.c
new file mode 100644
index 0000000..017579e
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/embedded_library_loader.c
@@ -0,0 +1,377 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/loaders/embedded_library_loader.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/elf/elf_module.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/hal/local/local_executable.h"
+#include "iree/hal/local/local_executable_layout.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_elf_executable_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_elf_executable_t {
+ iree_hal_local_executable_t base;
+
+ // Loaded ELF module.
+ iree_elf_module_t module;
+
+  // Name used for the file field in Tracy and debuggers.
+ iree_string_view_t identifier;
+
+ // Queried metadata from the library.
+ union {
+ const iree_hal_executable_library_header_t** header;
+ const iree_hal_executable_library_v0_t* v0;
+ } library;
+
+ iree_hal_local_executable_layout_t* layouts[];
+} iree_hal_elf_executable_t;
+
+static const iree_hal_local_executable_vtable_t iree_hal_elf_executable_vtable;
+
+static iree_status_t iree_hal_elf_executable_query_library(
+ iree_hal_elf_executable_t* executable) {
+ // Get the exported symbol used to get the library metadata.
+ iree_hal_executable_library_query_fn_t query_fn = NULL;
+ IREE_RETURN_IF_ERROR(iree_elf_module_lookup_export(
+ &executable->module, IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME,
+ (void**)&query_fn));
+
+ // Query for a compatible version of the library.
+ executable->library.header =
+ (const iree_hal_executable_library_header_t**)iree_elf_call_p_ip(
+ query_fn, IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST,
+ &executable->base.environment);
+ if (!executable->library.header) {
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "executable does not support this version of the runtime (%08X)",
+ IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST);
+ }
+ const iree_hal_executable_library_header_t* header =
+ *executable->library.header;
+
+ // Ensure that if the library is built for a particular sanitizer that we also
+ // were compiled with that sanitizer enabled.
+ switch (header->sanitizer) {
+ case IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_NONE:
+ // Always safe even if the host has a sanitizer enabled; it just means
+ // that we won't be able to catch anything from within the executable,
+ // however checks outside will (often) still trigger when guard pages are
+ // dirtied/etc.
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "executable requires sanitizer but they are not "
+ "yet supported with embedded libraries: %u",
+ (uint32_t)header->sanitizer);
+ }
+
+ executable->identifier = iree_make_cstring_view(header->name);
+
+ executable->base.dispatch_attrs = executable->library.v0->exports.attrs;
+
+ return iree_ok_status();
+}
+
+// Resolves all of the imports declared by the executable using the given
+// |import_provider|.
+static iree_status_t iree_hal_elf_executable_resolve_imports(
+ iree_hal_elf_executable_t* executable,
+ const iree_hal_executable_import_provider_t import_provider) {
+ const iree_hal_executable_import_table_v0_t* import_table =
+ &executable->library.v0->imports;
+ if (!import_table->count) return iree_ok_status();
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // All calls from the loaded ELF route through our thunk function so that we
+ // can adapt to ABI differences.
+ executable->base.environment.import_thunk =
+ (iree_hal_executable_import_thunk_v0_t)iree_elf_thunk_i_p;
+
+ // Allocate storage for the imports.
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
+ iree_allocator_malloc(
+ executable->base.host_allocator,
+ import_table->count * sizeof(*executable->base.environment.imports),
+ (void**)&executable->base.environment.imports));
+
+ // Try to resolve each import.
+ // NOTE: imports are sorted alphabetically and if we cared we could use this
+ // information to more efficiently resolve the symbols from providers (O(n)
+ // walk vs potential O(nlogn)/O(n^2)).
+ for (uint32_t i = 0; i < import_table->count; ++i) {
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
+ iree_hal_executable_import_provider_resolve(
+ import_provider, iree_make_cstring_view(import_table->symbols[i]),
+ (void**)&executable->base.environment.imports[i]));
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_elf_executable_create(
+ const iree_hal_executable_params_t* executable_params,
+ const iree_hal_executable_import_provider_t import_provider,
+ iree_allocator_t host_allocator, iree_hal_executable_t** out_executable) {
+ IREE_ASSERT_ARGUMENT(executable_params);
+ IREE_ASSERT_ARGUMENT(executable_params->executable_data.data &&
+ executable_params->executable_data.data_length);
+ IREE_ASSERT_ARGUMENT(!executable_params->executable_layout_count ||
+ executable_params->executable_layouts);
+ IREE_ASSERT_ARGUMENT(!executable_params->constant_count ||
+ executable_params->constants);
+ IREE_ASSERT_ARGUMENT(out_executable);
+ *out_executable = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // TODO(benvanik): rework this so that we load and query the library before
+ // allocating so that we know the import count. Today since we allocate first
+ // we need an additional allocation once we've seen the import table.
+ iree_hal_elf_executable_t* executable = NULL;
+ iree_host_size_t total_size =
+ sizeof(*executable) +
+ executable_params->executable_layout_count *
+ sizeof(*executable->layouts) +
+ executable_params->constant_count * sizeof(*executable_params->constants);
+ iree_status_t status =
+ iree_allocator_malloc(host_allocator, total_size, (void**)&executable);
+ if (iree_status_is_ok(status)) {
+ iree_hal_local_executable_initialize(
+ &iree_hal_elf_executable_vtable,
+ executable_params->executable_layout_count,
+ executable_params->executable_layouts, &executable->layouts[0],
+ host_allocator, &executable->base);
+
+ // Copy executable constants so we own them.
+ if (executable_params->constant_count > 0) {
+ uint32_t* target_constants =
+ (uint32_t*)((uint8_t*)executable + sizeof(*executable) +
+ executable_params->executable_layout_count *
+ sizeof(*executable->layouts));
+ memcpy(target_constants, executable_params->constants,
+ executable_params->constant_count *
+ sizeof(*executable_params->constants));
+ executable->base.environment.constants = target_constants;
+ }
+ }
+ if (iree_status_is_ok(status)) {
+ // Attempt to load the ELF module.
+ status = iree_elf_module_initialize_from_memory(
+ executable_params->executable_data, /*import_table=*/NULL,
+ host_allocator, &executable->module);
+ }
+ if (iree_status_is_ok(status)) {
+ // Query metadata and get the entry point function pointers.
+ status = iree_hal_elf_executable_query_library(executable);
+ }
+ if (iree_status_is_ok(status)) {
+ // Resolve imports, if any.
+ status =
+ iree_hal_elf_executable_resolve_imports(executable, import_provider);
+ }
+
+ const bool disable_verification =
+ iree_all_bits_set(executable_params->caching_mode,
+ IREE_HAL_EXECUTABLE_CACHING_MODE_DISABLE_VERIFICATION);
+ if (iree_status_is_ok(status) && !disable_verification) {
+ // Check to make sure that the entry point count matches the layout count.
+ if (executable->library.v0->exports.count !=
+ executable_params->executable_layout_count) {
+ status =
+ iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "executable provides %u entry points but caller "
+ "provided %zu; must match",
+ executable->library.v0->exports.count,
+ executable_params->executable_layout_count);
+ }
+ }
+ if (iree_status_is_ok(status) && !disable_verification) {
+ // Check to make sure that the constant table has values for all constants.
+ if (executable->library.v0->constants.count !=
+ executable_params->constant_count) {
+ status = iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "executable requires %u constants but caller "
+ "provided %zu; must match",
+ executable->library.v0->constants.count,
+ executable_params->constant_count);
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_executable = (iree_hal_executable_t*)executable;
+ } else {
+ iree_hal_executable_release((iree_hal_executable_t*)executable);
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_elf_executable_destroy(
+ iree_hal_executable_t* base_executable) {
+ iree_hal_elf_executable_t* executable =
+ (iree_hal_elf_executable_t*)base_executable;
+ iree_allocator_t host_allocator = executable->base.host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_elf_module_deinitialize(&executable->module);
+
+ if (executable->base.environment.imports != NULL) {
+ iree_allocator_free(host_allocator,
+ (void*)executable->base.environment.imports);
+ }
+
+ iree_hal_local_executable_deinitialize(
+ (iree_hal_local_executable_t*)base_executable);
+ iree_allocator_free(host_allocator, executable);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static iree_status_t iree_hal_elf_executable_issue_call(
+ iree_hal_local_executable_t* base_executable, iree_host_size_t ordinal,
+ const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+ const iree_hal_executable_workgroup_state_v0_t* workgroup_state) {
+ iree_hal_elf_executable_t* executable =
+ (iree_hal_elf_executable_t*)base_executable;
+ const iree_hal_executable_library_v0_t* library = executable->library.v0;
+
+ if (IREE_UNLIKELY(ordinal >= library->exports.count)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "entry point ordinal out of bounds");
+ }
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ iree_string_view_t entry_point_name = iree_string_view_empty();
+ if (library->exports.names != NULL) {
+ entry_point_name = iree_make_cstring_view(library->exports.names[ordinal]);
+ }
+ if (iree_string_view_is_empty(entry_point_name)) {
+ entry_point_name = iree_make_cstring_view("unknown_elf_call");
+ }
+ IREE_TRACE_ZONE_BEGIN_EXTERNAL(
+ z0, executable->identifier.data, executable->identifier.size, ordinal,
+ entry_point_name.data, entry_point_name.size, NULL, 0);
+ if (library->exports.tags != NULL) {
+ const char* tag = library->exports.tags[ordinal];
+ if (tag) {
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, tag);
+ }
+ }
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+ int ret = iree_elf_call_i_ppp(library->exports.ptrs[ordinal],
+ (void*)&base_executable->environment,
+ (void*)dispatch_state, (void*)workgroup_state);
+
+ IREE_TRACE_ZONE_END(z0);
+
+ return ret == 0 ? iree_ok_status()
+ : iree_make_status(
+ IREE_STATUS_INTERNAL,
+ "executable entry point returned catastrophic error %d",
+ ret);
+}
+
+static const iree_hal_local_executable_vtable_t iree_hal_elf_executable_vtable =
+ {
+ .base =
+ {
+ .destroy = iree_hal_elf_executable_destroy,
+ },
+ .issue_call = iree_hal_elf_executable_issue_call,
+};
+
+//===----------------------------------------------------------------------===//
+// iree_hal_embedded_library_loader_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_embedded_library_loader_t {
+ iree_hal_executable_loader_t base;
+ iree_allocator_t host_allocator;
+} iree_hal_embedded_library_loader_t;
+
+static const iree_hal_executable_loader_vtable_t
+ iree_hal_embedded_library_loader_vtable;
+
+iree_status_t iree_hal_embedded_library_loader_create(
+ iree_hal_executable_import_provider_t import_provider,
+ iree_allocator_t host_allocator,
+ iree_hal_executable_loader_t** out_executable_loader) {
+ IREE_ASSERT_ARGUMENT(out_executable_loader);
+ *out_executable_loader = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_embedded_library_loader_t* executable_loader = NULL;
+ iree_status_t status = iree_allocator_malloc(
+ host_allocator, sizeof(*executable_loader), (void**)&executable_loader);
+ if (iree_status_is_ok(status)) {
+ iree_hal_executable_loader_initialize(
+ &iree_hal_embedded_library_loader_vtable, import_provider,
+ &executable_loader->base);
+ executable_loader->host_allocator = host_allocator;
+ *out_executable_loader = (iree_hal_executable_loader_t*)executable_loader;
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_embedded_library_loader_destroy(
+ iree_hal_executable_loader_t* base_executable_loader) {
+ iree_hal_embedded_library_loader_t* executable_loader =
+ (iree_hal_embedded_library_loader_t*)base_executable_loader;
+ iree_allocator_t host_allocator = executable_loader->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_free(host_allocator, executable_loader);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static bool iree_hal_embedded_library_loader_query_support(
+ iree_hal_executable_loader_t* base_executable_loader,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format) {
+ return iree_string_view_equal(
+ executable_format, iree_make_cstring_view("embedded-elf-" IREE_ARCH));
+}
+
+static iree_status_t iree_hal_embedded_library_loader_try_load(
+ iree_hal_executable_loader_t* base_executable_loader,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable) {
+ iree_hal_embedded_library_loader_t* executable_loader =
+ (iree_hal_embedded_library_loader_t*)base_executable_loader;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Perform the load of the ELF and wrap it in an executable handle.
+ iree_status_t status = iree_hal_elf_executable_create(
+ executable_params, base_executable_loader->import_provider,
+ executable_loader->host_allocator, out_executable);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static const iree_hal_executable_loader_vtable_t
+ iree_hal_embedded_library_loader_vtable = {
+ .destroy = iree_hal_embedded_library_loader_destroy,
+ .query_support = iree_hal_embedded_library_loader_query_support,
+ .try_load = iree_hal_embedded_library_loader_try_load,
+};
diff --git a/runtime/src/iree/hal/local/loaders/embedded_library_loader.h b/runtime/src/iree/hal/local/loaders/embedded_library_loader.h
new file mode 100644
index 0000000..7d75396
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/embedded_library_loader.h
@@ -0,0 +1,33 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOADERS_ELF_LIBRARY_LOADER_H_
+#define IREE_HAL_LOCAL_LOADERS_ELF_LIBRARY_LOADER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_loader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates an executable loader that can load minimally-featured ELF dynamic
+// libraries on any platform. This allows us to use a single file format across
+// all operating systems at the cost of some missing debugging/profiling
+// features.
+iree_status_t iree_hal_embedded_library_loader_create(
+ iree_hal_executable_import_provider_t import_provider,
+ iree_allocator_t host_allocator,
+ iree_hal_executable_loader_t** out_executable_loader);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_LOADERS_ELF_LIBRARY_LOADER_H_
diff --git a/runtime/src/iree/hal/local/loaders/static_library_loader.c b/runtime/src/iree/hal/local/loaders/static_library_loader.c
new file mode 100644
index 0000000..e123938
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/static_library_loader.c
@@ -0,0 +1,312 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/loaders/static_library_loader.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_environment.h"
+#include "iree/hal/local/local_executable.h"
+#include "iree/hal/local/local_executable_layout.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_static_executable_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_static_executable_t {
+ iree_hal_local_executable_t base;
+
+  // Name used for the file field in Tracy and debuggers.
+ iree_string_view_t identifier;
+
+ union {
+ const iree_hal_executable_library_header_t** header;
+ const iree_hal_executable_library_v0_t* v0;
+ } library;
+
+ iree_hal_local_executable_layout_t* layouts[];
+} iree_hal_static_executable_t;
+
+static const iree_hal_local_executable_vtable_t
+ iree_hal_static_executable_vtable;
+
+static iree_status_t iree_hal_static_executable_create(
+ const iree_hal_executable_params_t* executable_params,
+ const iree_hal_executable_library_header_t** library_header,
+ const iree_hal_executable_import_provider_t import_provider,
+ iree_allocator_t host_allocator, iree_hal_executable_t** out_executable) {
+ IREE_ASSERT_ARGUMENT(executable_params);
+ IREE_ASSERT_ARGUMENT(!executable_params->executable_layout_count ||
+ executable_params->executable_layouts);
+ IREE_ASSERT_ARGUMENT(!executable_params->constant_count ||
+ executable_params->constants);
+ IREE_ASSERT_ARGUMENT(library_header);
+ IREE_ASSERT_ARGUMENT(out_executable);
+ *out_executable = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_static_executable_t* executable = NULL;
+ iree_host_size_t total_size =
+ sizeof(*executable) +
+ executable_params->executable_layout_count *
+ sizeof(*executable->layouts) +
+ executable_params->constant_count * sizeof(*executable_params->constants);
+ iree_status_t status =
+ iree_allocator_malloc(host_allocator, total_size, (void**)&executable);
+ if (iree_status_is_ok(status)) {
+ iree_hal_local_executable_initialize(
+ &iree_hal_static_executable_vtable,
+ executable_params->executable_layout_count,
+ executable_params->executable_layouts, &executable->layouts[0],
+ host_allocator, &executable->base);
+ executable->library.header = library_header;
+ executable->identifier = iree_make_cstring_view((*library_header)->name);
+ executable->base.dispatch_attrs = executable->library.v0->exports.attrs;
+
+ // Copy executable constants so we own them.
+ if (executable_params->constant_count > 0) {
+ uint32_t* target_constants =
+ (uint32_t*)((uint8_t*)executable + sizeof(*executable) +
+ executable_params->executable_layout_count *
+ sizeof(*executable->layouts));
+ memcpy(target_constants, executable_params->constants,
+ executable_params->constant_count *
+ sizeof(*executable_params->constants));
+ executable->base.environment.constants = target_constants;
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
+ if (executable->library.v0->imports.count > 0) {
+ status =
+ iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "static libraries do not support imports and should "
+ "directly link against the functions they require");
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_executable = (iree_hal_executable_t*)executable;
+ } else {
+ *out_executable = NULL;
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_static_executable_destroy(
+ iree_hal_executable_t* base_executable) {
+ iree_hal_static_executable_t* executable =
+ (iree_hal_static_executable_t*)base_executable;
+ iree_allocator_t host_allocator = executable->base.host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_local_executable_deinitialize(
+ (iree_hal_local_executable_t*)base_executable);
+ iree_allocator_free(host_allocator, executable);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static iree_status_t iree_hal_static_executable_issue_call(
+ iree_hal_local_executable_t* base_executable, iree_host_size_t ordinal,
+ const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+ const iree_hal_executable_workgroup_state_v0_t* workgroup_state) {
+ iree_hal_static_executable_t* executable =
+ (iree_hal_static_executable_t*)base_executable;
+ const iree_hal_executable_library_v0_t* library = executable->library.v0;
+
+ if (IREE_UNLIKELY(ordinal >= library->exports.count)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "entry point ordinal out of bounds");
+ }
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ iree_string_view_t entry_point_name = iree_string_view_empty();
+ if (library->exports.names != NULL) {
+ entry_point_name = iree_make_cstring_view(library->exports.names[ordinal]);
+ }
+ if (iree_string_view_is_empty(entry_point_name)) {
+ entry_point_name = iree_make_cstring_view("unknown_dylib_call");
+ }
+ IREE_TRACE_ZONE_BEGIN_EXTERNAL(
+ z0, executable->identifier.data, executable->identifier.size, ordinal,
+ entry_point_name.data, entry_point_name.size, NULL, 0);
+ if (library->exports.tags != NULL) {
+ const char* tag = library->exports.tags[ordinal];
+ if (tag) {
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, tag);
+ }
+ }
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+ int ret = library->exports.ptrs[ordinal](&base_executable->environment,
+ dispatch_state, workgroup_state);
+
+ IREE_TRACE_ZONE_END(z0);
+
+ return ret == 0 ? iree_ok_status()
+ : iree_make_status(
+ IREE_STATUS_INTERNAL,
+ "executable entry point returned catastrophic error %d",
+ ret);
+}
+
+static const iree_hal_local_executable_vtable_t
+ iree_hal_static_executable_vtable = {
+ .base =
+ {
+ .destroy = iree_hal_static_executable_destroy,
+ },
+ .issue_call = iree_hal_static_executable_issue_call,
+};
+
+//===----------------------------------------------------------------------===//
+// iree_hal_static_library_loader_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_static_library_loader_t {
+ iree_hal_executable_loader_t base;
+ iree_allocator_t host_allocator;
+ iree_host_size_t library_count;
+ const iree_hal_executable_library_header_t** const libraries[];
+} iree_hal_static_library_loader_t;
+
+static const iree_hal_executable_loader_vtable_t
+ iree_hal_static_library_loader_vtable;
+
+iree_status_t iree_hal_static_library_loader_create(
+ iree_host_size_t library_count,
+ const iree_hal_executable_library_query_fn_t* library_query_fns,
+ iree_hal_executable_import_provider_t import_provider,
+ iree_allocator_t host_allocator,
+ iree_hal_executable_loader_t** out_executable_loader) {
+ IREE_ASSERT_ARGUMENT(!library_count || library_query_fns);
+ IREE_ASSERT_ARGUMENT(out_executable_loader);
+ *out_executable_loader = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_static_library_loader_t* executable_loader = NULL;
+ iree_host_size_t total_size =
+ sizeof(*executable_loader) +
+ sizeof(executable_loader->libraries[0]) * library_count;
+ iree_status_t status = iree_allocator_malloc(host_allocator, total_size,
+ (void**)&executable_loader);
+ if (iree_status_is_ok(status)) {
+ iree_hal_executable_loader_initialize(
+ &iree_hal_static_library_loader_vtable, import_provider,
+ &executable_loader->base);
+ executable_loader->host_allocator = host_allocator;
+ executable_loader->library_count = library_count;
+
+ // Default environment to enable initialization.
+ iree_hal_executable_environment_v0_t environment;
+ iree_hal_executable_environment_initialize(host_allocator, &environment);
+
+ // Query and verify the libraries provided all match our expected version.
+    // It's rare they won't; however, static libraries generated with a newer
+ // version of the IREE compiler that are then linked with an older version
+ // of the runtime are difficult to spot otherwise.
+ for (iree_host_size_t i = 0; i < library_count; ++i) {
+ const iree_hal_executable_library_header_t* const* header_ptr =
+ library_query_fns[i](IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST,
+ &environment);
+ if (!header_ptr) {
+ status = iree_make_status(
+ IREE_STATUS_UNAVAILABLE,
+ "failed to query library header for runtime version %d",
+ IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST);
+ break;
+ }
+ const iree_hal_executable_library_header_t* header = *header_ptr;
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, header->name);
+ if (header->version > IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST) {
+ status = iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "executable does not support this version of the "
+ "runtime (executable: %d, runtime: %d)",
+ header->version, IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST);
+ break;
+ }
+ memcpy((void*)&executable_loader->libraries[i], &header_ptr,
+ sizeof(header_ptr));
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_executable_loader = (iree_hal_executable_loader_t*)executable_loader;
+ } else {
+ iree_allocator_free(host_allocator, executable_loader);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_static_library_loader_destroy(
+ iree_hal_executable_loader_t* base_executable_loader) {
+ iree_hal_static_library_loader_t* executable_loader =
+ (iree_hal_static_library_loader_t*)base_executable_loader;
+ iree_allocator_t host_allocator = executable_loader->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_free(host_allocator, executable_loader);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static bool iree_hal_static_library_loader_query_support(
+ iree_hal_executable_loader_t* base_executable_loader,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format) {
+ return iree_string_view_equal(executable_format,
+ iree_make_cstring_view("static"));
+}
+
+static iree_status_t iree_hal_static_library_loader_try_load(
+ iree_hal_executable_loader_t* base_executable_loader,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable) {
+ iree_hal_static_library_loader_t* executable_loader =
+ (iree_hal_static_library_loader_t*)base_executable_loader;
+
+ // The executable data is just the name of the library.
+ iree_string_view_t library_name = iree_make_string_view(
+ (const char*)executable_params->executable_data.data,
+ executable_params->executable_data.data_length);
+
+ // Linear scan of the registered libraries; there's usually only one per
+ // module (aka source model) and as such it's a small list and probably not
+ // worth optimizing. We could sort the libraries list by name on loader
+ // creation to perform a binary-search fairly easily, though, at the cost of
+ // the additional code size.
+ for (iree_host_size_t i = 0; i < executable_loader->library_count; ++i) {
+ const iree_hal_executable_library_header_t* header =
+ *executable_loader->libraries[i];
+ if (iree_string_view_equal(library_name,
+ iree_make_cstring_view(header->name))) {
+ return iree_hal_static_executable_create(
+ executable_params, executable_loader->libraries[i],
+ base_executable_loader->import_provider,
+ executable_loader->host_allocator, out_executable);
+ }
+ }
+ return iree_make_status(IREE_STATUS_NOT_FOUND,
+ "no static library with the name '%.*s' registered",
+ (int)library_name.size, library_name.data);
+}
+
+static const iree_hal_executable_loader_vtable_t
+ iree_hal_static_library_loader_vtable = {
+ .destroy = iree_hal_static_library_loader_destroy,
+ .query_support = iree_hal_static_library_loader_query_support,
+ .try_load = iree_hal_static_library_loader_try_load,
+};
diff --git a/runtime/src/iree/hal/local/loaders/static_library_loader.h b/runtime/src/iree/hal/local/loaders/static_library_loader.h
new file mode 100644
index 0000000..63ed4c4
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/static_library_loader.h
@@ -0,0 +1,47 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOADERS_STATIC_LIBRARY_LOADER_H_
+#define IREE_HAL_LOCAL_LOADERS_STATIC_LIBRARY_LOADER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/hal/local/executable_loader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a library loader that exposes the provided libraries to the HAL for
+// use as executables.
+//
+// This loader will handle executable formats of 'static'. Version checks will
+// ensure that the IREE compiler-produced static library version is one that the
+// runtime can support.
+//
+// The name defined on each library will be used to lookup the executables and
+// must match with the names used during compilation exactly. The
+// iree_hal_executable_params_t used to reference the executables will contain
+// the library name and be used to lookup the library in the list.
+//
+// Multiple static library loaders can be registered in cases where several
+// independent sets of libraries are linked in; however, duplicate names both
+// within and across loaders will result in undefined behavior.
+iree_status_t iree_hal_static_library_loader_create(
+ iree_host_size_t library_count,
+ const iree_hal_executable_library_query_fn_t* library_query_fns,
+ iree_hal_executable_import_provider_t import_provider,
+ iree_allocator_t host_allocator,
+ iree_hal_executable_loader_t** out_executable_loader);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_LOADERS_STATIC_LIBRARY_LOADER_H_
diff --git a/runtime/src/iree/hal/local/loaders/system_library_loader.c b/runtime/src/iree/hal/local/loaders/system_library_loader.c
new file mode 100644
index 0000000..ebd0213
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/system_library_loader.c
@@ -0,0 +1,508 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/loaders/system_library_loader.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/internal/dynamic_library.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/hal/local/local_executable.h"
+#include "iree/hal/local/local_executable_layout.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_system_executable_footer_t
+//===----------------------------------------------------------------------===//
+
+// An optional footer that may exist on the system library that is used to add
+// additional debug information for use directly by IREE, such as PDB or dSYM
+// files. This is only expected to be present when there is a debug database
+// but we may want to extend it in the future.
+// All offsets/sizes are relative to the start of the parent data stream the
+// footer terminates.
+typedef struct iree_hal_system_executable_footer_t {
+  uint8_t magic[8];  // IREE_HAL_SYSTEM_EXECUTABLE_FOOTER_MAGIC
+  uint32_t version;  // IREE_HAL_SYSTEM_EXECUTABLE_FOOTER_VERSION
+  uint32_t flags;    // reserved
+  // Offset of the library within the parent data stream.
+  // Almost always zero but here in case we want to allow for chaining.
+  uint64_t library_offset;
+  // Size of the system library in bytes.
+  uint64_t library_size;
+  // Offset of the start of the embedded debug database within the parent data
+  // stream. There may be padding between the library and this offset.
+  uint64_t debug_offset;
+  // Size of the debug database in bytes.
+  uint64_t debug_size;
+} iree_hal_system_executable_footer_t;
+
+// EXPERIMENTAL: this is not a stable interface yet. The binary format may
+// change at any time.
+// NOTE: the literal embeds a NUL so exactly 8 bytes (sans terminator) are
+// compared against the footer magic field; see the static_assert below.
+#define IREE_HAL_SYSTEM_EXECUTABLE_FOOTER_MAGIC "IREEDBG\0"
+#define IREE_HAL_SYSTEM_EXECUTABLE_FOOTER_VERSION 0
+
+// Tries to find an iree_hal_system_executable_footer_t at the end of the
+// given executable data stream.
+// Returns a pointer into |executable_data| (no copy is made) when the
+// trailing magic matches, or NULL when the data is too small to hold a
+// footer or carries no footer.
+static const iree_hal_system_executable_footer_t*
+iree_hal_system_executable_try_query_footer(
+    iree_const_byte_span_t executable_data) {
+  if (executable_data.data_length <
+      sizeof(iree_hal_system_executable_footer_t)) {
+    return NULL;
+  }
+  // The footer, if present, occupies the trailing sizeof(footer) bytes.
+  const uint8_t* footer_ptr = executable_data.data +
+                              executable_data.data_length -
+                              sizeof(iree_hal_system_executable_footer_t);
+  const iree_hal_system_executable_footer_t* footer =
+      (const iree_hal_system_executable_footer_t*)(footer_ptr);
+  static_assert(sizeof(IREE_HAL_SYSTEM_EXECUTABLE_FOOTER_MAGIC) - /*NUL*/ 1 ==
+                    sizeof(footer->magic),
+                "magic number value must match struct size");
+  if (memcmp(footer->magic, IREE_HAL_SYSTEM_EXECUTABLE_FOOTER_MAGIC,
+             sizeof(footer->magic)) != 0) {
+    return NULL;
+  }
+  return footer;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_system_executable_t
+//===----------------------------------------------------------------------===//
+
+// A dynamically-loaded system library executable.
+// The flexible array member at the end stores the executable layouts in the
+// same allocation as the struct.
+typedef struct iree_hal_system_executable_t {
+  iree_hal_local_executable_t base;
+
+  // Loaded platform dynamic library.
+  iree_dynamic_library_t* handle;
+
+  // Name used for the file field in tracy and debuggers.
+  iree_string_view_t identifier;
+
+  // Queried metadata from the library.
+  // Read as v0 once the version query in query_library succeeds.
+  union {
+    const iree_hal_executable_library_header_t** header;
+    const iree_hal_executable_library_v0_t* v0;
+  } library;
+
+  // One entry per executable layout provided at creation time.
+  iree_hal_local_executable_layout_t* layouts[];
+} iree_hal_system_executable_t;
+
+static const iree_hal_local_executable_vtable_t
+ iree_hal_system_executable_vtable;
+
+// Loads the executable and optional debug database from the given
+// |executable_data| in memory. The memory must remain live for the lifetime
+// of the executable.
+// On success |executable->handle| holds the loaded dynamic library; when a
+// footer is present the trailing debug database is attached to the library
+// for symbolization.
+static iree_status_t iree_hal_system_executable_load(
+    iree_hal_system_executable_t* executable,
+    iree_const_byte_span_t executable_data, iree_allocator_t host_allocator) {
+  // Check to see if the library has a footer indicating embedded debug data.
+  iree_const_byte_span_t library_data = iree_make_const_byte_span(NULL, 0);
+  iree_const_byte_span_t debug_data = iree_make_const_byte_span(NULL, 0);
+  const iree_hal_system_executable_footer_t* footer =
+      iree_hal_system_executable_try_query_footer(executable_data);
+  if (footer) {
+    // Debug file present; split the data contents.
+    iree_host_size_t data_length =
+        executable_data.data_length - sizeof(*footer);
+    // NOTE(review): debug_offset + debug_size could wrap uint64_t before the
+    // range check for hostile inputs — confirm inputs are trusted here.
+    if (footer->library_size > data_length ||
+        footer->debug_offset + footer->debug_size > data_length) {
+      return iree_make_status(
+          IREE_STATUS_OUT_OF_RANGE,
+          "system library footer references out of range bytes");
+    }
+    library_data =
+        iree_make_const_byte_span(executable_data.data, footer->library_size);
+    debug_data = iree_make_const_byte_span(
+        executable_data.data + footer->debug_offset, footer->debug_size);
+  } else {
+    // Entire data contents are the library.
+    library_data = executable_data;
+  }
+
+  IREE_RETURN_IF_ERROR(iree_dynamic_library_load_from_memory(
+      iree_make_cstring_view("aot"), library_data,
+      IREE_DYNAMIC_LIBRARY_FLAG_NONE, host_allocator, &executable->handle));
+
+  // Attach the split-out debug database (if any) to the loaded library so
+  // debuggers/tracing can symbolize it.
+  if (debug_data.data_length > 0) {
+    IREE_RETURN_IF_ERROR(iree_dynamic_library_attach_symbols_from_memory(
+        executable->handle, debug_data));
+  }
+
+  return iree_ok_status();
+}
+
+// Queries library metadata from the loaded module via the well-known export
+// symbol and validates runtime compatibility (version and sanitizer mode).
+// On success caches the header-derived identifier and the per-export
+// dispatch attribute table on |executable|.
+static iree_status_t iree_hal_system_executable_query_library(
+    iree_hal_system_executable_t* executable) {
+  // Get the exported symbol used to get the library metadata.
+  iree_hal_executable_library_query_fn_t query_fn = NULL;
+  IREE_RETURN_IF_ERROR(iree_dynamic_library_lookup_symbol(
+      executable->handle, IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME,
+      (void**)&query_fn));
+
+  // Query for a compatible version of the library.
+  executable->library.header =
+      query_fn(IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST,
+               &executable->base.environment);
+  if (!executable->library.header) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "executable does not support this version of the runtime (%08X)",
+        IREE_HAL_EXECUTABLE_LIBRARY_VERSION_LATEST);
+  }
+  const iree_hal_executable_library_header_t* header =
+      *executable->library.header;
+
+  // Ensure that if the library is built for a particular sanitizer that we also
+  // were compiled with that sanitizer enabled.
+  switch (header->sanitizer) {
+    case IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_NONE:
+      // Always safe even if the host has a sanitizer enabled; it just means
+      // that we won't be able to catch anything from within the executable,
+      // however checks outside will (often) still trigger when guard pages are
+      // dirtied/etc.
+      break;
+#if defined(IREE_SANITIZER_ADDRESS)
+    case IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_ADDRESS:
+      // ASAN is compiled into the host and we can load this library.
+      break;
+#else
+    case IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_ADDRESS:
+      return iree_make_status(
+          IREE_STATUS_UNAVAILABLE,
+          "executable library is compiled with ASAN support but the host "
+          "runtime is not compiled with it enabled; add -fsanitize=address to "
+          "the runtime compilation options");
+#endif  // IREE_SANITIZER_ADDRESS
+#if defined(IREE_SANITIZER_THREAD)
+    case IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_THREAD:
+      // TSAN is compiled into the host and we can load this library.
+      break;
+#else
+    case IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_THREAD:
+      return iree_make_status(
+          IREE_STATUS_UNAVAILABLE,
+          "executable library is compiled with TSAN support but the host "
+          "runtime is not compiled with it enabled; add -fsanitize=thread to "
+          "the runtime compilation options");
+#endif  // IREE_SANITIZER_THREAD
+    default:
+      return iree_make_status(
+          IREE_STATUS_UNAVAILABLE,
+          "executable library requires a sanitizer the host runtime is not "
+          "compiled to enable/understand: %u",
+          (uint32_t)header->sanitizer);
+  }
+
+  // Name used for tracing/debugger file attribution.
+  executable->identifier = iree_make_cstring_view(header->name);
+
+  // Expose the per-export dispatch attributes provided by the library.
+  executable->base.dispatch_attrs = executable->library.v0->exports.attrs;
+
+  return iree_ok_status();
+}
+
+// Import call thunk installed as environment.import_thunk in
+// resolve_imports: invokes the import function pointer directly with its
+// parameter blob (no marshaling).
+static int iree_hal_system_executable_import_thunk_v0(
+    iree_hal_executable_import_v0_t fn_ptr, void* import_params) {
+  return fn_ptr(import_params);
+}
+
+// Resolves all of the imports declared by the executable using the given
+// |import_provider|.
+// The resolved import pointer table is heap-allocated into the environment
+// and freed by iree_hal_system_executable_destroy.
+static iree_status_t iree_hal_system_executable_resolve_imports(
+    iree_hal_system_executable_t* executable,
+    const iree_hal_executable_import_provider_t import_provider) {
+  const iree_hal_executable_import_table_v0_t* import_table =
+      &executable->library.v0->imports;
+  if (!import_table->count) return iree_ok_status();
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Pass all imports right through.
+  executable->base.environment.import_thunk =
+      iree_hal_system_executable_import_thunk_v0;
+
+  // Allocate storage for the imports.
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0,
+      iree_allocator_malloc(
+          executable->base.host_allocator,
+          import_table->count * sizeof(*executable->base.environment.imports),
+          (void**)&executable->base.environment.imports));
+
+  // Try to resolve each import.
+  // NOTE: imports are sorted alphabetically and if we cared we could use this
+  // information to more efficiently resolve the symbols from providers (O(n)
+  // walk vs potential O(nlogn)/O(n^2)).
+  for (uint32_t i = 0; i < import_table->count; ++i) {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
+        iree_hal_executable_import_provider_resolve(
+            import_provider, iree_make_cstring_view(import_table->symbols[i]),
+            (void**)&executable->base.environment.imports[i]));
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Creates a system library executable from |executable_params|.
+// Single allocation layout: [struct | layouts[] | constants[]].
+// Loads the library, queries its metadata, and resolves imports in sequence,
+// threading one status through; verification of entry-point/constant counts
+// can be disabled via the caching mode.
+static iree_status_t iree_hal_system_executable_create(
+    const iree_hal_executable_params_t* executable_params,
+    const iree_hal_executable_import_provider_t import_provider,
+    iree_allocator_t host_allocator, iree_hal_executable_t** out_executable) {
+  IREE_ASSERT_ARGUMENT(executable_params);
+  IREE_ASSERT_ARGUMENT(executable_params->executable_data.data &&
+                       executable_params->executable_data.data_length);
+  IREE_ASSERT_ARGUMENT(!executable_params->executable_layout_count ||
+                       executable_params->executable_layouts);
+  IREE_ASSERT_ARGUMENT(!executable_params->constant_count ||
+                       executable_params->constants);
+  IREE_ASSERT_ARGUMENT(out_executable);
+  *out_executable = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_system_executable_t* executable = NULL;
+  iree_host_size_t total_size =
+      sizeof(*executable) +
+      executable_params->executable_layout_count *
+          sizeof(*executable->layouts) +
+      executable_params->constant_count * sizeof(*executable_params->constants);
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&executable);
+  if (iree_status_is_ok(status)) {
+    iree_hal_local_executable_initialize(
+        &iree_hal_system_executable_vtable,
+        executable_params->executable_layout_count,
+        executable_params->executable_layouts, &executable->layouts[0],
+        host_allocator, &executable->base);
+
+    // Copy executable constants so we own them.
+    // Constants live after the layouts array in the trailing storage.
+    if (executable_params->constant_count > 0) {
+      uint32_t* target_constants =
+          (uint32_t*)((uint8_t*)executable + sizeof(*executable) +
+                      executable_params->executable_layout_count *
+                          sizeof(*executable->layouts));
+      memcpy(target_constants, executable_params->constants,
+             executable_params->constant_count *
+                 sizeof(*executable_params->constants));
+      executable->base.environment.constants = target_constants;
+    }
+  }
+  if (iree_status_is_ok(status)) {
+    // Attempt to extract the embedded library and load it.
+    status = iree_hal_system_executable_load(
+        executable, executable_params->executable_data, host_allocator);
+  }
+  if (iree_status_is_ok(status)) {
+    // Query metadata and get the entry point function pointers.
+    status = iree_hal_system_executable_query_library(executable);
+  }
+  if (iree_status_is_ok(status)) {
+    // Resolve imports, if any.
+    status =
+        iree_hal_system_executable_resolve_imports(executable, import_provider);
+  }
+
+  const bool disable_verification =
+      iree_all_bits_set(executable_params->caching_mode,
+                        IREE_HAL_EXECUTABLE_CACHING_MODE_DISABLE_VERIFICATION);
+  if (iree_status_is_ok(status) && !disable_verification) {
+    // Check to make sure that the entry point count matches the layout count.
+    // NOTE(review): %zu assumes iree_host_size_t == size_t; sibling code uses
+    // PRIhsz for the same type — confirm and align.
+    if (executable->library.v0->exports.count !=
+        executable_params->executable_layout_count) {
+      status =
+          iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                           "executable provides %u entry points but caller "
+                           "provided %zu; must match",
+                           executable->library.v0->exports.count,
+                           executable_params->executable_layout_count);
+    }
+  }
+  if (iree_status_is_ok(status) && !disable_verification) {
+    // Check to make sure that the constant table has values for all constants.
+    if (executable->library.v0->constants.count !=
+        executable_params->constant_count) {
+      status = iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                "executable requires %u constants but caller "
+                                "provided %zu; must match",
+                                executable->library.v0->constants.count,
+                                executable_params->constant_count);
+    }
+  }
+
+  // On failure release whatever was partially constructed.
+  if (iree_status_is_ok(status)) {
+    *out_executable = (iree_hal_executable_t*)executable;
+  } else {
+    iree_hal_executable_release((iree_hal_executable_t*)executable);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Destroys the executable: releases the dynamic library, frees the resolved
+// import table (if any), then deinitializes and frees the base storage.
+static void iree_hal_system_executable_destroy(
+    iree_hal_executable_t* base_executable) {
+  iree_hal_system_executable_t* executable =
+      (iree_hal_system_executable_t*)base_executable;
+  iree_allocator_t host_allocator = executable->base.host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_dynamic_library_release(executable->handle);
+
+  // Import table was heap-allocated in resolve_imports.
+  if (executable->base.environment.imports != NULL) {
+    iree_allocator_free(host_allocator,
+                        (void*)executable->base.environment.imports);
+  }
+
+  iree_hal_local_executable_deinitialize(
+      (iree_hal_local_executable_t*)base_executable);
+  iree_allocator_free(host_allocator, executable);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Issues a single workgroup call into entry point |ordinal|.
+// The entry point's int result is treated as 0 == success; any other value
+// is surfaced as IREE_STATUS_INTERNAL.
+static iree_status_t iree_hal_system_executable_issue_call(
+    iree_hal_local_executable_t* base_executable, iree_host_size_t ordinal,
+    const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+    const iree_hal_executable_workgroup_state_v0_t* workgroup_state) {
+  iree_hal_system_executable_t* executable =
+      (iree_hal_system_executable_t*)base_executable;
+  const iree_hal_executable_library_v0_t* library = executable->library.v0;
+
+  if (IREE_UNLIKELY(ordinal >= library->exports.count)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "entry point ordinal out of bounds");
+  }
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  // Derive a human-readable zone name from the optional export name table.
+  iree_string_view_t entry_point_name = iree_string_view_empty();
+  if (library->exports.names != NULL) {
+    entry_point_name = iree_make_cstring_view(library->exports.names[ordinal]);
+  }
+  if (iree_string_view_is_empty(entry_point_name)) {
+    entry_point_name = iree_make_cstring_view("unknown_dylib_call");
+  }
+  IREE_TRACE_ZONE_BEGIN_EXTERNAL(
+      z0, executable->identifier.data, executable->identifier.size, ordinal,
+      entry_point_name.data, entry_point_name.size, NULL, 0);
+  if (library->exports.tags != NULL) {
+    const char* tag = library->exports.tags[ordinal];
+    if (tag) {
+      IREE_TRACE_ZONE_APPEND_TEXT(z0, tag);
+    }
+  }
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+  // Direct call into the loaded library.
+  int ret = library->exports.ptrs[ordinal](&base_executable->environment,
+                                           dispatch_state, workgroup_state);
+
+  IREE_TRACE_ZONE_END(z0);
+
+  return ret == 0 ? iree_ok_status()
+                  : iree_make_status(
+                        IREE_STATUS_INTERNAL,
+                        "executable entry point returned catastrophic error %d",
+                        ret);
+}
+
+// Vtable wiring the local-executable interface to the implementation above.
+static const iree_hal_local_executable_vtable_t
+    iree_hal_system_executable_vtable = {
+        .base =
+            {
+                .destroy = iree_hal_system_executable_destroy,
+            },
+        .issue_call = iree_hal_system_executable_issue_call,
+};
+
+//===----------------------------------------------------------------------===//
+// iree_hal_system_library_loader_t
+//===----------------------------------------------------------------------===//
+
+// Loader of platform dynamic-library ("system-*") executables.
+typedef struct iree_hal_system_library_loader_t {
+  iree_hal_executable_loader_t base;
+  // Allocator used for the loader itself and passed to created executables.
+  iree_allocator_t host_allocator;
+} iree_hal_system_library_loader_t;
+
+static const iree_hal_executable_loader_vtable_t
+    iree_hal_system_library_loader_vtable;
+
+// Creates a system library executable loader (public contract in the header).
+// Allocates the loader from |host_allocator| and hands |import_provider| to
+// the base loader initializer.
+iree_status_t iree_hal_system_library_loader_create(
+    iree_hal_executable_import_provider_t import_provider,
+    iree_allocator_t host_allocator,
+    iree_hal_executable_loader_t** out_executable_loader) {
+  IREE_ASSERT_ARGUMENT(out_executable_loader);
+  *out_executable_loader = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_system_library_loader_t* executable_loader = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(*executable_loader), (void**)&executable_loader);
+  if (iree_status_is_ok(status)) {
+    iree_hal_executable_loader_initialize(
+        &iree_hal_system_library_loader_vtable, import_provider,
+        &executable_loader->base);
+    executable_loader->host_allocator = host_allocator;
+    *out_executable_loader = (iree_hal_executable_loader_t*)executable_loader;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees the loader using the allocator captured at creation time.
+static void iree_hal_system_library_loader_destroy(
+    iree_hal_executable_loader_t* base_executable_loader) {
+  iree_hal_system_library_loader_t* executable_loader =
+      (iree_hal_system_library_loader_t*)base_executable_loader;
+  iree_allocator_t host_allocator = executable_loader->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(host_allocator, executable_loader);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+#if defined(IREE_PLATFORM_APPLE)
+#define IREE_PLATFORM_DYLIB_TYPE "dylib"
+#elif defined(IREE_PLATFORM_WINDOWS)
+#define IREE_PLATFORM_DYLIB_TYPE "dll"
+#elif defined(IREE_PLATFORM_EMSCRIPTEN)
+#define IREE_PLATFORM_DYLIB_TYPE "wasm"
+#else
+#define IREE_PLATFORM_DYLIB_TYPE "elf"
+#endif // IREE_PLATFORM_*
+
+// Returns true only for the single exact format string this host supports:
+// "system-" IREE_PLATFORM_DYLIB_TYPE "-" IREE_ARCH.
+static bool iree_hal_system_library_loader_query_support(
+    iree_hal_executable_loader_t* base_executable_loader,
+    iree_hal_executable_caching_mode_t caching_mode,
+    iree_string_view_t executable_format) {
+  return iree_string_view_equal(
+      executable_format,
+      iree_make_cstring_view("system-" IREE_PLATFORM_DYLIB_TYPE "-" IREE_ARCH));
+}
+
+// Attempts to load |executable_params| as a system library executable.
+static iree_status_t iree_hal_system_library_loader_try_load(
+    iree_hal_executable_loader_t* base_executable_loader,
+    const iree_hal_executable_params_t* executable_params,
+    iree_hal_executable_t** out_executable) {
+  iree_hal_system_library_loader_t* executable_loader =
+      (iree_hal_system_library_loader_t*)base_executable_loader;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Perform the load (and requisite disgusting hackery).
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_system_executable_create(
+              executable_params, base_executable_loader->import_provider,
+              executable_loader->host_allocator, out_executable));
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Vtable wiring the executable-loader interface to this implementation.
+static const iree_hal_executable_loader_vtable_t
+    iree_hal_system_library_loader_vtable = {
+        .destroy = iree_hal_system_library_loader_destroy,
+        .query_support = iree_hal_system_library_loader_query_support,
+        .try_load = iree_hal_system_library_loader_try_load,
+};
diff --git a/runtime/src/iree/hal/local/loaders/system_library_loader.h b/runtime/src/iree/hal/local/loaders/system_library_loader.h
new file mode 100644
index 0000000..23ffdd0
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/system_library_loader.h
@@ -0,0 +1,35 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOADERS_SYSTEM_LIBRARY_LOADER_H_
+#define IREE_HAL_LOCAL_LOADERS_SYSTEM_LIBRARY_LOADER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_loader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates an executable loader that can load files from platform-supported
+// dynamic libraries (such as .dylib on darwin, .so on linux, .dll on windows).
+//
+// This uses the legacy "dylib"-style format that will be deleted soon and is
+// only a placeholder until the compiler can be switched to output
+// iree_hal_executable_library_t-compatible files.
+iree_status_t iree_hal_system_library_loader_create(
+ iree_hal_executable_import_provider_t import_provider,
+ iree_allocator_t host_allocator,
+ iree_hal_executable_loader_t** out_executable_loader);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_LOADERS_SYSTEM_LIBRARY_LOADER_H_
diff --git a/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c
new file mode 100644
index 0000000..31f59c1
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c
@@ -0,0 +1,571 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/loaders/vmvx_module_loader.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/hal/local/local_executable.h"
+#include "iree/hal/local/local_executable_layout.h"
+#include "iree/modules/vmvx/module.h"
+#include "iree/vm/bytecode_module.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_vmvx_executable_t
+//===----------------------------------------------------------------------===//
+
+// VM calling convention string each exported entry point must match;
+// validated by iree_hal_vmvx_executable_verify_entry_point at creation.
+#define IREE_VMVX_ENTRY_SIGNATURE "0rrriiiiiiiii_v"
+
+// A loaded VMVX executable: a VM context hosting the bytecode module with
+// one resolved entry function per exported dispatch.
+typedef struct iree_hal_vmvx_executable_t {
+  iree_hal_local_executable_t base;
+
+  // Context containing both the VMVX module and the loaded executable.
+  iree_vm_context_t* context;
+
+  // Resolved entry functions from the module.
+  // Flexible trailing storage in the same allocation, sized entry_fn_count.
+  iree_host_size_t entry_fn_count;
+  iree_vm_function_t entry_fns[];
+} iree_hal_vmvx_executable_t;
+
+static const iree_hal_local_executable_vtable_t iree_hal_vmvx_executable_vtable;
+
+// Verifies that an entry point function exported by the bytecode module matches
+// the calling convention we expect. This avoids the need to check it during
+// dispatch (where returning errors is hard and it'd be expensive).
+static iree_status_t iree_hal_vmvx_executable_verify_entry_point(
+    iree_vm_function_t* entry_fn) {
+  // Compare the reflected cconv string against the compile-time constant.
+  iree_vm_function_signature_t signature = iree_vm_function_signature(entry_fn);
+  if (!iree_string_view_equal(
+          signature.calling_convention,
+          iree_make_cstring_view(IREE_VMVX_ENTRY_SIGNATURE))) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "executable entry point does not match the expected calling "
+        "convention; expected '" IREE_VMVX_ENTRY_SIGNATURE "' but got '%.*s'",
+        (int)signature.calling_convention.size,
+        signature.calling_convention.data);
+  }
+  return iree_ok_status();
+}
+
+// Calls the __set_constants method on |executable| with the given |constants|.
+// We wrap the data in VM buffer and require that it is not retained by the
+// module; the constant values should be extracted and stored in globals.
+// Fails if the constant table is not of the required size.
+//
+// |bytecode_module| must be the module loaded into |executable|'s context;
+// |constants| may be NULL only when |constant_count| is zero.
+static iree_status_t iree_hal_vmvx_executable_set_constants(
+    iree_hal_vmvx_executable_t* executable, iree_vm_module_t* bytecode_module,
+    iree_host_size_t constant_count, const uint32_t* constants) {
+  // Look for the exported function. If it's not present then no constants are
+  // required and if it is then we must have at least one constant.
+  iree_vm_function_t set_function;
+  iree_status_t status = iree_vm_module_lookup_function_by_name(
+      bytecode_module, IREE_VM_FUNCTION_LINKAGE_EXPORT,
+      iree_make_cstring_view("__set_constants"), &set_function);
+  if (iree_status_is_not_found(status)) {
+    // No constants required by the executable.
+    iree_status_ignore(status);
+    if (constant_count > 0) {
+      // ...but we got provided some anyway.
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "executable has no executable-level constants "
+                              "but %" PRIhsz " constants were provided",
+                              constant_count);
+    }
+    return iree_ok_status();  // nothing to do
+  } else if (!iree_status_is_ok(status)) {
+    return status;
+  } else if (!constant_count || !constants) {
+    // Constants required but none provided.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "executable requires executable-level constants "
+                            "but none were provided");
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // TODO(benvanik): maybe just take the cost of an alloc + clone here so that
+  // we can more gracefully handle the module doing weird things with the inputs
+  // and constants.
+
+  // Wrap the constant memory in an on-stack buffer.
+  // The null allocator marks the storage unowned; the module must not retain
+  // the buffer past the call (enforced by deinitialize below).
+  iree_vm_buffer_t buffer = {{0}};
+  iree_vm_buffer_initialize(
+      IREE_VM_BUFFER_ACCESS_ORIGIN_HOST,
+      iree_make_byte_span((void*)constants,
+                          constant_count * sizeof(*constants)),
+      iree_allocator_null(), &buffer);
+
+  // Setup input list.
+  uint8_t input_storage[64] = {0};
+  iree_vm_list_t* inputs = NULL;
+  iree_vm_type_def_t element_type =
+      iree_vm_type_def_make_ref_type(iree_vm_buffer_type_id());
+  status = iree_vm_list_initialize(
+      iree_make_byte_span(input_storage, sizeof(input_storage)), &element_type,
+      1, &inputs);
+  if (iree_status_is_ok(status)) {
+    iree_vm_ref_t buffer_ref = iree_vm_buffer_retain_ref(&buffer);
+    status = iree_vm_list_push_ref_move(inputs, &buffer_ref);
+  }
+
+  // Copy the executable constants into the module state.
+  if (iree_status_is_ok(status)) {
+    status =
+        iree_vm_invoke(executable->context, set_function,
+                       IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/NULL, inputs,
+                       /*outputs=*/NULL, executable->base.host_allocator);
+  }
+
+  // Inputs *must* be released here as we allocated it on the stack.
+  if (inputs) {
+    iree_vm_list_deinitialize(inputs);
+  }
+
+  // Buffer *must* be released here since we don't control the constant
+  // lifetime - this will abort if it's not.
+  iree_vm_buffer_deinitialize(&buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Creates a VMVX executable dispatching into |bytecode_module| within
+// |context|. One exported VM function is expected per executable layout and
+// each must match IREE_VMVX_ENTRY_SIGNATURE.
+// Allocation layout: [struct | entry_fns[] | dispatch_attrs[] | layouts].
+static iree_status_t iree_hal_vmvx_executable_create(
+    iree_vm_context_t* context, iree_vm_module_t* bytecode_module,
+    const iree_hal_executable_params_t* executable_params,
+    iree_allocator_t host_allocator, iree_hal_executable_t** out_executable) {
+  IREE_ASSERT_ARGUMENT(context);
+  IREE_ASSERT_ARGUMENT(bytecode_module);
+  IREE_ASSERT_ARGUMENT(executable_params);
+  IREE_ASSERT_ARGUMENT(!executable_params->executable_layout_count ||
+                       executable_params->executable_layouts);
+  IREE_ASSERT_ARGUMENT(out_executable);
+  *out_executable = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_host_size_t entry_count =
+      iree_vm_module_signature(bytecode_module).export_function_count;
+  if (entry_count != executable_params->executable_layout_count) {
+    // NOTE: must balance the trace zone opened above on this early-exit path
+    // (previously leaked an open zone under instrumentation).
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "executable provides %" PRIhsz
+                            " entry points but caller "
+                            "provided %" PRIhsz "; must match",
+                            entry_count,
+                            executable_params->executable_layout_count);
+  }
+
+  iree_hal_vmvx_executable_t* executable = NULL;
+  iree_host_size_t total_size =
+      sizeof(*executable) + entry_count * sizeof(*executable->entry_fns) +
+      entry_count * sizeof(*executable->base.dispatch_attrs) +
+      executable_params->executable_layout_count *
+          sizeof(iree_hal_local_executable_layout_t);
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&executable);
+  iree_hal_executable_dispatch_attrs_v0_t* dispatch_attrs = NULL;
+  if (iree_status_is_ok(status)) {
+    // Carve the trailing storage: dispatch attrs then layout pointers.
+    uint8_t* ptr = (uint8_t*)executable + sizeof(*executable) +
+                   entry_count * sizeof(*executable->entry_fns);
+    dispatch_attrs = (iree_hal_executable_dispatch_attrs_v0_t*)ptr;
+    ptr += entry_count * sizeof(*executable->base.dispatch_attrs);
+    iree_hal_local_executable_layout_t** executable_layouts_ptr =
+        (iree_hal_local_executable_layout_t**)ptr;
+    iree_hal_local_executable_initialize(
+        &iree_hal_vmvx_executable_vtable,
+        executable_params->executable_layout_count,
+        executable_params->executable_layouts, executable_layouts_ptr,
+        host_allocator, &executable->base);
+    executable->context = context;
+    executable->base.dispatch_attrs = dispatch_attrs;
+    iree_vm_context_retain(executable->context);
+
+    // Resolve and verify each entry function eagerly so dispatch can assume
+    // they are valid.
+    executable->entry_fn_count = entry_count;
+    for (iree_host_size_t i = 0; i < executable->entry_fn_count; ++i) {
+      status = iree_vm_module_lookup_function_by_ordinal(
+          bytecode_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i,
+          &executable->entry_fns[i]);
+      if (!iree_status_is_ok(status)) break;
+      status = iree_hal_vmvx_executable_verify_entry_point(
+          &executable->entry_fns[i]);
+      if (!iree_status_is_ok(status)) break;
+    }
+  }
+
+  // Query the optional local workgroup size from each entry point.
+  if (iree_status_is_ok(status)) {
+    // TODO(benvanik): pack this more efficiently; this requires a lot of
+    // queries and instead could be a single packed table we can directly
+    // reference from the module. Module-level reflection attrs would help.
+    for (iree_host_size_t i = 0; i < executable->entry_fn_count; ++i) {
+      iree_string_view_t local_memory_str = iree_vm_function_reflection_attr(
+          &executable->entry_fns[i], iree_make_cstring_view("local_memory"));
+      uint32_t local_memory_size = 0;
+      if (!iree_string_view_is_empty(local_memory_str)) {
+        iree_string_view_atoi_uint32(local_memory_str, &local_memory_size);
+      }
+      // Convert bytes to pages; a missing/zero attr means no local memory.
+      local_memory_size /= IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE;
+      dispatch_attrs[i].local_memory_pages = (uint16_t)local_memory_size;
+    }
+  }
+
+  // Provide executable constants to the module.
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vmvx_executable_set_constants(
+        executable, bytecode_module, executable_params->constant_count,
+        executable_params->constants);
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_executable = (iree_hal_executable_t*)executable;
+  } else {
+    iree_hal_executable_release((iree_hal_executable_t*)executable);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases the VM context retained at creation, deinitializes the base
+// executable, and frees the storage.
+static void iree_hal_vmvx_executable_destroy(
+    iree_hal_executable_t* base_executable) {
+  iree_hal_vmvx_executable_t* executable =
+      (iree_hal_vmvx_executable_t*)base_executable;
+  iree_allocator_t host_allocator = executable->base.host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_vm_context_release(executable->context);
+  iree_hal_local_executable_deinitialize(
+      (iree_hal_local_executable_t*)base_executable);
+  iree_allocator_free(host_allocator, executable);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Issues one workgroup invocation of entry point |ordinal|.
+// Marshals the HAL dispatch/workgroup state into the VM calling convention:
+// on-stack VM buffers for workgroup-local memory and push constants, an
+// on-stack VM list of binding buffers, and the workgroup id/size/count scalars,
+// then performs a direct begin_call on the module owning the entry function.
+static iree_status_t iree_hal_vmvx_executable_issue_call(
+    iree_hal_local_executable_t* base_executable, iree_host_size_t ordinal,
+    const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+    const iree_hal_executable_workgroup_state_v0_t* workgroup_state) {
+  iree_hal_vmvx_executable_t* executable =
+      (iree_hal_vmvx_executable_t*)base_executable;
+
+  // Validate before indexing entry_fns below.
+  if (IREE_UNLIKELY(ordinal >= executable->entry_fn_count)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "entry point ordinal out of bounds");
+  }
+  iree_vm_function_t entry_fn = executable->entry_fns[ordinal];
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  iree_string_view_t entry_point_name = iree_vm_function_name(&entry_fn);
+  if (iree_string_view_is_empty(entry_point_name)) {
+    entry_point_name = iree_make_cstring_view("unknown_vmvx_call");
+  }
+  IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, entry_point_name.data,
+                                      entry_point_name.size);
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+  // On-stack interface local to this invocation.
+  // Note that we _could_ share this across all invocations in a dispatch, but
+  // it's tricky to find a good place when threading is happening and it's
+  // intentionally fairly cheap to construct by matching the dispatch_state.
+  // The list would only need to be constructed once and we could avoid the
+  // extraneous retain/releases and mappings.
+  iree_vm_type_def_t buffer_type =
+      iree_vm_type_def_make_ref_type(iree_vm_buffer_type_id());
+  iree_host_size_t binding_list_size =
+      iree_vm_list_storage_size(&buffer_type, dispatch_state->binding_count);
+  void* binding_list_storage = iree_alloca(binding_list_size);
+  iree_vm_list_t* binding_list = NULL;
+  // NOTE(review): the early error returns below (this and the ones inside the
+  // binding loop) skip the deinitialize/release cleanup at the bottom of the
+  // function; confirm whether that is acceptable on these paths since all
+  // storage is on-stack with null allocators.
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_vm_list_initialize(
+              iree_make_byte_span(binding_list_storage, binding_list_size),
+              &buffer_type, dispatch_state->binding_count, &binding_list));
+  iree_vm_list_retain(binding_list);  // for call
+
+  // Map bindings into on-stack VMVX buffers.
+  iree_vm_buffer_t* binding_buffers = (iree_vm_buffer_t*)iree_alloca(
+      dispatch_state->binding_count * sizeof(iree_vm_buffer_t));
+  for (iree_host_size_t i = 0; i < dispatch_state->binding_count; ++i) {
+    iree_vm_buffer_t* binding_buffer = &binding_buffers[i];
+    // TODO(benvanik): executable layout contains the required access
+    // information. We will likely want to encode a bitmap of mutable bindings
+    // such that we can quickly set the access bit, though.
+    iree_vm_buffer_access_t access =
+        IREE_VM_BUFFER_ACCESS_MUTABLE | IREE_VM_BUFFER_ACCESS_ORIGIN_HOST;
+    // Null allocator: the buffer aliases HAL-owned memory and owns nothing.
+    iree_vm_buffer_initialize(
+        access,
+        iree_make_byte_span(dispatch_state->binding_ptrs[i],
+                            dispatch_state->binding_lengths[i]),
+        iree_allocator_null(), binding_buffer);
+    iree_vm_ref_t ref = {0};
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_vm_ref_wrap_assign(binding_buffer, iree_vm_buffer_type_id(),
+                                    &ref));
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_vm_list_push_ref_retain(binding_list, &ref));
+  }
+
+  // Acquire workgroup local memory for the dispatch.
+  iree_vm_buffer_t local_memory_buffer;
+  iree_vm_buffer_initialize(
+      IREE_VM_BUFFER_ACCESS_MUTABLE | IREE_VM_BUFFER_ACCESS_ORIGIN_HOST,
+      iree_make_byte_span(workgroup_state->local_memory,
+                          workgroup_state->local_memory_size),
+      iree_allocator_null(), &local_memory_buffer);
+  iree_vm_buffer_retain(&local_memory_buffer);  // for call
+
+  // Map the push constant memory directly from the dispatch state.
+  // Read-only: constants get no MUTABLE access bit.
+  iree_vm_buffer_t constants_buffer;
+  iree_vm_buffer_initialize(
+      IREE_VM_BUFFER_ACCESS_ORIGIN_HOST,
+      iree_make_byte_span(
+          (void*)dispatch_state->push_constants,
+          sizeof(uint32_t) * dispatch_state->push_constant_count),
+      iree_allocator_null(), &constants_buffer);
+  iree_vm_buffer_retain(&constants_buffer);  // for call
+
+  // Prepare call argument buffer. We've verified the signature on creation and
+  // know the exact format we can assume here.
+  //
+  // func.func @entry(
+  //   %local_memory: !vmvx.buffer,
+  //   %constants: !vmvx.buffer,
+  //   %bindings: !util.list<!vmvx.buffer>,
+  //   %workgroup_id_x: index,
+  //   %workgroup_id_y: index,
+  //   %workgroup_id_z: index,
+  //   %workgroup_size_x: index,
+  //   %workgroup_size_y: index,
+  //   %workgroup_size_z: index,
+  //   %workgroup_count_x: index,
+  //   %workgroup_count_y: index,
+  //   %workgroup_count_z: index
+  // )
+  //
+  // NOTE: this level of the VM ABI is supported - but may change in the future.
+  // Users should prefer to use the invocation API that is more stable.
+  struct {
+    iree_vm_ref_t local_memory;
+    iree_vm_ref_t constants;
+    iree_vm_ref_t bindings;
+    uint32_t workgroup_id_x;
+    uint32_t workgroup_id_y;
+    uint32_t workgroup_id_z;
+    uint32_t workgroup_size_x;
+    uint32_t workgroup_size_y;
+    uint32_t workgroup_size_z;
+    uint32_t workgroup_count_x;
+    uint32_t workgroup_count_y;
+    uint32_t workgroup_count_z;
+  } call_args = {
+      .local_memory =
+          {
+              .type = iree_vm_buffer_type_id(),
+              .ptr = &local_memory_buffer,
+              .offsetof_counter = 0,
+          },
+      .constants =
+          {
+              .type = iree_vm_buffer_type_id(),
+              .ptr = &constants_buffer,
+              .offsetof_counter = 0,
+          },
+      .bindings =
+          {
+              .type = iree_vm_list_type_id(),
+              .ptr = binding_list,
+              .offsetof_counter = 0,
+          },
+      .workgroup_id_x = workgroup_state->workgroup_id_x,
+      .workgroup_id_y = workgroup_state->workgroup_id_y,
+      .workgroup_id_z = workgroup_state->workgroup_id_z,
+      .workgroup_size_x = dispatch_state->workgroup_size_x,
+      .workgroup_size_y = dispatch_state->workgroup_size_y,
+      .workgroup_size_z = dispatch_state->workgroup_size_z,
+      .workgroup_count_x = dispatch_state->workgroup_count_x,
+      .workgroup_count_y = dispatch_state->workgroup_count_y,
+      .workgroup_count_z = dispatch_state->workgroup_count_z,
+  };
+
+  // On-stack stack. We really do abuse the stack too much here.
+  // TODO(benvanik): pass in an iree_arena_t that can be used for this.
+  IREE_VM_INLINE_STACK_INITIALIZE(
+      stack, IREE_VM_INVOCATION_FLAG_NONE,
+      iree_vm_context_state_resolver(executable->context),
+      executable->base.host_allocator);
+
+  // Direct call interface.
+  iree_vm_function_call_t call;
+  memset(&call, 0, sizeof(call));
+  call.function = entry_fn;
+  call.arguments = iree_make_byte_span(&call_args, sizeof(call_args));
+  call.results = iree_make_byte_span(NULL, 0);
+  iree_vm_execution_result_t result;
+  iree_status_t status =
+      entry_fn.module->begin_call(entry_fn.module->self, stack, &call, &result);
+
+  iree_vm_stack_deinitialize(stack);
+
+  // Deinitialize all on-stack VM objects created above (reverse of setup).
+  iree_vm_buffer_deinitialize(&local_memory_buffer);
+  iree_vm_buffer_deinitialize(&constants_buffer);
+  iree_vm_list_deinitialize(binding_list);
+  for (iree_host_size_t i = 0; i < dispatch_state->binding_count; ++i) {
+    iree_vm_buffer_deinitialize(&binding_buffers[i]);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Vtable wiring the VMVX executable into the local-executable interface.
+static const iree_hal_local_executable_vtable_t
+    iree_hal_vmvx_executable_vtable = {
+        .base =
+            {
+                .destroy = iree_hal_vmvx_executable_destroy,
+            },
+        .issue_call = iree_hal_vmvx_executable_issue_call,
+};
+
+//===----------------------------------------------------------------------===//
+// iree_hal_vmvx_module_loader_t
+//===----------------------------------------------------------------------===//
+
+// Executable loader that creates a VM context per loaded executable, pairing
+// the user bytecode module with a single shared VMVX module.
+typedef struct iree_hal_vmvx_module_loader_t {
+  iree_hal_executable_loader_t base;
+  iree_allocator_t host_allocator;  // used for loader + per-load allocations
+  iree_vm_instance_t* instance;     // retained; shared by all loaded contexts
+  iree_vm_module_t* vmvx_module;    // retained; shared across executables
+} iree_hal_vmvx_module_loader_t;
+
+// Defined at the bottom of the file alongside the method implementations.
+static const iree_hal_executable_loader_vtable_t
+    iree_hal_vmvx_module_loader_vtable;
+
+// Creates a VMVX module loader. The shared VMVX module is created once here
+// and retained by the loader for reuse across all loaded executables.
+iree_status_t iree_hal_vmvx_module_loader_create(
+    iree_vm_instance_t* instance, iree_allocator_t host_allocator,
+    iree_hal_executable_loader_t** out_executable_loader) {
+  IREE_ASSERT_ARGUMENT(instance);
+  IREE_ASSERT_ARGUMENT(out_executable_loader);
+  *out_executable_loader = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // A single VMVX module is shared across all loaded executables.
+  // NOTE(review): these early returns skip IREE_TRACE_ZONE_END(z0) — confirm
+  // the tracing macros tolerate an unbalanced zone on the failure path.
+  IREE_RETURN_IF_ERROR(iree_vmvx_module_register_types());
+  iree_vm_module_t* vmvx_module = NULL;
+  IREE_RETURN_IF_ERROR(iree_vmvx_module_create(host_allocator, &vmvx_module));
+
+  iree_hal_vmvx_module_loader_t* executable_loader = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(*executable_loader), (void**)&executable_loader);
+  if (iree_status_is_ok(status)) {
+    iree_hal_executable_loader_initialize(
+        &iree_hal_vmvx_module_loader_vtable,
+        iree_hal_executable_import_provider_null(), &executable_loader->base);
+    executable_loader->host_allocator = host_allocator;
+    executable_loader->instance = instance;
+    iree_vm_instance_retain(executable_loader->instance);
+    executable_loader->vmvx_module = vmvx_module;
+    iree_vm_module_retain(executable_loader->vmvx_module);
+    *out_executable_loader = (iree_hal_executable_loader_t*)executable_loader;
+  }
+
+  // Balances the create above; on success the loader holds its own retain,
+  // and on failure this drops the module entirely.
+  iree_vm_module_release(vmvx_module);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Destroys the loader: drops the shared VMVX module and instance retains and
+// frees the loader storage.
+static void iree_hal_vmvx_module_loader_destroy(
+    iree_hal_executable_loader_t* base_executable_loader) {
+  iree_hal_vmvx_module_loader_t* executable_loader =
+      (iree_hal_vmvx_module_loader_t*)base_executable_loader;
+  // Capture the allocator before freeing the struct that stores it.
+  iree_allocator_t host_allocator = executable_loader->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_vm_module_release(executable_loader->vmvx_module);
+  iree_vm_instance_release(executable_loader->instance);
+  iree_allocator_free(host_allocator, executable_loader);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns true only for the VMVX bytecode flatbuffer format; caching mode is
+// ignored as all modes are supported by this loader.
+static bool iree_hal_vmvx_module_loader_query_support(
+    iree_hal_executable_loader_t* base_executable_loader,
+    iree_hal_executable_caching_mode_t caching_mode,
+    iree_string_view_t executable_format) {
+  return iree_string_view_equal(executable_format,
+                                iree_make_cstring_view("vmvx-bytecode-fb"));
+}
+
+// Attempts to load a VMVX executable from |executable_params|: creates a VM
+// bytecode module from the provided flatbuffer data, builds a context pairing
+// it with the shared VMVX module, and wraps both in a HAL executable.
+//
+// Data ownership: when the caching mode allows aliasing the provided data the
+// module references it directly (null allocator and the caller keeps it live);
+// otherwise the data is cloned and the bytecode module takes ownership of the
+// clone, freeing it with |bytecode_module_allocator| when destroyed.
+static iree_status_t iree_hal_vmvx_module_loader_try_load(
+    iree_hal_executable_loader_t* base_executable_loader,
+    const iree_hal_executable_params_t* executable_params,
+    iree_hal_executable_t** out_executable) {
+  iree_hal_vmvx_module_loader_t* executable_loader =
+      (iree_hal_vmvx_module_loader_t*)base_executable_loader;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_const_byte_span_t bytecode_module_data =
+      executable_params->executable_data;
+
+  // If the caching mode allows for aliasing the existing flatbuffer data then
+  // we avoid allocations and just pass the pointer on through. The caller
+  // ensures that the data remains valid for the duration the executable is
+  // loaded. Otherwise, we clone it and let the bytecode module take ownership.
+  iree_allocator_t bytecode_module_allocator;
+  if (iree_all_bits_set(executable_params->caching_mode,
+                        IREE_HAL_EXECUTABLE_CACHING_MODE_ALIAS_PROVIDED_DATA)) {
+    // Zero-copy route.
+    bytecode_module_allocator = iree_allocator_null();
+  } else {
+    bytecode_module_allocator = executable_loader->host_allocator;
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_allocator_clone(executable_loader->host_allocator,
+                                 executable_params->executable_data,
+                                 (void**)&bytecode_module_data.data));
+  }
+
+  // Load the user-provided bytecode module. We pass ownership of the data (if
+  // we have it) to the module to manage.
+  // FIX: pass |bytecode_module_data| (which points at the clone when one was
+  // made above) rather than the caller's |executable_params->executable_data|;
+  // the original passed the caller's span, leaking the clone and handing the
+  // module an allocator that would free memory it does not own.
+  iree_vm_module_t* bytecode_module = NULL;
+  iree_status_t status = iree_vm_bytecode_module_create(
+      bytecode_module_data, bytecode_module_allocator,
+      executable_loader->host_allocator, &bytecode_module);
+
+  // Create the context tying together the shared VMVX module and the
+  // user-provided module that references it. If we wanted to allow custom
+  // modules here for user-provided functions we'd mix them in here.
+  iree_vm_context_t* context = NULL;
+  if (iree_status_is_ok(status)) {
+    iree_vm_module_t* modules[2] = {
+        executable_loader->vmvx_module,
+        bytecode_module,
+    };
+    status = iree_vm_context_create_with_modules(
+        executable_loader->instance, IREE_VM_CONTEXT_FLAG_NONE, modules,
+        IREE_ARRAYSIZE(modules), executable_loader->host_allocator, &context);
+  }
+
+  // Executable takes ownership of the entire context (including the bytecode
+  // module, which itself may own the underlying allocation).
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vmvx_executable_create(
+        context, bytecode_module, executable_params,
+        executable_loader->host_allocator, out_executable);
+  }
+
+  // Drop local retains; the executable (if created) keeps its own.
+  iree_vm_context_release(context);
+  iree_vm_module_release(bytecode_module);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Vtable wiring the VMVX loader into the executable-loader interface.
+static const iree_hal_executable_loader_vtable_t
+    iree_hal_vmvx_module_loader_vtable = {
+        .destroy = iree_hal_vmvx_module_loader_destroy,
+        .query_support = iree_hal_vmvx_module_loader_query_support,
+        .try_load = iree_hal_vmvx_module_loader_try_load,
+};
diff --git a/runtime/src/iree/hal/local/loaders/vmvx_module_loader.h b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.h
new file mode 100644
index 0000000..c080052
--- /dev/null
+++ b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.h
@@ -0,0 +1,31 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOADERS_VMVX_MODULE_LOADER_H_
+#define IREE_HAL_LOCAL_LOADERS_VMVX_MODULE_LOADER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/vm/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates an executable loader that can load compiled IREE VM bytecode modules
+// using the VMVX module. |instance| will be used for all loaded contexts.
+// |out_executable_loader| must be released by the caller.
+iree_status_t iree_hal_vmvx_module_loader_create(
+    iree_vm_instance_t* instance, iree_allocator_t host_allocator,
+    iree_hal_executable_loader_t** out_executable_loader);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_LOADERS_VMVX_MODULE_LOADER_H_
diff --git a/runtime/src/iree/hal/local/local_descriptor_set.c b/runtime/src/iree/hal/local/local_descriptor_set.c
new file mode 100644
index 0000000..c4d6210
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_descriptor_set.c
@@ -0,0 +1,83 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/local_descriptor_set.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+
+static const iree_hal_descriptor_set_vtable_t
+ iree_hal_local_descriptor_set_vtable;
+
+iree_hal_local_descriptor_set_t* iree_hal_local_descriptor_set_cast(
+ iree_hal_descriptor_set_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_local_descriptor_set_vtable);
+ return (iree_hal_local_descriptor_set_t*)base_value;
+}
+
+// Creates a local descriptor set that retains |base_layout| and a copy of
+// |bindings| (retaining each binding's buffer). Allocated in a single block
+// using the layout's host allocator with the bindings stored inline.
+iree_status_t iree_hal_local_descriptor_set_create(
+    iree_hal_descriptor_set_layout_t* base_layout,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings,
+    iree_hal_descriptor_set_t** out_descriptor_set) {
+  IREE_ASSERT_ARGUMENT(base_layout);
+  IREE_ASSERT_ARGUMENT(!binding_count || bindings);
+  IREE_ASSERT_ARGUMENT(out_descriptor_set);
+  *out_descriptor_set = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_local_descriptor_set_layout_t* local_layout =
+      iree_hal_local_descriptor_set_layout_cast(base_layout);
+  IREE_ASSERT_ARGUMENT(local_layout);
+
+  // Single allocation: struct header + trailing flexible bindings array.
+  iree_hal_local_descriptor_set_t* descriptor_set = NULL;
+  iree_host_size_t total_size =
+      sizeof(*descriptor_set) +
+      binding_count * sizeof(*descriptor_set->bindings);
+  iree_status_t status = iree_allocator_malloc(
+      local_layout->host_allocator, total_size, (void**)&descriptor_set);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_local_descriptor_set_vtable,
+                                 &descriptor_set->resource);
+    descriptor_set->layout = local_layout;
+    iree_hal_descriptor_set_layout_retain(base_layout);
+    descriptor_set->binding_count = binding_count;
+    memcpy(descriptor_set->bindings, bindings,
+           binding_count * sizeof(iree_hal_descriptor_set_binding_t));
+    // Keep the bound buffers live for the lifetime of the descriptor set.
+    for (iree_host_size_t i = 0; i < descriptor_set->binding_count; ++i) {
+      iree_hal_buffer_retain(descriptor_set->bindings[i].buffer);
+    }
+    *out_descriptor_set = (iree_hal_descriptor_set_t*)descriptor_set;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Destroys a local descriptor set: releases the retained buffers and layout
+// (reverse of create) and frees the single-block allocation.
+static void iree_hal_local_descriptor_set_destroy(
+    iree_hal_descriptor_set_t* base_descriptor_set) {
+  iree_hal_local_descriptor_set_t* descriptor_set =
+      iree_hal_local_descriptor_set_cast(base_descriptor_set);
+  // Capture the allocator before the layout (which stores it) is released.
+  iree_allocator_t host_allocator = descriptor_set->layout->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  for (iree_host_size_t i = 0; i < descriptor_set->binding_count; ++i) {
+    iree_hal_buffer_release(descriptor_set->bindings[i].buffer);
+  }
+  iree_hal_descriptor_set_layout_release(
+      (iree_hal_descriptor_set_layout_t*)descriptor_set->layout);
+  iree_allocator_free(host_allocator, descriptor_set);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Vtable wiring the local descriptor set into the HAL resource interface.
+static const iree_hal_descriptor_set_vtable_t
+    iree_hal_local_descriptor_set_vtable = {
+        .destroy = iree_hal_local_descriptor_set_destroy,
+};
diff --git a/runtime/src/iree/hal/local/local_descriptor_set.h b/runtime/src/iree/hal/local/local_descriptor_set.h
new file mode 100644
index 0000000..eba78a0
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_descriptor_set.h
@@ -0,0 +1,37 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOCAL_DESCRIPTOR_SET_H_
+#define IREE_HAL_LOCAL_LOCAL_DESCRIPTOR_SET_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/local_descriptor_set_layout.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Host-local descriptor set: a retained layout plus an inline copy of the
+// bindings (each binding's buffer is retained for the set's lifetime).
+typedef struct iree_hal_local_descriptor_set_t {
+  iree_hal_resource_t resource;
+  iree_hal_local_descriptor_set_layout_t* layout;  // retained
+  iree_host_size_t binding_count;
+  iree_hal_descriptor_set_binding_t bindings[];  // inline trailing storage
+} iree_hal_local_descriptor_set_t;
+
+// Creates a descriptor set from |layout| with a copy of |bindings|.
+// |out_descriptor_set| must be released by the caller.
+iree_status_t iree_hal_local_descriptor_set_create(
+    iree_hal_descriptor_set_layout_t* layout, iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings,
+    iree_hal_descriptor_set_t** out_descriptor_set);
+
+// Downcasts |base_value| to the local descriptor set type.
+iree_hal_local_descriptor_set_t* iree_hal_local_descriptor_set_cast(
+    iree_hal_descriptor_set_t* base_value);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_LOCAL_DESCRIPTOR_SET_H_
diff --git a/runtime/src/iree/hal/local/local_descriptor_set_layout.c b/runtime/src/iree/hal/local/local_descriptor_set_layout.c
new file mode 100644
index 0000000..b3af9c6
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_descriptor_set_layout.c
@@ -0,0 +1,77 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/local_descriptor_set_layout.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+
+static const iree_hal_descriptor_set_layout_vtable_t
+ iree_hal_local_descriptor_set_layout_vtable;
+
+// Downcasts |base_value| to the local descriptor set layout type, asserting
+// (in debug builds) that the vtable matches.
+iree_hal_local_descriptor_set_layout_t*
+iree_hal_local_descriptor_set_layout_cast(
+    iree_hal_descriptor_set_layout_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value,
+                       &iree_hal_local_descriptor_set_layout_vtable);
+  return (iree_hal_local_descriptor_set_layout_t*)base_value;
+}
+
+// Creates a local descriptor set layout holding an inline copy of |bindings|.
+// Fails with INVALID_ARGUMENT when |binding_count| exceeds the fixed
+// IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT limit.
+iree_status_t iree_hal_local_descriptor_set_layout_create(
+    iree_hal_descriptor_set_layout_usage_type_t usage_type,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_layout_binding_t* bindings,
+    iree_allocator_t host_allocator,
+    iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
+  IREE_ASSERT_ARGUMENT(!binding_count || bindings);
+  IREE_ASSERT_ARGUMENT(out_descriptor_set_layout);
+  *out_descriptor_set_layout = NULL;
+  // Validate before any allocation so failure leaves nothing to clean up.
+  if (binding_count > IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT, "binding count %zu over the limit of %d",
+        binding_count, IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT);
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Single allocation: struct header + trailing flexible bindings array.
+  iree_hal_local_descriptor_set_layout_t* layout = NULL;
+  iree_host_size_t total_size =
+      sizeof(*layout) + binding_count * sizeof(*layout->bindings);
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&layout);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_local_descriptor_set_layout_vtable,
+                                 &layout->resource);
+    layout->host_allocator = host_allocator;
+    layout->usage_type = usage_type;
+    layout->binding_count = binding_count;
+    memcpy(layout->bindings, bindings,
+           binding_count * sizeof(iree_hal_descriptor_set_layout_binding_t));
+    *out_descriptor_set_layout = (iree_hal_descriptor_set_layout_t*)layout;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Destroys a local descriptor set layout (single-block free; the inline
+// bindings array goes with it).
+static void iree_hal_local_descriptor_set_layout_destroy(
+    iree_hal_descriptor_set_layout_t* base_layout) {
+  iree_hal_local_descriptor_set_layout_t* layout =
+      iree_hal_local_descriptor_set_layout_cast(base_layout);
+  // Capture the allocator before freeing the struct that stores it.
+  iree_allocator_t host_allocator = layout->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(host_allocator, layout);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Vtable wiring the local layout into the HAL resource interface.
+static const iree_hal_descriptor_set_layout_vtable_t
+    iree_hal_local_descriptor_set_layout_vtable = {
+        .destroy = iree_hal_local_descriptor_set_layout_destroy,
+};
diff --git a/runtime/src/iree/hal/local/local_descriptor_set_layout.h b/runtime/src/iree/hal/local/local_descriptor_set_layout.h
new file mode 100644
index 0000000..4e11ce2
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_descriptor_set_layout.h
@@ -0,0 +1,42 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOCAL_DESCRIPTOR_SET_LAYOUT_H_
+#define IREE_HAL_LOCAL_LOCAL_DESCRIPTOR_SET_LAYOUT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Maximum number of bindings a local descriptor set layout may declare;
+// create fails with INVALID_ARGUMENT above this.
+#define IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT 32
+
+// Host-local descriptor set layout with an inline copy of its bindings.
+typedef struct iree_hal_local_descriptor_set_layout_t {
+  iree_hal_resource_t resource;
+  iree_allocator_t host_allocator;  // used to free the layout on destroy
+  iree_hal_descriptor_set_layout_usage_type_t usage_type;
+  iree_host_size_t binding_count;
+  iree_hal_descriptor_set_layout_binding_t bindings[];  // inline storage
+} iree_hal_local_descriptor_set_layout_t;
+
+// Creates a layout copying |bindings|; |out_descriptor_set_layout| must be
+// released by the caller.
+iree_status_t iree_hal_local_descriptor_set_layout_create(
+    iree_hal_descriptor_set_layout_usage_type_t usage_type,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_layout_binding_t* bindings,
+    iree_allocator_t host_allocator,
+    iree_hal_descriptor_set_layout_t** out_descriptor_set_layout);
+
+// Downcasts |base_value| to the local descriptor set layout type.
+iree_hal_local_descriptor_set_layout_t*
+iree_hal_local_descriptor_set_layout_cast(
+    iree_hal_descriptor_set_layout_t* base_value);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_LOCAL_DESCRIPTOR_SET_LAYOUT_H_
diff --git a/runtime/src/iree/hal/local/local_executable.c b/runtime/src/iree/hal/local/local_executable.c
new file mode 100644
index 0000000..1fd92ec
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_executable.c
@@ -0,0 +1,108 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/local_executable.h"
+
+#include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
+
+// Initializes the local-executable base: sets up the HAL resource with
+// |vtable|, copies/retains the executable layouts into the caller-provided
+// |target_executable_layouts| storage, and installs a default environment.
+// Paired with iree_hal_local_executable_deinitialize.
+void iree_hal_local_executable_initialize(
+    const iree_hal_local_executable_vtable_t* vtable,
+    iree_host_size_t executable_layout_count,
+    iree_hal_executable_layout_t* const* source_executable_layouts,
+    iree_hal_local_executable_layout_t** target_executable_layouts,
+    iree_allocator_t host_allocator,
+    iree_hal_local_executable_t* out_base_executable) {
+  iree_hal_resource_initialize(vtable, &out_base_executable->resource);
+  out_base_executable->host_allocator = host_allocator;
+
+  out_base_executable->executable_layout_count = executable_layout_count;
+  out_base_executable->executable_layouts = target_executable_layouts;
+  for (iree_host_size_t i = 0; i < executable_layout_count; ++i) {
+    target_executable_layouts[i] =
+        (iree_hal_local_executable_layout_t*)source_executable_layouts[i];
+    // Retained here; released in deinitialize.
+    iree_hal_executable_layout_retain(source_executable_layouts[i]);
+  }
+
+  // Function attributes are optional and populated by the parent type.
+  out_base_executable->dispatch_attrs = NULL;
+
+  // Default environment with no imports assigned.
+  iree_hal_executable_environment_initialize(host_allocator,
+                                             &out_base_executable->environment);
+}
+
+// Releases the executable layouts retained in initialize. Does not free the
+// executable storage itself; that is the concrete type's responsibility.
+void iree_hal_local_executable_deinitialize(
+    iree_hal_local_executable_t* base_executable) {
+  for (iree_host_size_t i = 0; i < base_executable->executable_layout_count;
+       ++i) {
+    iree_hal_executable_layout_release(
+        (iree_hal_executable_layout_t*)base_executable->executable_layouts[i]);
+  }
+}
+
+// Downcasts |base_value| to the local executable base type.
+// NOTE(review): unlike the other casts in this package this performs no
+// IREE_HAL_ASSERT_TYPE check — confirm callers guarantee the type.
+iree_hal_local_executable_t* iree_hal_local_executable_cast(
+    iree_hal_executable_t* base_value) {
+  return (iree_hal_local_executable_t*)base_value;
+}
+
+// Dispatches one workgroup invocation through the concrete type's vtable
+// issue_call implementation.
+iree_status_t iree_hal_local_executable_issue_call(
+    iree_hal_local_executable_t* executable, iree_host_size_t ordinal,
+    const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+    const iree_hal_executable_workgroup_state_v0_t* workgroup_state) {
+  IREE_ASSERT_ARGUMENT(executable);
+  IREE_ASSERT_ARGUMENT(dispatch_state);
+  IREE_ASSERT_ARGUMENT(workgroup_state);
+  return ((const iree_hal_local_executable_vtable_t*)
+              executable->resource.vtable)
+      ->issue_call(executable, ordinal, dispatch_state, workgroup_state);
+}
+
+// Synchronously executes every workgroup of entry point |ordinal| on the
+// calling thread, walking the grid in x-fastest order. |local_memory| is
+// scratch storage reused by each workgroup invocation. Returns the first
+// failing status and aborts the remaining workgroups.
+iree_status_t iree_hal_local_executable_issue_dispatch_inline(
+    iree_hal_local_executable_t* executable, iree_host_size_t ordinal,
+    const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+    uint32_t processor_id, iree_byte_span_t local_memory) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // TODO(benvanik): annotate with executable name to calculate total time.
+
+  const uint32_t workgroup_count_x = dispatch_state->workgroup_count_x;
+  const uint32_t workgroup_count_y = dispatch_state->workgroup_count_y;
+  const uint32_t workgroup_count_z = dispatch_state->workgroup_count_z;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  char xyz_string[32];
+  int xyz_string_length =
+      snprintf(xyz_string, IREE_ARRAYSIZE(xyz_string), "%ux%ux%u",
+               workgroup_count_x, workgroup_count_y, workgroup_count_z);
+  IREE_TRACE_ZONE_APPEND_TEXT_STRING_VIEW(z0, xyz_string, xyz_string_length);
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+  iree_status_t status = iree_ok_status();
+
+  iree_alignas(64) iree_hal_executable_workgroup_state_v0_t workgroup_state = {
+      .workgroup_id_x = 0,
+      .workgroup_id_y = 0,
+      .workgroup_id_z = 0,
+      .processor_id = processor_id,
+      .local_memory = local_memory.data,
+      .local_memory_size = (size_t)local_memory.data_length,
+  };
+  // FIX: the original used `break` in the innermost loop only, so on failure
+  // the y/z loops kept issuing workgroups and each subsequent call overwrote
+  // (and thus leaked) the failing status. Checking the status in every loop
+  // condition aborts the whole grid at the first failure and preserves it.
+  for (uint32_t z = 0; z < workgroup_count_z && iree_status_is_ok(status);
+       ++z) {
+    workgroup_state.workgroup_id_z = z;
+    for (uint32_t y = 0; y < workgroup_count_y && iree_status_is_ok(status);
+         ++y) {
+      workgroup_state.workgroup_id_y = y;
+      for (uint32_t x = 0; x < workgroup_count_x && iree_status_is_ok(status);
+           ++x) {
+        workgroup_state.workgroup_id_x = x;
+        status = iree_hal_local_executable_issue_call(
+            executable, ordinal, dispatch_state, &workgroup_state);
+      }
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/hal/local/local_executable.h b/runtime/src/iree/hal/local/local_executable.h
new file mode 100644
index 0000000..d9a42e4
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_executable.h
@@ -0,0 +1,76 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOCAL_EXECUTABLE_H_
+#define IREE_HAL_LOCAL_LOCAL_EXECUTABLE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/hal/local/local_executable_layout.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Base type for executables implemented on the local HAL: retained executable
+// layouts, optional per-entry-point dispatch attributes, and the execution
+// environment. Concrete types embed this as their first member.
+typedef struct iree_hal_local_executable_t {
+  iree_hal_resource_t resource;
+  iree_allocator_t host_allocator;
+  iree_host_size_t executable_layout_count;
+  iree_hal_local_executable_layout_t** executable_layouts;  // retained
+
+  // Defines per-entry point how much workgroup local memory is required.
+  // Contains entries with 0 to indicate no local memory is required or >0 in
+  // units of IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE for the minimum amount
+  // of memory required by the function.
+  const iree_hal_executable_dispatch_attrs_v0_t* dispatch_attrs;
+
+  // Execution environment.
+  iree_hal_executable_environment_v0_t environment;
+} iree_hal_local_executable_t;
+
+// Vtable for local executables, extending the base HAL executable vtable
+// with the per-workgroup issue_call entry point.
+typedef struct iree_hal_local_executable_vtable_t {
+  iree_hal_executable_vtable_t base;
+
+  iree_status_t(IREE_API_PTR* issue_call)(
+      iree_hal_local_executable_t* executable, iree_host_size_t ordinal,
+      const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+      const iree_hal_executable_workgroup_state_v0_t* workgroup_state);
+} iree_hal_local_executable_vtable_t;
+
+// Initializes the local executable base type.
+//
+// Callers must allocate memory for |target_executable_layouts| with at least
+// `executable_layout_count * sizeof(*target_executable_layouts)` bytes.
+void iree_hal_local_executable_initialize(
+    const iree_hal_local_executable_vtable_t* vtable,
+    iree_host_size_t executable_layout_count,
+    iree_hal_executable_layout_t* const* source_executable_layouts,
+    iree_hal_local_executable_layout_t** target_executable_layouts,
+    iree_allocator_t host_allocator,
+    iree_hal_local_executable_t* out_base_executable);
+
+// Releases the layouts retained by initialize; call from the concrete type's
+// destroy before freeing storage.
+void iree_hal_local_executable_deinitialize(
+    iree_hal_local_executable_t* base_executable);
+
+// Downcasts |base_value| to the local executable base type.
+iree_hal_local_executable_t* iree_hal_local_executable_cast(
+    iree_hal_executable_t* base_value);
+
+// Issues a single workgroup invocation via the concrete type's vtable.
+iree_status_t iree_hal_local_executable_issue_call(
+    iree_hal_local_executable_t* executable, iree_host_size_t ordinal,
+    const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+    const iree_hal_executable_workgroup_state_v0_t* workgroup_state);
+
+// Synchronously executes all workgroups of an entry point on this thread.
+iree_status_t iree_hal_local_executable_issue_dispatch_inline(
+    iree_hal_local_executable_t* executable, iree_host_size_t ordinal,
+    const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+    uint32_t processor_id, iree_byte_span_t local_memory);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_LOCAL_EXECUTABLE_H_
diff --git a/runtime/src/iree/hal/local/local_executable_cache.c b/runtime/src/iree/hal/local/local_executable_cache.c
new file mode 100644
index 0000000..c446c28
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_executable_cache.c
@@ -0,0 +1,139 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/local_executable_cache.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+
+// Executable cache that resolves executables by trying each registered loader
+// in order. The loaders array and identifier string are stored inline at the
+// tail of the allocation (see iree_hal_local_executable_cache_create).
+typedef struct iree_hal_local_executable_cache_t {
+  iree_hal_resource_t resource;        // base reference-counted resource
+  iree_allocator_t host_allocator;     // allocator used for this struct
+  iree_string_view_t identifier;       // points into the trailing storage
+  iree_host_size_t loader_count;       // number of entries in loaders[]
+  iree_hal_executable_loader_t* loaders[];  // retained loaders, in try-order
+} iree_hal_local_executable_cache_t;
+
+static const iree_hal_executable_cache_vtable_t
+    iree_hal_local_executable_cache_vtable;
+
+// Casts a base executable cache to the local type, asserting the vtable.
+static iree_hal_local_executable_cache_t* iree_hal_local_executable_cache_cast(
+    iree_hal_executable_cache_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_local_executable_cache_vtable);
+  return (iree_hal_local_executable_cache_t*)base_value;
+}
+
+// Creates a local executable cache that loads executables by trying each of
+// the |loader_count| |loaders| in order. Each loader is retained for the
+// lifetime of the cache and |identifier| is copied into the allocation.
+iree_status_t iree_hal_local_executable_cache_create(
+    iree_string_view_t identifier, iree_host_size_t loader_count,
+    iree_hal_executable_loader_t** loaders, iree_allocator_t host_allocator,
+    iree_hal_executable_cache_t** out_executable_cache) {
+  IREE_ASSERT_ARGUMENT(!loader_count || loaders);
+  IREE_ASSERT_ARGUMENT(out_executable_cache);
+  *out_executable_cache = NULL;
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_local_executable_cache_t* executable_cache = NULL;
+  // Single allocation layout: [struct][loaders flexible array][identifier].
+  iree_host_size_t total_size =
+      sizeof(*executable_cache) +
+      loader_count * sizeof(*executable_cache->loaders) + identifier.size;
+  iree_status_t status = iree_allocator_malloc(host_allocator, total_size,
+                                               (void**)&executable_cache);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_local_executable_cache_vtable,
+                                 &executable_cache->resource);
+    executable_cache->host_allocator = host_allocator;
+    // Copy the identifier into the trailing bytes of the allocation.
+    iree_string_view_append_to_buffer(
+        identifier, &executable_cache->identifier,
+        (char*)executable_cache + total_size - identifier.size);
+
+    executable_cache->loader_count = loader_count;
+    for (iree_host_size_t i = 0; i < executable_cache->loader_count; ++i) {
+      executable_cache->loaders[i] = loaders[i];
+      // Retained here; released in iree_hal_local_executable_cache_destroy.
+      iree_hal_executable_loader_retain(executable_cache->loaders[i]);
+    }
+
+    *out_executable_cache = (iree_hal_executable_cache_t*)executable_cache;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases all retained loaders and frees the cache allocation.
+static void iree_hal_local_executable_cache_destroy(
+    iree_hal_executable_cache_t* base_executable_cache) {
+  iree_hal_local_executable_cache_t* executable_cache =
+      iree_hal_local_executable_cache_cast(base_executable_cache);
+  // Capture the allocator before freeing the struct that stores it.
+  iree_allocator_t host_allocator = executable_cache->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  for (iree_host_size_t i = 0; i < executable_cache->loader_count; ++i) {
+    iree_hal_executable_loader_release(executable_cache->loaders[i]);
+  }
+  iree_allocator_free(host_allocator, executable_cache);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns true if any registered loader reports support for
+// |executable_format| under |caching_mode|.
+static bool iree_hal_local_executable_cache_can_prepare_format(
+    iree_hal_executable_cache_t* base_executable_cache,
+    iree_hal_executable_caching_mode_t caching_mode,
+    iree_string_view_t executable_format) {
+  iree_hal_local_executable_cache_t* executable_cache =
+      iree_hal_local_executable_cache_cast(base_executable_cache);
+  for (iree_host_size_t i = 0; i < executable_cache->loader_count; ++i) {
+    if (iree_hal_executable_loader_query_support(
+            executable_cache->loaders[i], caching_mode, executable_format)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Prepares an executable by trying each loader in registration order.
+// Loaders that report no format support are skipped; loaders that cancel the
+// try (IREE_STATUS_CANCELLED) are skipped and the next loader is attempted.
+// Any other failure is returned immediately.
+static iree_status_t iree_hal_local_executable_cache_prepare_executable(
+    iree_hal_executable_cache_t* base_executable_cache,
+    const iree_hal_executable_params_t* executable_params,
+    iree_hal_executable_t** out_executable) {
+  iree_hal_local_executable_cache_t* executable_cache =
+      iree_hal_local_executable_cache_cast(base_executable_cache);
+  for (iree_host_size_t i = 0; i < executable_cache->loader_count; ++i) {
+    if (!iree_hal_executable_loader_query_support(
+            executable_cache->loaders[i], executable_params->caching_mode,
+            executable_params->executable_format)) {
+      // Loader definitely can't handle the executable; no use trying so skip.
+      continue;
+    }
+    // The loader _may_ handle the executable; if the specific executable is not
+    // supported then the try will fail with IREE_STATUS_CANCELLED and we should
+    // continue trying other loaders.
+    iree_status_t status = iree_hal_executable_loader_try_load(
+        executable_cache->loaders[i], executable_params, out_executable);
+    if (iree_status_is_ok(status)) {
+      // Executable was successfully loaded.
+      return status;
+    } else if (!iree_status_is_cancelled(status)) {
+      // Error beyond just the try failing due to unsupported formats.
+      return status;
+    }
+    // Drop the CANCELLED status (avoids leaking status storage) and continue.
+    iree_status_ignore(status);
+  }
+  return iree_make_status(
+      IREE_STATUS_NOT_FOUND,
+      "no executable loader registered for the given executable format '%.*s'",
+      (int)executable_params->executable_format.size,
+      executable_params->executable_format.data);
+}
+
+static const iree_hal_executable_cache_vtable_t
+    iree_hal_local_executable_cache_vtable = {
+        .destroy = iree_hal_local_executable_cache_destroy,
+        .can_prepare_format =
+            iree_hal_local_executable_cache_can_prepare_format,
+        .prepare_executable =
+            iree_hal_local_executable_cache_prepare_executable,
+};
diff --git a/runtime/src/iree/hal/local/local_executable_cache.h b/runtime/src/iree/hal/local/local_executable_cache.h
new file mode 100644
index 0000000..0bec265
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_executable_cache.h
@@ -0,0 +1,35 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOCAL_EXECUTABLE_CACHE_H_
+#define IREE_HAL_LOCAL_LOCAL_EXECUTABLE_CACHE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_loader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// TODO(benvanik): when we refactor executable caches this can become something
+// more specialized; like nop_executable_cache (does nothing but pass through)
+// or inproc_lru_executable_cache (simple in-memory LRU of recent executables).
+//
+// We can also set this up so they share storage. Ideally a JIT'ed executable in
+// one device is the same JIT'ed executable in another, and in multi-tenant
+// situations we're likely to want that isolation _and_ sharing.
+
+// Creates an executable cache that prepares executables by trying each of the
+// |loader_count| |loaders| in order. |identifier| is copied and may be used
+// for debugging; the loaders are retained for the lifetime of the cache.
+iree_status_t iree_hal_local_executable_cache_create(
+    iree_string_view_t identifier, iree_host_size_t loader_count,
+    iree_hal_executable_loader_t** loaders, iree_allocator_t host_allocator,
+    iree_hal_executable_cache_t** out_executable_cache);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_HAL_LOCAL_LOCAL_EXECUTABLE_CACHE_H_
diff --git a/runtime/src/iree/hal/local/local_executable_layout.c b/runtime/src/iree/hal/local/local_executable_layout.c
new file mode 100644
index 0000000..360a1b0
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_executable_layout.c
@@ -0,0 +1,107 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/local_executable_layout.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/local/local_descriptor_set_layout.h"
+
+static const iree_hal_executable_layout_vtable_t
+    iree_hal_local_executable_layout_vtable;
+
+// Casts a base executable layout to the local type, asserting the vtable.
+iree_hal_local_executable_layout_t* iree_hal_local_executable_layout_cast(
+    iree_hal_executable_layout_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_local_executable_layout_vtable);
+  return (iree_hal_local_executable_layout_t*)base_value;
+}
+
+// Creates a local executable layout retaining the given |set_layouts|.
+// Fails with INVALID_ARGUMENT when the set or push constant counts exceed
+// the compile-time limits. While initializing, a bitmask of all used bindings
+// (one bit per (set, binding) slot) and a count of dynamic-offset bindings
+// are derived from the descriptor set layouts.
+iree_status_t iree_hal_local_executable_layout_create(
+    iree_host_size_t push_constants, iree_host_size_t set_layout_count,
+    iree_hal_descriptor_set_layout_t** set_layouts,
+    iree_allocator_t host_allocator,
+    iree_hal_executable_layout_t** out_executable_layout) {
+  IREE_ASSERT_ARGUMENT(!set_layout_count || set_layouts);
+  IREE_ASSERT_ARGUMENT(out_executable_layout);
+  *out_executable_layout = NULL;
+  if (set_layout_count > IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "set layout count %zu over the limit of %d",
+                            set_layout_count,
+                            IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT);
+  }
+  if (push_constants > IREE_HAL_LOCAL_MAX_PUSH_CONSTANT_COUNT) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "push constant count %zu over the limit of %d",
+                            push_constants,
+                            IREE_HAL_LOCAL_MAX_PUSH_CONSTANT_COUNT);
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Allocation layout: [struct][set_layouts flexible array].
+  iree_host_size_t total_size =
+      sizeof(iree_hal_local_executable_layout_t) +
+      set_layout_count * sizeof(iree_hal_descriptor_set_layout_t*);
+
+  iree_hal_local_executable_layout_t* layout = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&layout);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_local_executable_layout_vtable,
+                                 &layout->resource);
+    layout->host_allocator = host_allocator;
+    layout->push_constants = push_constants;
+    layout->dynamic_binding_count = 0;
+    layout->used_bindings = 0;
+    layout->set_layout_count = set_layout_count;
+    for (iree_host_size_t i = 0; i < set_layout_count; ++i) {
+      layout->set_layouts[i] = set_layouts[i];
+      iree_hal_descriptor_set_layout_retain(layout->set_layouts[i]);
+
+      iree_hal_local_descriptor_set_layout_t* local_set_layout =
+          iree_hal_local_descriptor_set_layout_cast(set_layouts[i]);
+      for (iree_host_size_t j = 0; j < local_set_layout->binding_count; ++j) {
+        const iree_hal_descriptor_set_layout_binding_t* binding =
+            &local_set_layout->bindings[j];
+        // Bit index = set ordinal * max bindings per set + binding ordinal.
+        layout->used_bindings |=
+            1ull << (i * IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT + j);
+        switch (binding->type) {
+          case IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+          case IREE_HAL_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+            ++layout->dynamic_binding_count;
+            break;
+          default:
+            continue;
+        }
+      }
+    }
+    *out_executable_layout = (iree_hal_executable_layout_t*)layout;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases all retained descriptor set layouts and frees the allocation.
+static void iree_hal_local_executable_layout_destroy(
+    iree_hal_executable_layout_t* base_layout) {
+  iree_hal_local_executable_layout_t* layout =
+      iree_hal_local_executable_layout_cast(base_layout);
+  // Capture the allocator before freeing the struct that stores it.
+  iree_allocator_t host_allocator = layout->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  for (iree_host_size_t i = 0; i < layout->set_layout_count; ++i) {
+    iree_hal_descriptor_set_layout_release(layout->set_layouts[i]);
+  }
+  iree_allocator_free(host_allocator, layout);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+static const iree_hal_executable_layout_vtable_t
+    iree_hal_local_executable_layout_vtable = {
+        .destroy = iree_hal_local_executable_layout_destroy,
+};
diff --git a/runtime/src/iree/hal/local/local_executable_layout.h b/runtime/src/iree/hal/local/local_executable_layout.h
new file mode 100644
index 0000000..3732b9b
--- /dev/null
+++ b/runtime/src/iree/hal/local/local_executable_layout.h
@@ -0,0 +1,50 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_LOCAL_EXECUTABLE_LAYOUT_H_
+#define IREE_HAL_LOCAL_LOCAL_EXECUTABLE_LAYOUT_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Compile-time limits enforced by iree_hal_local_executable_layout_create.
+#define IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT 2
+#define IREE_HAL_LOCAL_MAX_PUSH_CONSTANT_COUNT 64
+
+// Bitmask with one bit per (set, binding) slot; see used_bindings below.
+typedef uint64_t iree_hal_local_binding_mask_t;
+
+#define IREE_HAL_LOCAL_BINDING_MASK_BITS \
+  (sizeof(iree_hal_local_binding_mask_t) * 8)
+
+// Executable layout for local (CPU) execution.
+typedef struct iree_hal_local_executable_layout_t {
+  iree_hal_resource_t resource;          // base reference-counted resource
+  iree_allocator_t host_allocator;       // allocator used for this struct
+  iree_host_size_t push_constants;       // push constant count
+  iree_host_size_t dynamic_binding_count;  // dynamic-offset binding count
+  iree_hal_local_binding_mask_t used_bindings;  // bit set per used binding
+  iree_host_size_t set_layout_count;     // number of entries in set_layouts[]
+  iree_hal_descriptor_set_layout_t* set_layouts[];  // retained set layouts
+} iree_hal_local_executable_layout_t;
+
+// Creates a local executable layout retaining the given |set_layouts|.
+iree_status_t iree_hal_local_executable_layout_create(
+    iree_host_size_t push_constants, iree_host_size_t set_layout_count,
+    iree_hal_descriptor_set_layout_t** set_layouts,
+    iree_allocator_t host_allocator,
+    iree_hal_executable_layout_t** out_executable_layout);
+
+// Casts a base executable layout to the local type, asserting the vtable.
+iree_hal_local_executable_layout_t* iree_hal_local_executable_layout_cast(
+    iree_hal_executable_layout_t* base_value);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_HAL_LOCAL_LOCAL_EXECUTABLE_LAYOUT_H_
diff --git a/runtime/src/iree/hal/local/sync_device.c b/runtime/src/iree/hal/local/sync_device.c
new file mode 100644
index 0000000..ffb217a
--- /dev/null
+++ b/runtime/src/iree/hal/local/sync_device.c
@@ -0,0 +1,324 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/sync_device.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/local/inline_command_buffer.h"
+#include "iree/hal/local/local_descriptor_set.h"
+#include "iree/hal/local/local_descriptor_set_layout.h"
+#include "iree/hal/local/local_executable_cache.h"
+#include "iree/hal/local/local_executable_layout.h"
+#include "iree/hal/local/sync_event.h"
+#include "iree/hal/local/sync_semaphore.h"
+#include "iree/hal/utils/buffer_transfer.h"
+
+// Synchronous local CPU device: work is executed inline on the threads that
+// issue submissions. Loaders and the identifier string are stored inline at
+// the tail of the allocation (see iree_hal_sync_device_create).
+typedef struct iree_hal_sync_device_t {
+  iree_hal_resource_t resource;     // base reference-counted resource
+  iree_string_view_t identifier;    // points into the trailing storage
+
+  iree_allocator_t host_allocator;
+  iree_hal_allocator_t* device_allocator;  // retained
+
+  // Shared state for all semaphores created by this device.
+  iree_hal_sync_semaphore_state_t semaphore_state;
+
+  iree_host_size_t loader_count;
+  iree_hal_executable_loader_t* loaders[];  // retained loaders, in try-order
+} iree_hal_sync_device_t;
+
+static const iree_hal_device_vtable_t iree_hal_sync_device_vtable;
+
+// Casts a base device to the sync device type, asserting the vtable.
+static iree_hal_sync_device_t* iree_hal_sync_device_cast(
+    iree_hal_device_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_sync_device_vtable);
+  return (iree_hal_sync_device_t*)base_value;
+}
+
+// Initializes |out_params| to default (all-zero) values.
+void iree_hal_sync_device_params_initialize(
+    iree_hal_sync_device_params_t* out_params) {
+  memset(out_params, 0, sizeof(*out_params));
+}
+
+// Validates creation parameters. Currently there are no constraints so this
+// always succeeds; kept as an extension point for future params.
+static iree_status_t iree_hal_sync_device_check_params(
+    const iree_hal_sync_device_params_t* params) {
+  return iree_ok_status();
+}
+
+// Creates a synchronous local CPU device. |loaders| are retained for the
+// lifetime of the device and |identifier| is copied into the allocation.
+iree_status_t iree_hal_sync_device_create(
+    iree_string_view_t identifier, const iree_hal_sync_device_params_t* params,
+    iree_host_size_t loader_count, iree_hal_executable_loader_t** loaders,
+    iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
+    iree_hal_device_t** out_device) {
+  IREE_ASSERT_ARGUMENT(params);
+  IREE_ASSERT_ARGUMENT(!loader_count || loaders);
+  IREE_ASSERT_ARGUMENT(device_allocator);
+  IREE_ASSERT_ARGUMENT(out_device);
+  *out_device = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(z0,
+                                    iree_hal_sync_device_check_params(params));
+
+  // Allocation layout: [struct][loaders flexible array][identifier chars].
+  iree_hal_sync_device_t* device = NULL;
+  iree_host_size_t struct_size =
+      sizeof(*device) + loader_count * sizeof(*device->loaders);
+  iree_host_size_t total_size = struct_size + identifier.size;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&device);
+  if (iree_status_is_ok(status)) {
+    // Ensure the full allocation (including the trailing loader array and
+    // identifier storage) starts zeroed.
+    memset(device, 0, total_size);
+    iree_hal_resource_initialize(&iree_hal_sync_device_vtable,
+                                 &device->resource);
+    iree_string_view_append_to_buffer(identifier, &device->identifier,
+                                      (char*)device + struct_size);
+    device->host_allocator = host_allocator;
+    device->device_allocator = device_allocator;
+    iree_hal_allocator_retain(device_allocator);
+
+    device->loader_count = loader_count;
+    for (iree_host_size_t i = 0; i < device->loader_count; ++i) {
+      device->loaders[i] = loaders[i];
+      iree_hal_executable_loader_retain(device->loaders[i]);
+    }
+
+    iree_hal_sync_semaphore_state_initialize(&device->semaphore_state);
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_device = (iree_hal_device_t*)device;
+  } else {
+    // Release tears down whatever was partially initialized.
+    iree_hal_device_release((iree_hal_device_t*)device);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Tears down device state: semaphore pool, retained loaders, and the device
+// allocator, then frees the device allocation itself.
+static void iree_hal_sync_device_destroy(iree_hal_device_t* base_device) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  // Capture the allocator before freeing the struct that stores it.
+  iree_allocator_t host_allocator = iree_hal_device_host_allocator(base_device);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_sync_semaphore_state_deinitialize(&device->semaphore_state);
+
+  for (iree_host_size_t i = 0; i < device->loader_count; ++i) {
+    iree_hal_executable_loader_release(device->loaders[i]);
+  }
+  iree_hal_allocator_release(device->device_allocator);
+  iree_allocator_free(host_allocator, device);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the identifier passed at creation time.
+static iree_string_view_t iree_hal_sync_device_id(
+    iree_hal_device_t* base_device) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  return device->identifier;
+}
+
+// Returns the host allocator passed at creation time.
+static iree_allocator_t iree_hal_sync_device_host_allocator(
+    iree_hal_device_t* base_device) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  return device->host_allocator;
+}
+
+// Returns the (retained) device allocator passed at creation time.
+static iree_hal_allocator_t* iree_hal_sync_device_allocator(
+    iree_hal_device_t* base_device) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  return device->device_allocator;
+}
+
+// Forwards trim requests to the device allocator.
+static iree_status_t iree_hal_sync_device_trim(iree_hal_device_t* base_device) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  return iree_hal_allocator_trim(device->device_allocator);
+}
+
+// Answers i32 configuration queries:
+//   "hal.executable.format" -> 1 if any loader supports the |key| format.
+//   "hal.device"/"hal.dispatch" "concurrency" -> 1 (execution is inline and
+//     single-threaded on this device).
+// Unknown category/key pairs return NOT_FOUND.
+static iree_status_t iree_hal_sync_device_query_i32(
+    iree_hal_device_t* base_device, iree_string_view_t category,
+    iree_string_view_t key, int32_t* out_value) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  *out_value = 0;
+
+  if (iree_string_view_equal(category,
+                             iree_make_cstring_view("hal.executable.format"))) {
+    *out_value =
+        iree_hal_query_any_executable_loader_support(
+            device->loader_count, device->loaders, /*caching_mode=*/0, key)
+            ? 1
+            : 0;
+    return iree_ok_status();
+  } else if (iree_string_view_equal(category,
+                                    iree_make_cstring_view("hal.device"))) {
+    if (iree_string_view_equal(key, iree_make_cstring_view("concurrency"))) {
+      *out_value = 1;
+      return iree_ok_status();
+    }
+  } else if (iree_string_view_equal(category,
+                                    iree_make_cstring_view("hal.dispatch"))) {
+    if (iree_string_view_equal(key, iree_make_cstring_view("concurrency"))) {
+      *out_value = 1;
+      return iree_ok_status();
+    }
+  }
+
+  return iree_make_status(
+      IREE_STATUS_NOT_FOUND,
+      "unknown device configuration key value '%.*s :: %.*s'",
+      (int)category.size, category.data, (int)key.size, key.data);
+}
+
+// Creates a command buffer. The sync device only supports inline command
+// buffers that execute commands as they are recorded.
+static iree_status_t iree_hal_sync_device_create_command_buffer(
+    iree_hal_device_t* base_device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  // TODO(#4680): implement a non-inline command buffer that stores its commands
+  // and can be submitted later on/multiple-times.
+  return iree_hal_inline_command_buffer_create(
+      base_device, mode, command_categories, queue_affinity,
+      iree_hal_device_host_allocator(base_device), out_command_buffer);
+}
+
+// Creates a descriptor set by delegating to the shared local implementation.
+static iree_status_t iree_hal_sync_device_create_descriptor_set(
+    iree_hal_device_t* base_device,
+    iree_hal_descriptor_set_layout_t* set_layout,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings,
+    iree_hal_descriptor_set_t** out_descriptor_set) {
+  return iree_hal_local_descriptor_set_create(set_layout, binding_count,
+                                              bindings, out_descriptor_set);
+}
+
+// Creates a descriptor set layout by delegating to the shared local
+// implementation.
+static iree_status_t iree_hal_sync_device_create_descriptor_set_layout(
+    iree_hal_device_t* base_device,
+    iree_hal_descriptor_set_layout_usage_type_t usage_type,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_layout_binding_t* bindings,
+    iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
+  return iree_hal_local_descriptor_set_layout_create(
+      usage_type, binding_count, bindings,
+      iree_hal_device_host_allocator(base_device), out_descriptor_set_layout);
+}
+
+// Creates a sync event owned by the device's host allocator.
+static iree_status_t iree_hal_sync_device_create_event(
+    iree_hal_device_t* base_device, iree_hal_event_t** out_event) {
+  return iree_hal_sync_event_create(iree_hal_device_host_allocator(base_device),
+                                    out_event);
+}
+
+// Creates an executable cache wrapping the device's registered loaders.
+static iree_status_t iree_hal_sync_device_create_executable_cache(
+    iree_hal_device_t* base_device, iree_string_view_t identifier,
+    iree_loop_t loop, iree_hal_executable_cache_t** out_executable_cache) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  return iree_hal_local_executable_cache_create(
+      identifier, device->loader_count, device->loaders,
+      iree_hal_device_host_allocator(base_device), out_executable_cache);
+}
+
+// Creates an executable layout by delegating to the shared local
+// implementation.
+static iree_status_t iree_hal_sync_device_create_executable_layout(
+    iree_hal_device_t* base_device, iree_host_size_t push_constants,
+    iree_host_size_t set_layout_count,
+    iree_hal_descriptor_set_layout_t** set_layouts,
+    iree_hal_executable_layout_t** out_executable_layout) {
+  return iree_hal_local_executable_layout_create(
+      push_constants, set_layout_count, set_layouts,
+      iree_hal_device_host_allocator(base_device), out_executable_layout);
+}
+
+// Creates a semaphore registered with the device's shared semaphore state.
+static iree_status_t iree_hal_sync_device_create_semaphore(
+    iree_hal_device_t* base_device, uint64_t initial_value,
+    iree_hal_semaphore_t** out_semaphore) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  return iree_hal_sync_semaphore_create(&device->semaphore_state, initial_value,
+                                        device->host_allocator, out_semaphore);
+}
+
+// Submits batches to the queue. Since command buffers on this device execute
+// inline at record time, a submission reduces to waiting on the batch's wait
+// semaphores and then signaling its signal semaphores.
+static iree_status_t iree_hal_sync_device_queue_submit(
+    iree_hal_device_t* base_device,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+    const iree_hal_submission_batch_t* batches) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+
+  // TODO(#4680): there is some better error handling here needed; we should
+  // propagate failures to all signal semaphores. Today we aren't as there
+  // shouldn't be any failures or if there are there's not much we'd be able to
+  // do - we already executed everything inline!
+
+  for (iree_host_size_t i = 0; i < batch_count; ++i) {
+    const iree_hal_submission_batch_t* batch = &batches[i];
+
+    // Wait for semaphores to be signaled before performing any work.
+    IREE_RETURN_IF_ERROR(iree_hal_sync_semaphore_multi_wait(
+        &device->semaphore_state, IREE_HAL_WAIT_MODE_ALL,
+        &batch->wait_semaphores, iree_infinite_timeout()));
+
+    // TODO(#4680): if we were doing deferred submissions we would issue them
+    // here. With only inline command buffers we have nothing to do here.
+
+    // Signal all semaphores now that batch work has completed.
+    IREE_RETURN_IF_ERROR(iree_hal_sync_semaphore_multi_signal(
+        &device->semaphore_state, &batch->signal_semaphores));
+  }
+
+  return iree_ok_status();
+}
+
+// Convenience: submits |batches| then blocks until |wait_semaphore| reaches
+// |wait_value| or |timeout| elapses.
+static iree_status_t iree_hal_sync_device_submit_and_wait(
+    iree_hal_device_t* base_device,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+    const iree_hal_submission_batch_t* batches,
+    iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+    iree_timeout_t timeout) {
+  // Submit...
+  IREE_RETURN_IF_ERROR(iree_hal_sync_device_queue_submit(
+      base_device, command_categories, queue_affinity, batch_count, batches));
+
+  // ...and wait.
+  return iree_hal_semaphore_wait(wait_semaphore, wait_value, timeout);
+}
+
+// Waits on the given semaphore list via the device's shared semaphore state.
+static iree_status_t iree_hal_sync_device_wait_semaphores(
+    iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode,
+    const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout) {
+  iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
+  return iree_hal_sync_semaphore_multi_wait(&device->semaphore_state, wait_mode,
+                                            semaphore_list, timeout);
+}
+
+static iree_status_t iree_hal_sync_device_wait_idle(
+    iree_hal_device_t* base_device, iree_timeout_t timeout) {
+  // No-op (in intended usages). If we allowed multiple threads to call into
+  // the same device then we may want to change this to an atomic flag as to
+  // whether any thread is actively performing work.
+  return iree_ok_status();
+}
+
+// Device vtable binding the sync device implementations above.
+// Note: transfer_range uses the shared mappable-range helper rather than a
+// device-specific implementation.
+static const iree_hal_device_vtable_t iree_hal_sync_device_vtable = {
+    .destroy = iree_hal_sync_device_destroy,
+    .id = iree_hal_sync_device_id,
+    .host_allocator = iree_hal_sync_device_host_allocator,
+    .device_allocator = iree_hal_sync_device_allocator,
+    .trim = iree_hal_sync_device_trim,
+    .query_i32 = iree_hal_sync_device_query_i32,
+    .create_command_buffer = iree_hal_sync_device_create_command_buffer,
+    .create_descriptor_set = iree_hal_sync_device_create_descriptor_set,
+    .create_descriptor_set_layout =
+        iree_hal_sync_device_create_descriptor_set_layout,
+    .create_event = iree_hal_sync_device_create_event,
+    .create_executable_cache = iree_hal_sync_device_create_executable_cache,
+    .create_executable_layout = iree_hal_sync_device_create_executable_layout,
+    .create_semaphore = iree_hal_sync_device_create_semaphore,
+    .transfer_range = iree_hal_device_transfer_mappable_range,
+    .queue_submit = iree_hal_sync_device_queue_submit,
+    .submit_and_wait = iree_hal_sync_device_submit_and_wait,
+    .wait_semaphores = iree_hal_sync_device_wait_semaphores,
+    .wait_idle = iree_hal_sync_device_wait_idle,
+};
diff --git a/runtime/src/iree/hal/local/sync_device.h b/runtime/src/iree/hal/local/sync_device.h
new file mode 100644
index 0000000..de990b7
--- /dev/null
+++ b/runtime/src/iree/hal/local/sync_device.h
@@ -0,0 +1,41 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_SYNC_DEVICE_H_
+#define IREE_HAL_LOCAL_SYNC_DEVICE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_loader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Parameters configuring an iree_hal_sync_device_t.
+// Must be initialized with iree_hal_sync_device_params_initialize prior to use.
+typedef struct iree_hal_sync_device_params_t {
+  // No parameters yet; placeholder so the struct is non-empty (C requires at
+  // least one member).
+  int reserved;
+} iree_hal_sync_device_params_t;
+
+// Initializes |out_params| to default values.
+void iree_hal_sync_device_params_initialize(
+    iree_hal_sync_device_params_t* out_params);
+
+// Creates a new synchronous local CPU device that performs execution inline
+// on threads issuing submissions. |loaders| is the set of executable
+// loaders that are available for loading in the device context.
+iree_status_t iree_hal_sync_device_create(
+    iree_string_view_t identifier, const iree_hal_sync_device_params_t* params,
+    iree_host_size_t loader_count, iree_hal_executable_loader_t** loaders,
+    iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
+    iree_hal_device_t** out_device);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_HAL_LOCAL_SYNC_DEVICE_H_
diff --git a/runtime/src/iree/hal/local/sync_driver.c b/runtime/src/iree/hal/local/sync_driver.c
new file mode 100644
index 0000000..c8291f2
--- /dev/null
+++ b/runtime/src/iree/hal/local/sync_driver.c
@@ -0,0 +1,127 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/sync_driver.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+
+// Device ID reported for the single default device this driver exposes.
+#define IREE_HAL_SYNC_DEVICE_ID_DEFAULT 0
+
+// Driver that creates synchronous local CPU devices. Loaders and the
+// identifier string are stored inline at the tail of the allocation
+// (see iree_hal_sync_driver_create).
+typedef struct iree_hal_sync_driver_t {
+  iree_hal_resource_t resource;     // base reference-counted resource
+  iree_allocator_t host_allocator;
+  iree_hal_allocator_t* device_allocator;  // retained
+
+  iree_string_view_t identifier;    // points into the trailing storage
+  // Copied at creation and passed to each device created by this driver.
+  iree_hal_sync_device_params_t default_params;
+
+  iree_host_size_t loader_count;
+  iree_hal_executable_loader_t* loaders[];  // retained loaders
+} iree_hal_sync_driver_t;
+
+static const iree_hal_driver_vtable_t iree_hal_sync_driver_vtable;
+
+// Casts a base driver to the sync driver type, asserting the vtable.
+static iree_hal_sync_driver_t* iree_hal_sync_driver_cast(
+    iree_hal_driver_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_sync_driver_vtable);
+  return (iree_hal_sync_driver_t*)base_value;
+}
+
+// Creates a synchronous local CPU driver. |default_params| are copied and
+// used for every device the driver creates; |loaders| are retained for the
+// driver's lifetime and |identifier| is copied into the allocation.
+iree_status_t iree_hal_sync_driver_create(
+    iree_string_view_t identifier,
+    const iree_hal_sync_device_params_t* default_params,
+    iree_host_size_t loader_count, iree_hal_executable_loader_t** loaders,
+    iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
+    iree_hal_driver_t** out_driver) {
+  IREE_ASSERT_ARGUMENT(default_params);
+  IREE_ASSERT_ARGUMENT(!loader_count || loaders);
+  IREE_ASSERT_ARGUMENT(device_allocator);
+  IREE_ASSERT_ARGUMENT(out_driver);
+  *out_driver = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Allocation layout: [struct][loaders flexible array][identifier chars].
+  iree_hal_sync_driver_t* driver = NULL;
+  iree_host_size_t total_size = sizeof(*driver) +
+                                loader_count * sizeof(*driver->loaders) +
+                                identifier.size;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&driver);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_sync_driver_vtable,
+                                 &driver->resource);
+    driver->host_allocator = host_allocator;
+    driver->device_allocator = device_allocator;
+    iree_hal_allocator_retain(device_allocator);
+
+    // Copy the identifier into the trailing bytes of the allocation.
+    iree_string_view_append_to_buffer(
+        identifier, &driver->identifier,
+        (char*)driver + total_size - identifier.size);
+    memcpy(&driver->default_params, default_params,
+           sizeof(driver->default_params));
+
+    driver->loader_count = loader_count;
+    for (iree_host_size_t i = 0; i < driver->loader_count; ++i) {
+      driver->loaders[i] = loaders[i];
+      iree_hal_executable_loader_retain(driver->loaders[i]);
+    }
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_driver = (iree_hal_driver_t*)driver;
+  } else {
+    // Release tears down whatever was partially initialized.
+    iree_hal_driver_release((iree_hal_driver_t*)driver);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases the device allocator and retained loaders, then frees the driver.
+static void iree_hal_sync_driver_destroy(iree_hal_driver_t* base_driver) {
+  iree_hal_sync_driver_t* driver = iree_hal_sync_driver_cast(base_driver);
+  // Capture the allocator before freeing the struct that stores it.
+  iree_allocator_t host_allocator = driver->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_allocator_release(driver->device_allocator);
+  for (iree_host_size_t i = 0; i < driver->loader_count; ++i) {
+    iree_hal_executable_loader_release(driver->loaders[i]);
+  }
+  iree_allocator_free(host_allocator, driver);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Reports the single default device this driver can create. The info array
+// is cloned into caller-owned memory allocated from |allocator|.
+static iree_status_t iree_hal_sync_driver_query_available_devices(
+    iree_hal_driver_t* base_driver, iree_allocator_t allocator,
+    iree_hal_device_info_t** out_device_infos,
+    iree_host_size_t* out_device_info_count) {
+  static const iree_hal_device_info_t device_infos[1] = {
+      {
+          .device_id = IREE_HAL_SYNC_DEVICE_ID_DEFAULT,
+          .name = iree_string_view_literal("default"),
+      },
+  };
+  *out_device_info_count = IREE_ARRAYSIZE(device_infos);
+  return iree_allocator_clone(
+      allocator, iree_make_const_byte_span(device_infos, sizeof(device_infos)),
+      (void**)out_device_infos);
+}
+
+// Creates a device using the driver's default params, loaders, and device
+// allocator. |device_id| is unused: only the default device exists.
+static iree_status_t iree_hal_sync_driver_create_device(
+    iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id,
+    iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
+  iree_hal_sync_driver_t* driver = iree_hal_sync_driver_cast(base_driver);
+  return iree_hal_sync_device_create(
+      driver->identifier, &driver->default_params, driver->loader_count,
+      driver->loaders, driver->device_allocator, host_allocator, out_device);
+}
+
+static const iree_hal_driver_vtable_t iree_hal_sync_driver_vtable = {
+    .destroy = iree_hal_sync_driver_destroy,
+    .query_available_devices = iree_hal_sync_driver_query_available_devices,
+    .create_device = iree_hal_sync_driver_create_device,
+};
diff --git a/runtime/src/iree/hal/local/sync_driver.h b/runtime/src/iree/hal/local/sync_driver.h
new file mode 100644
index 0000000..f4ff241
--- /dev/null
+++ b/runtime/src/iree/hal/local/sync_driver.h
@@ -0,0 +1,33 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_SYNC_DRIVER_H_
+#define IREE_HAL_LOCAL_SYNC_DRIVER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/hal/local/sync_device.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a new synchronous local CPU driver that creates devices that perform
+// execution inline on threads issuing submissions. |loaders| is the set of
+// executable loaders that are available for loading in each device context.
+iree_status_t iree_hal_sync_driver_create(
+ iree_string_view_t identifier,
+ const iree_hal_sync_device_params_t* default_params,
+ iree_host_size_t loader_count, iree_hal_executable_loader_t** loaders,
+ iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_SYNC_DRIVER_H_
diff --git a/runtime/src/iree/hal/local/sync_event.c b/runtime/src/iree/hal/local/sync_event.c
new file mode 100644
index 0000000..47a32c4
--- /dev/null
+++ b/runtime/src/iree/hal/local/sync_event.c
@@ -0,0 +1,57 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/sync_event.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+
+// Event implementation for the synchronous local device. It carries no
+// signaling state of its own - only the allocator needed to free it.
+typedef struct iree_hal_sync_event_t {
+  iree_hal_resource_t resource;  // base type; must remain the first field
+  iree_allocator_t host_allocator;  // allocator the event was allocated from
+} iree_hal_sync_event_t;
+
+// Forward declared so the cast helper can type-check against it.
+static const iree_hal_event_vtable_t iree_hal_sync_event_vtable;
+
+// Downcasts |base_value| after asserting it really is a sync event.
+static iree_hal_sync_event_t* iree_hal_sync_event_cast(
+    iree_hal_event_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_sync_event_vtable);
+  return (iree_hal_sync_event_t*)base_value;
+}
+
+// Allocates a new sync event from |host_allocator|. On success |out_event|
+// receives the new event; on failure it remains NULL.
+iree_status_t iree_hal_sync_event_create(iree_allocator_t host_allocator,
+                                         iree_hal_event_t** out_event) {
+  IREE_ASSERT_ARGUMENT(out_event);
+  *out_event = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_sync_event_t* sync_event = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(*sync_event), (void**)&sync_event);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_sync_event_vtable,
+                                 &sync_event->resource);
+    sync_event->host_allocator = host_allocator;
+    *out_event = (iree_hal_event_t*)sync_event;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases the event memory back to the allocator it came from.
+static void iree_hal_sync_event_destroy(iree_hal_event_t* base_event) {
+  iree_hal_sync_event_t* sync_event = iree_hal_sync_event_cast(base_event);
+  // Copy out the allocator before the struct holding it is freed.
+  iree_allocator_t allocator = sync_event->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(allocator, sync_event);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Vtable wiring the base event interface to the sync implementation.
+static const iree_hal_event_vtable_t iree_hal_sync_event_vtable = {
+    .destroy = iree_hal_sync_event_destroy,
+};
diff --git a/runtime/src/iree/hal/local/sync_event.h b/runtime/src/iree/hal/local/sync_event.h
new file mode 100644
index 0000000..38fb354
--- /dev/null
+++ b/runtime/src/iree/hal/local/sync_event.h
@@ -0,0 +1,24 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_SYNC_EVENT_H_
+#define IREE_HAL_LOCAL_SYNC_EVENT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates an event for the synchronous local device. The implementation
+// (see sync_event.c) carries no signaling state beyond its allocator.
+iree_status_t iree_hal_sync_event_create(iree_allocator_t host_allocator,
+                                         iree_hal_event_t** out_event);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_SYNC_EVENT_H_
diff --git a/runtime/src/iree/hal/local/sync_semaphore.c b/runtime/src/iree/hal/local/sync_semaphore.c
new file mode 100644
index 0000000..ceb5319
--- /dev/null
+++ b/runtime/src/iree/hal/local/sync_semaphore.c
@@ -0,0 +1,409 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/sync_semaphore.h"
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+
+// Sentinel used to indicate the semaphore has failed and an error status is
+// set.
+#define IREE_HAL_SYNC_SEMAPHORE_FAILURE_VALUE UINT64_MAX
+
+//===----------------------------------------------------------------------===//
+// iree_hal_sync_semaphore_state_t
+//===----------------------------------------------------------------------===//
+
+// Initializes |out_shared_state| in place: zeroes the storage and sets up the
+// process-local notification used to wake semaphore waiters.
+void iree_hal_sync_semaphore_state_initialize(
+    iree_hal_sync_semaphore_state_t* out_shared_state) {
+  memset(out_shared_state, 0, sizeof(*out_shared_state));
+  iree_notification_initialize(&out_shared_state->notification);
+}
+
+// Deinitializes |shared_state|; no semaphores created from it may still be
+// live. The trailing memset helps surface use-after-deinit bugs.
+void iree_hal_sync_semaphore_state_deinitialize(
+    iree_hal_sync_semaphore_state_t* shared_state) {
+  iree_notification_deinitialize(&shared_state->notification);
+  memset(shared_state, 0, sizeof(*shared_state));
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_sync_semaphore_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_sync_semaphore_t {
+  iree_hal_resource_t resource;  // base type; must remain the first field
+  iree_allocator_t host_allocator;  // allocator the semaphore came from
+
+  // Shared across all semaphores.
+  iree_hal_sync_semaphore_state_t* shared_state;
+
+  // Guards all mutable fields. We expect low contention on semaphores and since
+  // iree_slim_mutex_t is (effectively) just a CAS this keeps things simpler
+  // than trying to make the entire structure lock-free.
+  iree_slim_mutex_t mutex;
+
+  // Current signaled value. May be IREE_HAL_SYNC_SEMAPHORE_FAILURE_VALUE to
+  // indicate that the semaphore has been signaled for failure and
+  // |failure_status| contains the error.
+  uint64_t current_value;
+
+  // OK or the status passed to iree_hal_semaphore_fail. Owned by the semaphore.
+  iree_status_t failure_status;
+} iree_hal_sync_semaphore_t;
+
+// Forward declared for use by the cast helper below.
+static const iree_hal_semaphore_vtable_t iree_hal_sync_semaphore_vtable;
+
+// Downcasts |base_value| after asserting it is backed by the sync vtable.
+static iree_hal_sync_semaphore_t* iree_hal_sync_semaphore_cast(
+    iree_hal_semaphore_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_sync_semaphore_vtable);
+  return (iree_hal_sync_semaphore_t*)base_value;
+}
+
+// Allocates and initializes a sync semaphore starting at |initial_value|.
+// |shared_state| is not owned and must outlive the returned semaphore.
+iree_status_t iree_hal_sync_semaphore_create(
+    iree_hal_sync_semaphore_state_t* shared_state, uint64_t initial_value,
+    iree_allocator_t host_allocator, iree_hal_semaphore_t** out_semaphore) {
+  IREE_ASSERT_ARGUMENT(shared_state);
+  IREE_ASSERT_ARGUMENT(out_semaphore);
+  *out_semaphore = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_sync_semaphore_t* sem = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, sizeof(*sem), (void**)&sem);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_sync_semaphore_vtable,
+                                 &sem->resource);
+    sem->host_allocator = host_allocator;
+    sem->shared_state = shared_state;
+
+    // Mutable state starts at the caller-provided value with no failure set.
+    iree_slim_mutex_initialize(&sem->mutex);
+    sem->current_value = initial_value;
+    sem->failure_status = iree_ok_status();
+
+    *out_semaphore = (iree_hal_semaphore_t*)sem;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees the semaphore along with any failure status it still owns.
+static void iree_hal_sync_semaphore_destroy(
+    iree_hal_semaphore_t* base_semaphore) {
+  iree_hal_sync_semaphore_t* sem =
+      iree_hal_sync_semaphore_cast(base_semaphore);
+  iree_allocator_t allocator = sem->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // The failure status (if any) is owned by the semaphore; free it here.
+  iree_status_free(sem->failure_status);
+  iree_slim_mutex_deinitialize(&sem->mutex);
+  iree_allocator_free(allocator, sem);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Queries the current payload value. If the semaphore has failed the sentinel
+// value is stored to |out_value| and a clone of the failure status is
+// returned so the caller can inspect the error.
+static iree_status_t iree_hal_sync_semaphore_query(
+    iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) {
+  iree_hal_sync_semaphore_t* semaphore =
+      iree_hal_sync_semaphore_cast(base_semaphore);
+
+  iree_slim_mutex_lock(&semaphore->mutex);
+
+  *out_value = semaphore->current_value;
+
+  iree_status_t status = iree_ok_status();
+  // >= is used defensively; the sentinel is UINT64_MAX so nothing can exceed
+  // it and this is equivalent to ==.
+  if (*out_value >= IREE_HAL_SYNC_SEMAPHORE_FAILURE_VALUE) {
+    status = iree_status_clone(semaphore->failure_status);
+  }
+
+  iree_slim_mutex_unlock(&semaphore->mutex);
+
+  return status;
+}
+
+// Signals |semaphore| to |new_value| or returns an error if doing so would be
+// invalid. The semaphore mutex must be held by the caller for the duration of
+// the call and the caller is responsible for unlocking it on all paths; this
+// function never touches the lock itself.
+static iree_status_t iree_hal_sync_semaphore_signal_unsafe(
+    iree_hal_sync_semaphore_t* semaphore, uint64_t new_value) {
+  if (new_value <= semaphore->current_value) {
+    uint64_t current_value IREE_ATTRIBUTE_UNUSED = semaphore->current_value;
+    // NOTE: no unlock here - both callers (signal/multi_signal) unlock
+    // unconditionally after this returns; unlocking on this path as well
+    // would double-unlock the mutex (undefined behavior).
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "semaphore values must be monotonically "
+                            "increasing; current_value=%" PRIu64
+                            ", new_value=%" PRIu64,
+                            current_value, new_value);
+  }
+
+  // Update to the new value.
+  semaphore->current_value = new_value;
+
+  return iree_ok_status();
+}
+
+// Signals the semaphore to |new_value| and, on success, wakes all waiters on
+// the shared notification so they can re-check their conditions.
+static iree_status_t iree_hal_sync_semaphore_signal(
+    iree_hal_semaphore_t* base_semaphore, uint64_t new_value) {
+  iree_hal_sync_semaphore_t* semaphore =
+      iree_hal_sync_semaphore_cast(base_semaphore);
+
+  iree_slim_mutex_lock(&semaphore->mutex);
+  iree_status_t status =
+      iree_hal_sync_semaphore_signal_unsafe(semaphore, new_value);
+  iree_slim_mutex_unlock(&semaphore->mutex);
+
+  if (iree_status_is_ok(status)) {
+    // Post a global notification so that any waiter will wake. Posting happens
+    // outside the lock.
+    // TODO(#4680): make notifications per-semaphore; would make multi-wait
+    // impossible with iree_notification_t and we'd have to use wait handles.
+    iree_notification_post(&semaphore->shared_state->notification,
+                           IREE_ALL_WAITERS);
+  }
+
+  return status;
+}
+
+// Puts the semaphore into a permanent failure state. Only the first failure
+// is preserved; subsequent failures are dropped so the original cause
+// survives. Takes ownership of |status|.
+static void iree_hal_sync_semaphore_fail(iree_hal_semaphore_t* base_semaphore,
+                                         iree_status_t status) {
+  iree_hal_sync_semaphore_t* semaphore =
+      iree_hal_sync_semaphore_cast(base_semaphore);
+
+  iree_slim_mutex_lock(&semaphore->mutex);
+
+  // Try to set our local status - we only preserve the first failure so only
+  // do this if we are going from a valid semaphore to a failed one.
+  if (!iree_status_is_ok(semaphore->failure_status)) {
+    // Previous status was not OK; drop our new status.
+    IREE_IGNORE_ERROR(status);
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return;
+  }
+
+  // Signal to our failure sentinel value.
+  semaphore->current_value = IREE_HAL_SYNC_SEMAPHORE_FAILURE_VALUE;
+  semaphore->failure_status = status;
+
+  iree_slim_mutex_unlock(&semaphore->mutex);
+
+  // Wake all waiters so they can observe the failure.
+  iree_notification_post(&semaphore->shared_state->notification,
+                         IREE_ALL_WAITERS);
+}
+
+// Signals every semaphore in |semaphore_list| to its paired payload value.
+// Stops at the first semaphore that rejects its new value but still posts the
+// shared notification since earlier entries may have been updated.
+iree_status_t iree_hal_sync_semaphore_multi_signal(
+    iree_hal_sync_semaphore_state_t* shared_state,
+    const iree_hal_semaphore_list_t* semaphore_list) {
+  // Try to signal all semaphores, stopping if we encounter any issues.
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < semaphore_list->count; ++i) {
+    iree_hal_sync_semaphore_t* semaphore =
+        iree_hal_sync_semaphore_cast(semaphore_list->semaphores[i]);
+    iree_slim_mutex_lock(&semaphore->mutex);
+    status = iree_hal_sync_semaphore_signal_unsafe(
+        semaphore, semaphore_list->payload_values[i]);
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    if (!iree_status_is_ok(status)) break;
+  }
+
+  // Notify all waiters that we've updated semaphores. They'll wake and check
+  // to see if they are satisfied.
+  // NOTE: we do this even if there was a failure as we may have signaled some
+  // of the list.
+  iree_notification_post(&shared_state->notification, IREE_ALL_WAITERS);
+
+  return status;
+}
+
+// Condition state captured for a single-semaphore wait.
+typedef struct iree_hal_sync_semaphore_notify_state_t {
+  iree_hal_sync_semaphore_t* semaphore;  // semaphore being waited on
+  uint64_t value;                        // payload value being awaited
+} iree_hal_sync_semaphore_notify_state_t;
+
+// Condition callback: true once the semaphore has reached the awaited value
+// or has entered the failure state.
+static bool iree_hal_sync_semaphore_is_signaled(
+    iree_hal_sync_semaphore_notify_state_t* state) {
+  iree_hal_sync_semaphore_t* semaphore = state->semaphore;
+  iree_slim_mutex_lock(&semaphore->mutex);
+  bool is_signaled = semaphore->current_value >= state->value ||
+                     !iree_status_is_ok(semaphore->failure_status);
+  iree_slim_mutex_unlock(&semaphore->mutex);
+  return is_signaled;
+}
+
+// Blocks until the semaphore reaches |value| or enters the failure state.
+// Immediate timeouts never block. ABORTED is returned on failure so callers
+// can query for the actual failure status.
+static iree_status_t iree_hal_sync_semaphore_wait(
+    iree_hal_semaphore_t* base_semaphore, uint64_t value,
+    iree_timeout_t timeout) {
+  iree_hal_sync_semaphore_t* semaphore =
+      iree_hal_sync_semaphore_cast(base_semaphore);
+
+  // Try to see if we can return immediately.
+  iree_slim_mutex_lock(&semaphore->mutex);
+  if (!iree_status_is_ok(semaphore->failure_status)) {
+    // Fastest path: failed; return an error to tell callers to query for it.
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return iree_status_from_code(IREE_STATUS_ABORTED);
+  } else if (semaphore->current_value >= value) {
+    // Fast path: already satisfied.
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return iree_ok_status();
+  } else if (iree_timeout_is_immediate(timeout)) {
+    // Not satisfied but a poll, so can avoid the expensive wait handle work.
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  }
+  iree_slim_mutex_unlock(&semaphore->mutex);
+
+  // TODO(#4680): we should be checking for DEADLINE_EXCEEDED here. This is
+  // easy when it's iree_timeout_is_infinite (we can just use the notification
+  // as below) but if it's an actual deadline we'll need to probably switch to
+  // iree_wait_handle_t.
+
+  // Perform wait on the global notification.
+  // NOTE(review): |timeout| is forwarded to the await below but per the TODO
+  // above finite deadlines are not fully handled on this path - confirm
+  // iree_notification_await honors them.
+  iree_hal_sync_semaphore_state_t* shared_state = semaphore->shared_state;
+  iree_hal_sync_semaphore_notify_state_t notify_state = {
+      .semaphore = semaphore,
+      .value = value,
+  };
+  iree_notification_await(
+      &shared_state->notification,
+      (iree_condition_fn_t)iree_hal_sync_semaphore_is_signaled,
+      (void*)&notify_state, timeout);
+
+  // Derive the result from the state observed after waking.
+  iree_status_t status = iree_ok_status();
+  iree_slim_mutex_lock(&semaphore->mutex);
+  if (!iree_status_is_ok(semaphore->failure_status)) {
+    // Semaphore has failed.
+    status = iree_status_from_code(IREE_STATUS_ABORTED);
+  } else if (semaphore->current_value < value) {
+    // Deadline expired before the semaphore was signaled.
+    status = iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  }
+  iree_slim_mutex_unlock(&semaphore->mutex);
+  return status;
+}
+
+// Returns true if any semaphore in the list has signaled (or failed).
+// Used with iree_condition_fn_t and must match that signature.
+static bool iree_hal_sync_semaphore_any_signaled(
+    const iree_hal_semaphore_list_t* semaphore_list) {
+  for (iree_host_size_t i = 0; i < semaphore_list->count; ++i) {
+    iree_hal_sync_semaphore_t* semaphore =
+        iree_hal_sync_semaphore_cast(semaphore_list->semaphores[i]);
+    iree_slim_mutex_lock(&semaphore->mutex);
+    bool is_signaled =
+        semaphore->current_value >= semaphore_list->payload_values[i] ||
+        !iree_status_is_ok(semaphore->failure_status);
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    if (is_signaled) return true;
+  }
+  return false;
+}
+
+// Returns true if all semaphores in the list have signaled (or any failed).
+// Used with iree_condition_fn_t and must match that signature.
+static bool iree_hal_sync_semaphore_all_signaled(
+    const iree_hal_semaphore_list_t* semaphore_list) {
+  for (iree_host_size_t i = 0; i < semaphore_list->count; ++i) {
+    iree_hal_sync_semaphore_t* semaphore =
+        iree_hal_sync_semaphore_cast(semaphore_list->semaphores[i]);
+    iree_slim_mutex_lock(&semaphore->mutex);
+    bool is_signaled =
+        semaphore->current_value >= semaphore_list->payload_values[i] ||
+        !iree_status_is_ok(semaphore->failure_status);
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    if (!is_signaled) return false;
+  }
+  return true;
+}
+
+// Returns a status derived from the |semaphore_list| at the current time:
+// - IREE_STATUS_OK: any or all semaphores signaled (based on |wait_mode|).
+// - IREE_STATUS_ABORTED: one or more semaphores failed.
+// - IREE_STATUS_DEADLINE_EXCEEDED: any or all semaphores unsignaled.
+static iree_status_t iree_hal_sync_semaphore_result_from_state(
+    iree_hal_wait_mode_t wait_mode,
+    const iree_hal_semaphore_list_t* semaphore_list) {
+  bool any_signaled = false;
+  bool all_signaled = true;
+  bool any_failed = false;
+  for (iree_host_size_t i = 0; i < semaphore_list->count; ++i) {
+    iree_hal_sync_semaphore_t* semaphore =
+        iree_hal_sync_semaphore_cast(semaphore_list->semaphores[i]);
+    iree_slim_mutex_lock(&semaphore->mutex);
+    if (!iree_status_is_ok(semaphore->failure_status)) {
+      // Semaphore has failed.
+      any_failed = true;
+    } else if (semaphore->current_value < semaphore_list->payload_values[i]) {
+      // Deadline expired before the semaphore was signaled.
+      all_signaled = false;
+    } else {
+      // Signaled!
+      any_signaled = true;
+    }
+    iree_slim_mutex_unlock(&semaphore->mutex);
+  }
+  if (any_failed) {
+    // Always prioritize failure state.
+    return iree_status_from_code(IREE_STATUS_ABORTED);
+  }
+  switch (wait_mode) {
+    // Unknown modes fall through to the conservative wait-all handling.
+    default:
+    case IREE_HAL_WAIT_MODE_ALL:
+      return all_signaled
+                 ? iree_ok_status()
+                 : iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+    case IREE_HAL_WAIT_MODE_ANY:
+      return any_signaled
+                 ? iree_ok_status()
+                 : iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  }
+}
+
+// Waits on multiple semaphores at once per |wait_mode|. Empty lists succeed
+// immediately, single-entry lists take the single-semaphore path, immediate
+// timeouts poll, and everything else blocks on the shared notification.
+iree_status_t iree_hal_sync_semaphore_multi_wait(
+    iree_hal_sync_semaphore_state_t* shared_state,
+    iree_hal_wait_mode_t wait_mode,
+    const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout) {
+  IREE_ASSERT_ARGUMENT(semaphore_list);
+  if (semaphore_list->count == 0) {
+    return iree_ok_status();
+  } else if (semaphore_list->count == 1) {
+    // Fast-path for a single semaphore.
+    return iree_hal_semaphore_wait(semaphore_list->semaphores[0],
+                                   semaphore_list->payload_values[0], timeout);
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Fast-path for polling; we'll never wait and can just do a quick query.
+  if (iree_timeout_is_immediate(timeout)) {
+    iree_status_t status =
+        iree_hal_sync_semaphore_result_from_state(wait_mode, semaphore_list);
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+
+  // Perform wait on the global notification.
+  // NOTE(review): the wait below uses an infinite timeout; a finite
+  // non-immediate |timeout| is not honored on this path (same limitation as
+  // the TODO(#4680) noted in iree_hal_sync_semaphore_wait).
+  iree_notification_await(
+      &shared_state->notification,
+      wait_mode == IREE_HAL_WAIT_MODE_ALL
+          ? (iree_condition_fn_t)iree_hal_sync_semaphore_all_signaled
+          : (iree_condition_fn_t)iree_hal_sync_semaphore_any_signaled,
+      (void*)semaphore_list, iree_infinite_timeout());
+
+  // We may have been successful - or may have a partial failure.
+  iree_status_t status =
+      iree_hal_sync_semaphore_result_from_state(wait_mode, semaphore_list);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Virtual function table routing base iree_hal_semaphore_t calls to the sync
+// implementation above.
+static const iree_hal_semaphore_vtable_t iree_hal_sync_semaphore_vtable = {
+    .destroy = iree_hal_sync_semaphore_destroy,
+    .query = iree_hal_sync_semaphore_query,
+    .signal = iree_hal_sync_semaphore_signal,
+    .fail = iree_hal_sync_semaphore_fail,
+    .wait = iree_hal_sync_semaphore_wait,
+};
diff --git a/runtime/src/iree/hal/local/sync_semaphore.h b/runtime/src/iree/hal/local/sync_semaphore.h
new file mode 100644
index 0000000..ecc6be6
--- /dev/null
+++ b/runtime/src/iree/hal/local/sync_semaphore.h
@@ -0,0 +1,74 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_SYNC_SEMAPHORE_H_
+#define IREE_HAL_LOCAL_SYNC_SEMAPHORE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_hal_sync_semaphore_state_t
+//===----------------------------------------------------------------------===//
+
+// State shared between all sync semaphores.
+// Owned by the device and guaranteed to remain valid for the lifetime of any
+// semaphore created from it.
+typedef struct iree_hal_sync_semaphore_state_t {
+ // In-process notification signaled when any semaphore value changes.
+ iree_notification_t notification;
+} iree_hal_sync_semaphore_state_t;
+
+// Initializes state used to perform semaphore synchronization.
+void iree_hal_sync_semaphore_state_initialize(
+ iree_hal_sync_semaphore_state_t* out_shared_state);
+
+// Deinitializes state used to perform semaphore synchronization; no semaphores
+// must be live with references.
+void iree_hal_sync_semaphore_state_deinitialize(
+ iree_hal_sync_semaphore_state_t* shared_state);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_sync_semaphore_t
+//===----------------------------------------------------------------------===//
+
+// Creates a semaphore that allows for ordering of operations on the local host.
+// Backed by a shared iree_notification_t in |shared_state|. Not efficient under
+// high contention or many simultaneous users but that's not what the
+// synchronous backend is intended for - if you want something efficient in the
+// face of hundreds or thousands of active asynchronous operations then use the
+// task system.
+iree_status_t iree_hal_sync_semaphore_create(
+ iree_hal_sync_semaphore_state_t* shared_state, uint64_t initial_value,
+ iree_allocator_t host_allocator, iree_hal_semaphore_t** out_semaphore);
+
+// Performs a signal of a list of semaphores.
+// The semaphores will transition to their new values (nearly) atomically and
+// batching up signals will reduce synchronization overhead.
+iree_status_t iree_hal_sync_semaphore_multi_signal(
+ iree_hal_sync_semaphore_state_t* shared_state,
+ const iree_hal_semaphore_list_t* semaphore_list);
+
+// Performs a multi-wait on one or more semaphores.
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the wait does not complete before
+// |timeout| elapses.
+iree_status_t iree_hal_sync_semaphore_multi_wait(
+ iree_hal_sync_semaphore_state_t* shared_state,
+ iree_hal_wait_mode_t wait_mode,
+ const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_SYNC_SEMAPHORE_H_
diff --git a/runtime/src/iree/hal/local/task_command_buffer.c b/runtime/src/iree/hal/local/task_command_buffer.c
new file mode 100644
index 0000000..0ed6533
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_command_buffer.c
@@ -0,0 +1,1023 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/task_command_buffer.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/hal/local/local_descriptor_set_layout.h"
+#include "iree/hal/local/local_executable.h"
+#include "iree/hal/local/local_executable_layout.h"
+#include "iree/hal/utils/resource_set.h"
+#include "iree/task/affinity_set.h"
+#include "iree/task/list.h"
+#include "iree/task/submission.h"
+#include "iree/task/task.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_command_buffer_t
+//===----------------------------------------------------------------------===//
+
+// iree/task/-based command buffer.
+// We track a minimal amount of state here and incrementally build out the task
+// DAG that we can submit to the task system directly. There's no intermediate
+// data structures and we produce the iree_task_ts directly. In the steady state
+// all allocations are served from a shared per-device block pool with no
+// additional allocations required during recording or execution. That means our
+// command buffer here is essentially just a builder for the task system types
+// and manager of the lifetime of the tasks.
+typedef struct iree_hal_task_command_buffer_t {
+  iree_hal_command_buffer_t base;  // base type; must remain the first field
+  iree_allocator_t host_allocator;  // allocator backing this struct
+
+  // Task scope captured at creation time.
+  iree_task_scope_t* scope;
+
+  // Arena used for all allocations; references the shared device block pool.
+  iree_arena_allocator_t arena;
+
+  // Maintains a reference to all resources used within the command buffer.
+  // Reset on each begin.
+  iree_hal_resource_set_t* resource_set;
+
+  // One or more tasks at the root of the command buffer task DAG.
+  // These tasks are all able to execute concurrently and will be the initial
+  // ready task set in the submission.
+  iree_task_list_t root_tasks;
+
+  // One or more tasks at the leaves of the DAG.
+  // Only once all these tasks have completed execution will the command buffer
+  // be considered completed as a whole.
+  //
+  // An empty list indicates that root_tasks are also the leaves.
+  iree_task_list_t leaf_tasks;
+
+  // TODO(benvanik): move this out of the struct and allocate from the arena -
+  // we only need this during recording and it's ~4KB of waste otherwise.
+  // State tracked within the command buffer during recording only.
+  struct {
+    // The last global barrier that was inserted, if any.
+    // The barrier is allocated and inserted into the DAG when requested but the
+    // actual barrier dependency list is only allocated and set on flushes.
+    // This lets us allocate the appropriately sized barrier task list from the
+    // arena even though when the barrier is recorded we don't yet know what
+    // other tasks we'll be emitting as we walk the command stream.
+    iree_task_barrier_t* open_barrier;
+
+    // The number of tasks in the open barrier (|open_tasks|), used to quickly
+    // allocate storage for the task list without needing to walk the list.
+    iree_host_size_t open_task_count;
+
+    // All execution tasks emitted that must execute after |open_barrier|.
+    iree_task_list_t open_tasks;
+
+    // A flattened list of all available descriptor set bindings.
+    // As descriptor sets are pushed/bound the bindings will be updated to
+    // represent the fully-translated binding data pointer.
+    // TODO(benvanik): support proper mapping semantics and track the
+    // iree_hal_buffer_mapping_t and map/unmap where appropriate.
+    void* bindings[IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT *
+                   IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT];
+    iree_device_size_t
+        binding_lengths[IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT *
+                        IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT];
+
+    // All available push constants updated each time push_constants is called.
+    // Reset only with the command buffer and otherwise will maintain its values
+    // during recording to allow for partial push_constants updates.
+    uint32_t push_constants[IREE_HAL_LOCAL_MAX_PUSH_CONSTANT_COUNT];
+  } state;
+} iree_hal_task_command_buffer_t;
+
+// Forward declaration for the cast helper below.
+static const iree_hal_command_buffer_vtable_t
+    iree_hal_task_command_buffer_vtable;
+
+// Downcasts |base_value| after asserting it uses the task vtable.
+static iree_hal_task_command_buffer_t* iree_hal_task_command_buffer_cast(
+    iree_hal_command_buffer_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_task_command_buffer_vtable);
+  return (iree_hal_task_command_buffer_t*)base_value;
+}
+
+// Creates a one-shot task command buffer that records tasks into a DAG
+// allocated from |block_pool|. Fails with UNIMPLEMENTED for any mode other
+// than one-shot as the recorded task structures cannot yet be reused.
+iree_status_t iree_hal_task_command_buffer_create(
+    iree_hal_device_t* device, iree_task_scope_t* scope,
+    iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity,
+    iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  IREE_ASSERT_ARGUMENT(out_command_buffer);
+  *out_command_buffer = NULL;
+  if (!iree_all_bits_set(mode, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT)) {
+    // If we want reuse we'd need to support duplicating the task DAG after
+    // recording or have some kind of copy-on-submit behavior that does so if
+    // a command buffer is submitted for execution twice. Allowing for the same
+    // command buffer to be enqueued multiple times would be fine so long as
+    // execution doesn't overlap (`cmdbuf|cmdbuf` vs
+    // `cmdbuf -> semaphore -> cmdbuf`) though we'd still need to be careful
+    // that we did the enqueuing and reset of the task structures at the right
+    // times. Definitely something that'll be useful in the future... but not
+    // today :)
+    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                            "only one-shot command buffer usage is supported");
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_task_command_buffer_t* command_buffer = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(*command_buffer), (void**)&command_buffer);
+  if (iree_status_is_ok(status)) {
+    iree_hal_command_buffer_initialize(
+        device, mode, command_categories, queue_affinity,
+        &iree_hal_task_command_buffer_vtable, &command_buffer->base);
+    command_buffer->host_allocator = host_allocator;
+    command_buffer->scope = scope;
+    iree_arena_initialize(block_pool, &command_buffer->arena);
+    iree_task_list_initialize(&command_buffer->root_tasks);
+    iree_task_list_initialize(&command_buffer->leaf_tasks);
+    memset(&command_buffer->state, 0, sizeof(command_buffer->state));
+    status = iree_hal_resource_set_allocate(block_pool,
+                                            &command_buffer->resource_set);
+  }
+  if (iree_status_is_ok(status)) {
+    *out_command_buffer = &command_buffer->base;
+  } else {
+    // NOTE(review): if the initial malloc failed |command_buffer| is NULL and
+    // this passes a NULL handle (base is the first field) - relies on release
+    // tolerating NULL; confirm.
+    iree_hal_command_buffer_release(&command_buffer->base);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Returns the command buffer to a freshly-created state so recording can
+// begin again: recording state is cleared, recorded tasks are discarded,
+// retained resources are released, and the arena (which backed the task
+// allocations) is reset last.
+static void iree_hal_task_command_buffer_reset(
+    iree_hal_task_command_buffer_t* command_buffer) {
+  memset(&command_buffer->state, 0, sizeof(command_buffer->state));
+  iree_task_list_discard(&command_buffer->leaf_tasks);
+  iree_task_list_discard(&command_buffer->root_tasks);
+  iree_hal_resource_set_reset(command_buffer->resource_set);
+  iree_arena_reset(&command_buffer->arena);
+}
+
+// Destroys the command buffer, discarding any recorded but unsubmitted tasks
+// before tearing down the arena and resource set.
+static void iree_hal_task_command_buffer_destroy(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+  iree_allocator_t host_allocator = command_buffer->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_task_command_buffer_reset(command_buffer);
+  iree_arena_deinitialize(&command_buffer->arena);
+  iree_hal_resource_set_free(command_buffer->resource_set);
+  iree_allocator_free(host_allocator, command_buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns true if |command_buffer| is an iree/task-backed command buffer that
+// may be safely downcast with iree_hal_task_command_buffer_cast.
+bool iree_hal_task_command_buffer_isa(
+    iree_hal_command_buffer_t* command_buffer) {
+  return iree_hal_command_buffer_dyn_cast(command_buffer,
+                                          &iree_hal_task_command_buffer_vtable);
+}
+
+// Returns |command_buffer| unwrapped when |vtable| identifies this
+// implementation and NULL for any other vtable.
+static void* iree_hal_task_command_buffer_dyn_cast(
+    iree_hal_command_buffer_t* command_buffer, const void* vtable) {
+  if (vtable != &iree_hal_task_command_buffer_vtable) return NULL;
+  IREE_HAL_ASSERT_TYPE(command_buffer, vtable);
+  return command_buffer;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_command_buffer_t recording
+//===----------------------------------------------------------------------===//
+
+static iree_status_t iree_hal_task_command_buffer_flush_tasks(
+ iree_hal_task_command_buffer_t* command_buffer);
+
+// Begins (or restarts) recording; any previously recorded tasks and state are
+// dropped via reset.
+static iree_status_t iree_hal_task_command_buffer_begin(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+  iree_hal_task_command_buffer_reset(command_buffer);
+  return iree_ok_status();
+}
+
+// Finalizes recording: flushes any pending tasks and establishes the root
+// task list for submission.
+static iree_status_t iree_hal_task_command_buffer_end(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+
+  // Flush any open barriers.
+  IREE_RETURN_IF_ERROR(
+      iree_hal_task_command_buffer_flush_tasks(command_buffer));
+
+  // Move the tasks from the leaf list (tail) to the root list (head) if this
+  // was the first set of tasks recorded.
+  if (iree_task_list_is_empty(&command_buffer->root_tasks) &&
+      !iree_task_list_is_empty(&command_buffer->leaf_tasks)) {
+    iree_task_list_move(&command_buffer->leaf_tasks,
+                        &command_buffer->root_tasks);
+  }
+
+  return iree_ok_status();
+}
+
+// Flushes all open tasks to the previous barrier and prepares for more
+// recording. The root tasks are also populated here when required as this is
+// the one place where we can see both halves of the most recent synchronization
+// event: those tasks recorded prior (if any) and the task that marks the set of
+// tasks that will be recorded after (if any).
+static iree_status_t iree_hal_task_command_buffer_flush_tasks(
+    iree_hal_task_command_buffer_t* command_buffer) {
+  iree_task_barrier_t* open_barrier = command_buffer->state.open_barrier;
+  if (open_barrier != NULL) {
+    // There is an open barrier we need to fixup the fork out to all of the open
+    // tasks that were recorded after it.
+    iree_task_t* task_head =
+        iree_task_list_front(&command_buffer->state.open_tasks);
+    iree_host_size_t dependent_task_count =
+        command_buffer->state.open_task_count;
+    if (dependent_task_count == 1) {
+      // Special-case: only one open task so we can avoid the additional barrier
+      // overhead by reusing the completion task.
+      iree_task_set_completion_task(&open_barrier->header, task_head);
+    } else if (dependent_task_count > 1) {
+      // Allocate the list of tasks we'll stash back on the previous barrier.
+      // Since we couldn't know at the time how many tasks would end up in the
+      // barrier we had to defer it until now.
+      // NOTE: the array is arena-backed and stays valid for the lifetime of
+      // the recorded tasks (until reset/destroy).
+      iree_task_t** dependent_tasks = NULL;
+      IREE_RETURN_IF_ERROR(iree_arena_allocate(
+          &command_buffer->arena, dependent_task_count * sizeof(iree_task_t*),
+          (void**)&dependent_tasks));
+      // Densely pack pointers by walking the intrusive open task list.
+      iree_task_t* task = task_head;
+      for (iree_host_size_t i = 0; i < dependent_task_count; ++i) {
+        dependent_tasks[i] = task;
+        task = task->next_task;
+      }
+      iree_task_barrier_set_dependent_tasks(open_barrier, dependent_task_count,
+                                            dependent_tasks);
+    }
+  }
+  // The barrier (if any) is now fully specified; the next global barrier will
+  // open a fresh one.
+  command_buffer->state.open_barrier = NULL;
+
+  // Move the open tasks to the tail as they represent the first half of the
+  // *next* barrier that will be inserted.
+  if (command_buffer->state.open_task_count > 0) {
+    iree_task_list_move(&command_buffer->state.open_tasks,
+                        &command_buffer->leaf_tasks);
+    command_buffer->state.open_task_count = 0;
+  }
+
+  return iree_ok_status();
+}
+
+// Emits a global barrier, splitting execution into all prior recorded tasks
+// and all subsequent recorded tasks. This is currently the critical piece that
+// limits our concurrency: changing to fine-grained barriers (via barrier
+// buffers or events) will allow more work to overlap at the cost of more brain
+// to build out the proper task graph.
+static iree_status_t iree_hal_task_command_buffer_emit_global_barrier(
+    iree_hal_task_command_buffer_t* command_buffer) {
+  // Flush open tasks to the previous barrier. This resets our state such that
+  // we can assign the new open barrier and start recording tasks for it.
+  // Previous tasks will be moved into the leaf_tasks list.
+  IREE_RETURN_IF_ERROR(
+      iree_hal_task_command_buffer_flush_tasks(command_buffer));
+
+  // Allocate the new open barrier.
+  // As we are recording forward we can't yet assign the dependent tasks (the
+  // second half of the synchronization domain) and instead are just inserting
+  // it so we can setup the join from previous tasks (the first half of the
+  // synchronization domain).
+  iree_task_barrier_t* barrier = NULL;
+  IREE_RETURN_IF_ERROR(iree_arena_allocate(&command_buffer->arena,
+                                           sizeof(*barrier), (void**)&barrier));
+  iree_task_barrier_initialize_empty(command_buffer->scope, barrier);
+
+  // If there were previous tasks then join them to the barrier.
+  for (iree_task_t* task = iree_task_list_front(&command_buffer->leaf_tasks);
+       task != NULL; task = task->next_task) {
+    iree_task_set_completion_task(task, &barrier->header);
+  }
+
+  // Move the tasks from the leaf list (tail) to the root list (head) if this
+  // was the first set of tasks recorded.
+  if (iree_task_list_is_empty(&command_buffer->root_tasks) &&
+      !iree_task_list_is_empty(&command_buffer->leaf_tasks)) {
+    iree_task_list_move(&command_buffer->leaf_tasks,
+                        &command_buffer->root_tasks);
+  }
+
+  // Reset the tail of the command buffer to the barrier. This leaves us in a
+  // consistent state if the recording ends immediately after this (the barrier
+  // will be the last task).
+  iree_task_list_initialize(&command_buffer->leaf_tasks);
+  iree_task_list_push_back(&command_buffer->leaf_tasks, &barrier->header);
+
+  // NOTE: all new tasks emitted will be executed after this barrier.
+  command_buffer->state.open_barrier = barrier;
+  command_buffer->state.open_task_count = 0;
+
+  return iree_ok_status();
+}
+
+// Appends the given execution |task| into the current open synchronization
+// scope (after state.open_barrier and before the next barrier).
+static iree_status_t iree_hal_task_command_buffer_emit_execution_task(
+    iree_hal_task_command_buffer_t* command_buffer, iree_task_t* task) {
+  if (command_buffer->state.open_barrier != NULL) {
+    // Track on the open list; the next barrier flush will wire up the
+    // dependencies from the open barrier to these tasks.
+    iree_task_list_push_back(&command_buffer->state.open_tasks, task);
+    ++command_buffer->state.open_task_count;
+  } else {
+    // No barrier yet: the task feeds straight into the head of the task DAG.
+    iree_task_list_push_back(&command_buffer->leaf_tasks, task);
+  }
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_command_buffer_t execution
+//===----------------------------------------------------------------------===//
+
+// Issues the recorded command buffer: chains |retire_task| onto the DAG
+// leaves (or roots for a single-layer DAG) and enqueues the root tasks onto
+// |pending_submission|.
+// NOTE(review): |queue_state| and |arena| are unused in this body today -
+// presumably reserved for event handling; confirm against the header contract.
+iree_status_t iree_hal_task_command_buffer_issue(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_task_queue_state_t* queue_state, iree_task_t* retire_task,
+    iree_arena_allocator_t* arena, iree_task_submission_t* pending_submission) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_command_buffer_dyn_cast(base_command_buffer,
+                                       &iree_hal_task_command_buffer_vtable);
+  IREE_ASSERT_TRUE(command_buffer);
+
+  // If the command buffer is empty (valid!) then we are a no-op.
+  bool has_root_tasks = !iree_task_list_is_empty(&command_buffer->root_tasks);
+  if (!has_root_tasks) {
+    return iree_ok_status();
+  }
+
+  bool has_leaf_tasks = !iree_task_list_is_empty(&command_buffer->leaf_tasks);
+  if (has_leaf_tasks) {
+    // Chain the retire task onto the leaf tasks as their completion indicates
+    // that all commands have completed.
+    for (iree_task_t* task = command_buffer->leaf_tasks.head; task != NULL;
+         task = task->next_task) {
+      iree_task_set_completion_task(task, retire_task);
+    }
+  } else {
+    // If we have no leaf tasks it means that this is a single layer DAG and
+    // after the root tasks complete the entire command buffer has completed.
+    for (iree_task_t* task = command_buffer->root_tasks.head; task != NULL;
+         task = task->next_task) {
+      iree_task_set_completion_task(task, retire_task);
+    }
+  }
+
+  // Enqueue all root tasks that are ready to run immediately.
+  // After this all of the command buffer tasks are owned by the submission and
+  // we need to ensure the command buffer doesn't try to discard them.
+  iree_task_submission_enqueue_list(pending_submission,
+                                    &command_buffer->root_tasks);
+  iree_task_list_initialize(&command_buffer->leaf_tasks);
+
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_command_buffer_t debug utilities
+//===----------------------------------------------------------------------===//
+
+// Opens a debug group; currently a no-op (all arguments ignored).
+static void iree_hal_task_command_buffer_begin_debug_group(
+    iree_hal_command_buffer_t* base_command_buffer, iree_string_view_t label,
+    iree_hal_label_color_t label_color,
+    const iree_hal_label_location_t* location) {
+  // TODO(benvanik): tracy event stack.
+}
+
+// Closes the most recently opened debug group; currently a no-op.
+static void iree_hal_task_command_buffer_end_debug_group(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  // TODO(benvanik): tracy event stack.
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_execution_barrier
+//===----------------------------------------------------------------------===//
+
+// Records an execution barrier. The stage masks, flags, and barrier lists are
+// currently ignored: every barrier degrades to a global join-fork point.
+static iree_status_t iree_hal_task_command_buffer_execution_barrier(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_hal_execution_barrier_flags_t flags,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  // TODO(benvanik): actual DAG construction. Right now we are just doing simple
+  // global barriers each time and forcing a join-fork point.
+  return iree_hal_task_command_buffer_emit_global_barrier(
+      iree_hal_task_command_buffer_cast(base_command_buffer));
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_signal_event
+//===----------------------------------------------------------------------===//
+
+// Signals |event|; currently a no-op pending event support.
+static iree_status_t iree_hal_task_command_buffer_signal_event(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  // TODO(#4518): implement events. For now we just insert global barriers.
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_reset_event
+//===----------------------------------------------------------------------===//
+
+// Resets |event|; currently a no-op pending event support.
+static iree_status_t iree_hal_task_command_buffer_reset_event(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  // TODO(#4518): implement events. For now we just insert global barriers.
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_wait_events
+//===----------------------------------------------------------------------===//
+
+// Waits on |events|; until events are implemented this conservatively emits a
+// full global barrier (all arguments other than the command buffer ignored).
+static iree_status_t iree_hal_task_command_buffer_wait_events(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_host_size_t event_count, const iree_hal_event_t** events,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  // TODO(#4518): implement events. For now we just insert global barriers.
+  return iree_hal_task_command_buffer_emit_global_barrier(
+      iree_hal_task_command_buffer_cast(base_command_buffer));
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_discard_buffer
+//===----------------------------------------------------------------------===//
+
+// Discard hint; no-op on the CPU task system (nothing to invalidate).
+static iree_status_t iree_hal_task_command_buffer_discard_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_t* buffer) {
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_fill_buffer
+//===----------------------------------------------------------------------===//
+// NOTE: for large copies we dispatch this as tiles for parallelism.
+// We'd want to do some measurement for when it's worth it; filling a 200KB
+// buffer: maybe not, filling a 200MB buffer: yeah. For now we just do
+// arbitrarily sized chunks.
+
+// TODO(benvanik): make this a configurable setting. Must be aligned to pattern
+// length so pick a power of two.
+#define IREE_HAL_CMD_FILL_SLICE_LENGTH (128 * 1024)
+
+// Arena-allocated payload for a deferred buffer fill; executed per-tile by
+// iree_hal_cmd_fill_tile.
+typedef struct iree_hal_cmd_fill_buffer_t {
+  iree_task_dispatch_t task;         // dispatch task issued via &task.header
+  iree_hal_buffer_t* target_buffer;  // retained by the resource set
+  iree_device_size_t target_offset;
+  iree_device_size_t length;         // total bytes to fill
+  uint32_t pattern_length;           // valid byte count in |pattern|
+  uint8_t pattern[8];                // inline copy of the fill pattern
+} iree_hal_cmd_fill_buffer_t;
+
+// Dispatch tile callback for buffer fills: fills this workgroup's byte slice
+// of the target buffer. The final slice may be shorter than the others.
+static iree_status_t iree_hal_cmd_fill_tile(
+    void* user_context, const iree_task_tile_context_t* tile_context,
+    iree_task_submission_t* pending_submission) {
+  const iree_hal_cmd_fill_buffer_t* cmd =
+      (const iree_hal_cmd_fill_buffer_t*)user_context;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Each workgroup along x covers workgroup_size[0] bytes of the fill.
+  const uint32_t bytes_per_slice = tile_context->workgroup_size[0];
+  const iree_device_size_t slice_begin =
+      tile_context->workgroup_xyz[0] * bytes_per_slice;
+  // Clamp the last slice to the bytes actually remaining.
+  const iree_device_size_t bytes_remaining = cmd->length - slice_begin;
+  const iree_device_size_t bytes_this_slice =
+      iree_min(bytes_per_slice, bytes_remaining);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)bytes_this_slice);
+
+  iree_status_t status = iree_hal_buffer_map_fill(
+      cmd->target_buffer, cmd->target_offset + slice_begin, bytes_this_slice,
+      cmd->pattern, cmd->pattern_length);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a buffer fill as a parallel tiled dispatch: the fill is split into
+// IREE_HAL_CMD_FILL_SLICE_LENGTH-byte slices, one workgroup per slice.
+// Returns INVALID_ARGUMENT if |pattern_length| exceeds the inline pattern
+// storage.
+static iree_status_t iree_hal_task_command_buffer_fill_buffer(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, const void* pattern,
+    iree_host_size_t pattern_length) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+
+  iree_hal_cmd_fill_buffer_t* cmd = NULL;
+
+  // FIX: bound-check the pattern before the memcpy below; previously an
+  // oversized |pattern_length| would overflow the fixed 8-byte cmd->pattern
+  // storage.
+  if (IREE_UNLIKELY(pattern_length > sizeof(cmd->pattern))) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "fill pattern length %zu exceeds maximum of %zu",
+                            pattern_length, sizeof(cmd->pattern));
+  }
+
+  // Retain the target buffer for the lifetime of the command buffer.
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &target_buffer));
+
+  IREE_RETURN_IF_ERROR(
+      iree_arena_allocate(&command_buffer->arena, sizeof(*cmd), (void**)&cmd));
+
+  // One workgroup per fixed-size slice along x; the tile callback clamps the
+  // final (possibly short) slice.
+  const uint32_t workgroup_size[3] = {
+      /*x=*/IREE_HAL_CMD_FILL_SLICE_LENGTH,
+      /*y=*/1,
+      /*z=*/1,
+  };
+  const uint32_t workgroup_count[3] = {
+      /*x=*/length / workgroup_size[0] + 1,
+      /*y=*/1,
+      /*z=*/1,
+  };
+  iree_task_dispatch_initialize(
+      command_buffer->scope,
+      iree_task_make_dispatch_closure(iree_hal_cmd_fill_tile, (void*)cmd),
+      workgroup_size, workgroup_count, &cmd->task);
+  cmd->target_buffer = target_buffer;
+  cmd->target_offset = target_offset;
+  cmd->length = length;
+  memcpy(cmd->pattern, pattern, pattern_length);
+  cmd->pattern_length = pattern_length;
+
+  return iree_hal_task_command_buffer_emit_execution_task(command_buffer,
+                                                          &cmd->task.header);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_update_buffer
+//===----------------------------------------------------------------------===//
+
+// Arena-allocated payload for a deferred host->device update; the source
+// bytes are snapshotted inline immediately after the struct.
+typedef struct iree_hal_cmd_update_buffer_t {
+  iree_task_call_t task;             // call task issued via &task.header
+  iree_hal_buffer_t* target_buffer;  // retained by the resource set
+  iree_device_size_t target_offset;
+  iree_device_size_t length;  // byte count stored in |source_buffer|
+  uint8_t source_buffer[];    // flexible array of snapshotted source bytes
+} iree_hal_cmd_update_buffer_t;
+
+// Task call callback: writes the inlined snapshot of source bytes into the
+// target buffer at execution time.
+static iree_status_t iree_hal_cmd_update_buffer(
+    void* user_context, iree_task_t* task,
+    iree_task_submission_t* pending_submission) {
+  const iree_hal_cmd_update_buffer_t* cmd =
+      (const iree_hal_cmd_update_buffer_t*)user_context;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status =
+      iree_hal_buffer_map_write(cmd->target_buffer, cmd->target_offset,
+                                cmd->source_buffer, cmd->length);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a host->device buffer update by snapshotting |length| source bytes
+// into the command buffer arena and deferring the write to execution time.
+static iree_status_t iree_hal_task_command_buffer_update_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
+    iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+    iree_device_size_t target_offset, iree_device_size_t length) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+
+  // Retain the target buffer until the command buffer is reset/destroyed.
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &target_buffer));
+
+  // Allocate the command with trailing storage for the source snapshot.
+  iree_hal_cmd_update_buffer_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_arena_allocate(
+      &command_buffer->arena, sizeof(iree_hal_cmd_update_buffer_t) + length,
+      (void**)&cmd));
+
+  iree_task_call_initialize(
+      command_buffer->scope,
+      iree_task_make_call_closure(iree_hal_cmd_update_buffer, (void*)cmd),
+      &cmd->task);
+  cmd->target_buffer = target_buffer;
+  cmd->target_offset = target_offset;
+  cmd->length = length;
+  memcpy(cmd->source_buffer, (const uint8_t*)source_buffer + source_offset,
+         length);
+
+  return iree_hal_task_command_buffer_emit_execution_task(command_buffer,
+                                                          &cmd->task.header);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_copy_buffer
+//===----------------------------------------------------------------------===//
+// NOTE: for large copies we dispatch this as tiles for parallelism.
+// We'd want to do some measurement for when it's worth it; copying a 200KB
+// buffer: maybe not, copying a 200MB buffer: yeah. For now we just do
+// arbitrarily sized chunks.
+
+// TODO(benvanik): make this a configurable setting. Must be aligned to pattern
+// length so pick a power of two.
+#define IREE_HAL_CMD_COPY_SLICE_LENGTH (128 * 1024)
+
+// Arena-allocated payload for a deferred buffer copy; executed per-tile by
+// iree_hal_cmd_copy_tile.
+typedef struct iree_hal_cmd_copy_buffer_t {
+  iree_task_dispatch_t task;         // dispatch task issued via &task.header
+  iree_hal_buffer_t* source_buffer;  // retained by the resource set
+  iree_device_size_t source_offset;
+  iree_hal_buffer_t* target_buffer;  // retained by the resource set
+  iree_device_size_t target_offset;
+  iree_device_size_t length;  // total bytes to copy
+} iree_hal_cmd_copy_buffer_t;
+
+// Dispatch tile callback for buffer copies: copies this workgroup's byte
+// slice from source to target. The final slice may be shorter than the
+// others (clamped via |remaining_length| below).
+static iree_status_t iree_hal_cmd_copy_tile(
+    void* user_context, const iree_task_tile_context_t* tile_context,
+    iree_task_submission_t* pending_submission) {
+  const iree_hal_cmd_copy_buffer_t* cmd =
+      (const iree_hal_cmd_copy_buffer_t*)user_context;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Each workgroup along x covers workgroup_size[0] bytes of the copy.
+  uint32_t length_per_slice = tile_context->workgroup_size[0];
+  iree_device_size_t slice_offset =
+      tile_context->workgroup_xyz[0] * length_per_slice;
+  iree_device_size_t remaining_length = cmd->length - slice_offset;
+  iree_device_size_t slice_length =
+      iree_min(length_per_slice, remaining_length);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)slice_length);
+
+  iree_status_t status = iree_hal_buffer_map_copy(
+      cmd->source_buffer, cmd->source_offset + slice_offset, cmd->target_buffer,
+      cmd->target_offset + slice_offset, slice_length);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Records a buffer copy as a parallel tiled dispatch: the copy is split into
+// IREE_HAL_CMD_COPY_SLICE_LENGTH-byte slices, one workgroup per slice.
+static iree_status_t iree_hal_task_command_buffer_copy_buffer(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+
+  // Retain both buffers for the lifetime of the command buffer.
+  const iree_hal_buffer_t* buffers[2] = {source_buffer, target_buffer};
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 2, buffers));
+
+  iree_hal_cmd_copy_buffer_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_arena_allocate(&command_buffer->arena, sizeof(*cmd), (void**)&cmd));
+
+  const uint32_t workgroup_size[3] = {
+      /*x=*/IREE_HAL_CMD_COPY_SLICE_LENGTH,
+      /*y=*/1,
+      /*z=*/1,
+  };
+  // NOTE(review): length/size + 1 over-counts by one workgroup when |length|
+  // is an exact multiple of the slice size; the tile callback clamps that
+  // trailing slice to zero bytes - presumably a harmless empty copy; confirm.
+  const uint32_t workgroup_count[3] = {
+      /*x=*/length / workgroup_size[0] + 1,
+      /*y=*/1,
+      /*z=*/1,
+  };
+  iree_task_dispatch_initialize(
+      command_buffer->scope,
+      iree_task_make_dispatch_closure(iree_hal_cmd_copy_tile, (void*)cmd),
+      workgroup_size, workgroup_count, &cmd->task);
+  cmd->source_buffer = source_buffer;
+  cmd->source_offset = source_offset;
+  cmd->target_buffer = target_buffer;
+  cmd->target_offset = target_offset;
+  cmd->length = length;
+
+  return iree_hal_task_command_buffer_emit_execution_task(command_buffer,
+                                                          &cmd->task.header);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_push_constants
+//===----------------------------------------------------------------------===//
+// NOTE: command buffer state change only; enqueues no tasks.
+
+// Stages push constant bytes into command buffer state; no tasks enqueued.
+// The staged values are copied into each dispatch at record time.
+static iree_status_t iree_hal_task_command_buffer_push_constants(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+    const void* values, iree_host_size_t values_length) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+
+  // FIX: > (not >=) - a write ending exactly at the end of the inline
+  // storage is in-bounds; the previous >= rejected a full-range update.
+  if (IREE_UNLIKELY(offset + values_length >
+                    sizeof(command_buffer->state.push_constants))) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "push constant range %zu (length=%zu) out of range",
+                            offset, values_length);
+  }
+
+  memcpy((uint8_t*)&command_buffer->state.push_constants + offset, values,
+         values_length);
+
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_push_descriptor_set
+//===----------------------------------------------------------------------===//
+// NOTE: command buffer state change only; enqueues no tasks.
+
+// Stages descriptor bindings into command buffer state; no tasks enqueued.
+// Each binding's buffer is retained and persistently mapped so dispatches can
+// read the host pointers directly at record time.
+static iree_status_t iree_hal_task_command_buffer_push_descriptor_set(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+
+  if (IREE_UNLIKELY(set >= IREE_HAL_LOCAL_MAX_DESCRIPTOR_SET_COUNT)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "set %u out of bounds", set);
+  }
+
+  // Bindings are stored in a flat table indexed by
+  // set * MAX_BINDING_COUNT + binding ordinal.
+  iree_host_size_t binding_base =
+      set * IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT;
+  for (iree_host_size_t i = 0; i < binding_count; ++i) {
+    if (IREE_UNLIKELY(bindings[i].binding >=
+                      IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT)) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "buffer binding index out of bounds");
+    }
+    iree_host_size_t binding_ordinal = binding_base + bindings[i].binding;
+
+    // TODO(benvanik): batch insert by getting the resources in their own list.
+    IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+        command_buffer->resource_set, 1, &bindings[i].buffer));
+
+    // TODO(benvanik): track mapping so we can properly map/unmap/flush/etc.
+    // NOTE(review): the persistent mapping is never explicitly unmapped here;
+    // presumably released with the buffer - confirm against the buffer API.
+    iree_hal_buffer_mapping_t buffer_mapping = {{0}};
+    IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+        bindings[i].buffer, IREE_HAL_MAPPING_MODE_PERSISTENT,
+        IREE_HAL_MEMORY_ACCESS_ANY, bindings[i].offset, bindings[i].length,
+        &buffer_mapping));
+    command_buffer->state.bindings[binding_ordinal] =
+        buffer_mapping.contents.data;
+    command_buffer->state.binding_lengths[binding_ordinal] =
+        buffer_mapping.contents.data_length;
+  }
+
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_bind_descriptor_set
+//===----------------------------------------------------------------------===//
+// NOTE: command buffer state change only; enqueues no tasks.
+
+// Binds a pre-recorded descriptor set; not yet supported on the task system
+// (callers must use push_descriptor_set instead).
+static iree_status_t iree_hal_task_command_buffer_bind_descriptor_set(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_hal_descriptor_set_t* descriptor_set,
+    iree_host_size_t dynamic_offset_count,
+    const iree_device_size_t* dynamic_offsets) {
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "descriptor set binding not yet implemented");
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_dispatch
+//===----------------------------------------------------------------------===//
+
+// Arena-allocated payload for a deferred executable dispatch; executed
+// per-tile by iree_hal_cmd_dispatch_tile. Variable-length trailing tables
+// follow the struct (see layout note below).
+typedef struct iree_hal_cmd_dispatch_t {
+  iree_task_dispatch_t task;             // dispatch task
+  iree_hal_local_executable_t* executable;  // retained by the resource set
+  int32_t ordinal;                       // entry point ordinal in |executable|
+
+  // Total number of available 4 byte push constant values in |push_constants|.
+  uint16_t push_constant_count;
+
+  // Total number of binding base pointers in |binding_ptrs| and
+  // |binding_lengths|. The set is packed densely based on which bindings are
+  // used (known at compile-time).
+  uint16_t binding_count;
+
+  // Following this structure in memory there are 3 tables:
+  // - const uint32_t push_constants[push_constant_count];
+  // - void* binding_ptrs[binding_count];
+  // - const size_t binding_lengths[binding_count];
+} iree_hal_cmd_dispatch_t;
+
+// Dispatch tile callback: reconstructs the dispatch/workgroup state from the
+// command payload and issues one workgroup invocation of the executable.
+static iree_status_t iree_hal_cmd_dispatch_tile(
+    void* user_context, const iree_task_tile_context_t* tile_context,
+    iree_task_submission_t* pending_submission) {
+  const iree_hal_cmd_dispatch_t* cmd =
+      (const iree_hal_cmd_dispatch_t*)user_context;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // We could share this across all workgroups in a dispatch and reduce cache
+  // pressure as all cores would be hitting the same hot read-only cache line.
+  // It'd grow the size of iree_hal_cmd_dispatch_t by a few dozen bytes, though,
+  // and so we'd need some profiling to see if it's worth it (fixed command
+  // buffer cost vs potential for saving a cache miss or two).
+  iree_alignas(64) iree_hal_executable_dispatch_state_v0_t dispatch_state = {
+      .workgroup_size_x = tile_context->workgroup_size[0],
+      .workgroup_size_y = tile_context->workgroup_size[1],
+      .workgroup_size_z = tile_context->workgroup_size[2],
+      .push_constant_count = cmd->push_constant_count,
+      .workgroup_count_x = tile_context->workgroup_count[0],
+      .workgroup_count_y = tile_context->workgroup_count[1],
+      .workgroup_count_z = tile_context->workgroup_count[2],
+      .max_concurrency =
+          iree_task_affinity_set_count_ones(cmd->task.header.affinity_set),
+      .binding_count = cmd->binding_count,
+  };
+  // Walk the three variable-length tables that trail the command struct;
+  // this order must match the layout written by build_dispatch.
+  uint8_t* cmd_ptr = (uint8_t*)cmd + sizeof(*cmd);
+  dispatch_state.push_constants = (uint32_t*)cmd_ptr;
+  cmd_ptr += cmd->push_constant_count * sizeof(*dispatch_state.push_constants);
+  dispatch_state.binding_ptrs = (void**)cmd_ptr;
+  cmd_ptr += cmd->binding_count * sizeof(*dispatch_state.binding_ptrs);
+  dispatch_state.binding_lengths = (size_t*)cmd_ptr;
+  cmd_ptr += cmd->binding_count * sizeof(*dispatch_state.binding_lengths);
+
+  // Per-workgroup state: identifies which tile this is within the grid and
+  // hands the tile its scratch memory.
+  const iree_alignas(64)
+      iree_hal_executable_workgroup_state_v0_t workgroup_state = {
+          .workgroup_id_x = tile_context->workgroup_xyz[0],
+          .workgroup_id_y = tile_context->workgroup_xyz[1],
+          .workgroup_id_z = tile_context->workgroup_xyz[2],
+          .reserved = 0,
+          .processor_id = tile_context->processor_id,
+          .local_memory = tile_context->local_memory.data,
+          .local_memory_size = (size_t)tile_context->local_memory.data_length,
+      };
+  iree_status_t status = iree_hal_local_executable_issue_call(
+      cmd->executable, cmd->ordinal, &dispatch_state, &workgroup_state);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Builds and records a dispatch command, snapshotting the push constants and
+// bindings currently staged in command buffer state. On success |out_cmd|
+// receives the arena-allocated command so callers (dispatch_indirect) can
+// patch it after recording.
+static iree_status_t iree_hal_task_command_buffer_build_dispatch(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z,
+    iree_hal_cmd_dispatch_t** out_cmd) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+
+  iree_hal_local_executable_t* local_executable =
+      iree_hal_local_executable_cast(executable);
+  iree_hal_local_executable_layout_t* local_layout =
+      local_executable->executable_layouts[entry_point];
+  iree_host_size_t push_constant_count = local_layout->push_constants;
+  iree_hal_local_binding_mask_t used_binding_mask = local_layout->used_bindings;
+  iree_host_size_t used_binding_count =
+      iree_math_count_ones_u64(used_binding_mask);
+
+  // To save a few command buffer bytes we narrow these:
+  if (IREE_UNLIKELY(push_constant_count >= UINT16_MAX) ||
+      IREE_UNLIKELY(used_binding_count >= UINT16_MAX)) {
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "too many bindings/push constants");
+  }
+
+  // Single allocation: the command struct plus its three trailing tables
+  // (push constants, binding pointers, binding lengths) - the layout read
+  // back by iree_hal_cmd_dispatch_tile.
+  iree_hal_cmd_dispatch_t* cmd = NULL;
+  iree_host_size_t total_cmd_size =
+      sizeof(*cmd) + push_constant_count * sizeof(uint32_t) +
+      used_binding_count * sizeof(void*) +
+      used_binding_count * sizeof(iree_device_size_t);
+  IREE_RETURN_IF_ERROR(iree_arena_allocate(&command_buffer->arena,
+                                           total_cmd_size, (void**)&cmd));
+
+  cmd->executable = local_executable;
+  cmd->ordinal = entry_point;
+  cmd->push_constant_count = push_constant_count;
+  cmd->binding_count = used_binding_count;
+
+  const uint32_t workgroup_count[3] = {workgroup_x, workgroup_y, workgroup_z};
+  // TODO(benvanik): expose on API or keep fixed on executable.
+  const uint32_t workgroup_size[3] = {1, 1, 1};
+  iree_task_dispatch_initialize(
+      command_buffer->scope,
+      iree_task_make_dispatch_closure(iree_hal_cmd_dispatch_tile, (void*)cmd),
+      workgroup_size, workgroup_count, &cmd->task);
+
+  // Tell the task system how much workgroup local memory is required for the
+  // dispatch; each invocation of the entry point will have at least as much
+  // scratch memory available during execution.
+  cmd->task.local_memory_size =
+      local_executable->dispatch_attrs
+          ? local_executable->dispatch_attrs[entry_point].local_memory_pages *
+                IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE
+          : 0;
+
+  // Copy only the push constant range used by the executable.
+  uint8_t* cmd_ptr = (uint8_t*)cmd + sizeof(*cmd);
+  uint32_t* push_constants = (uint32_t*)cmd_ptr;
+  memcpy(push_constants, command_buffer->state.push_constants,
+         push_constant_count * sizeof(*push_constants));
+  cmd_ptr += push_constant_count * sizeof(*push_constants);
+
+  // Produce the dense binding list based on the declared bindings used.
+  // This allows us to change the descriptor sets and bindings counts supported
+  // in the HAL independent of any executable as each executable just gets the
+  // flat dense list and doesn't care about our descriptor set stuff.
+  //
+  // Note that we are just directly setting the binding data pointers here with
+  // no ownership/retaining/etc - it's part of the HAL contract that buffers are
+  // kept valid for the duration they may be in use.
+  void** binding_ptrs = (void**)cmd_ptr;
+  cmd_ptr += used_binding_count * sizeof(*binding_ptrs);
+  size_t* binding_lengths = (size_t*)cmd_ptr;
+  cmd_ptr += used_binding_count * sizeof(*binding_lengths);
+  // Walk set bits of the mask low-to-high; each set bit selects the next
+  // densely-packed binding slot.
+  iree_host_size_t binding_base = 0;
+  for (iree_host_size_t i = 0; i < used_binding_count; ++i) {
+    int mask_offset = iree_math_count_trailing_zeros_u64(used_binding_mask);
+    int binding_ordinal = binding_base + mask_offset;
+    binding_base += mask_offset + 1;
+    used_binding_mask = iree_shr(used_binding_mask, mask_offset + 1);
+    binding_ptrs[i] = command_buffer->state.bindings[binding_ordinal];
+    binding_lengths[i] = command_buffer->state.binding_lengths[binding_ordinal];
+    if (!binding_ptrs[i]) {
+      // NOTE: the arena-allocated cmd is simply abandoned here; the arena
+      // reclaims it on reset.
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "(flat) binding %d is NULL", binding_ordinal);
+    }
+  }
+
+  *out_cmd = cmd;
+  return iree_hal_task_command_buffer_emit_execution_task(command_buffer,
+                                                          &cmd->task.header);
+}
+
+// Records a direct dispatch of |executable| entry |entry_point| with a
+// statically-known workgroup count.
+static iree_status_t iree_hal_task_command_buffer_dispatch(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+  // Keep the executable live for the lifetime of the command buffer.
+  iree_status_t status = iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &executable);
+  if (iree_status_is_ok(status)) {
+    // Direct dispatches have no post-record patching so the cmd is unused.
+    iree_hal_cmd_dispatch_t* unused_cmd = NULL;
+    status = iree_hal_task_command_buffer_build_dispatch(
+        base_command_buffer, executable, entry_point, workgroup_x, workgroup_y,
+        workgroup_z, &unused_cmd);
+  }
+  return status;
+}
+
+// Records an indirect dispatch: the workgroup count is read at execution time
+// from |workgroups_buffer| (3x uint32_t at |workgroups_offset|).
+static iree_status_t iree_hal_task_command_buffer_dispatch_indirect(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    iree_hal_buffer_t* workgroups_buffer,
+    iree_device_size_t workgroups_offset) {
+  iree_hal_task_command_buffer_t* command_buffer =
+      iree_hal_task_command_buffer_cast(base_command_buffer);
+
+  // Retain both the executable and the workgroup count buffer.
+  const void* resources[2] = {executable, workgroups_buffer};
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 2, resources));
+
+  // TODO(benvanik): track mapping so we can properly map/unmap/flush/etc.
+  // NOTE(review): the mapped pointer is stored on the task below and read at
+  // execution time - presumably the persistent mapping remains valid until
+  // then; confirm against the buffer mapping contract.
+  iree_hal_buffer_mapping_t buffer_mapping = {{0}};
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+      workgroups_buffer, IREE_HAL_MAPPING_MODE_PERSISTENT,
+      IREE_HAL_MEMORY_ACCESS_READ, workgroups_offset, 3 * sizeof(uint32_t),
+      &buffer_mapping));
+
+  // Record with a placeholder (0,0,0) count and then patch the task to pull
+  // the count from the mapped buffer at execution time.
+  iree_hal_cmd_dispatch_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_task_command_buffer_build_dispatch(
+      base_command_buffer, executable, entry_point, 0, 0, 0, &cmd));
+  cmd->task.workgroup_count.ptr = (const uint32_t*)buffer_mapping.contents.data;
+  cmd->task.header.flags |= IREE_TASK_FLAG_DISPATCH_INDIRECT;
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_vtable_t
+//===----------------------------------------------------------------------===//
+
+// Vtable wiring the HAL command buffer interface to the task-system
+// implementations above.
+static const iree_hal_command_buffer_vtable_t
+    iree_hal_task_command_buffer_vtable = {
+        .destroy = iree_hal_task_command_buffer_destroy,
+        .dyn_cast = iree_hal_task_command_buffer_dyn_cast,
+        .begin = iree_hal_task_command_buffer_begin,
+        .end = iree_hal_task_command_buffer_end,
+        .begin_debug_group = iree_hal_task_command_buffer_begin_debug_group,
+        .end_debug_group = iree_hal_task_command_buffer_end_debug_group,
+        .execution_barrier = iree_hal_task_command_buffer_execution_barrier,
+        .signal_event = iree_hal_task_command_buffer_signal_event,
+        .reset_event = iree_hal_task_command_buffer_reset_event,
+        .wait_events = iree_hal_task_command_buffer_wait_events,
+        .discard_buffer = iree_hal_task_command_buffer_discard_buffer,
+        .fill_buffer = iree_hal_task_command_buffer_fill_buffer,
+        .update_buffer = iree_hal_task_command_buffer_update_buffer,
+        .copy_buffer = iree_hal_task_command_buffer_copy_buffer,
+        .push_constants = iree_hal_task_command_buffer_push_constants,
+        .push_descriptor_set = iree_hal_task_command_buffer_push_descriptor_set,
+        .bind_descriptor_set = iree_hal_task_command_buffer_bind_descriptor_set,
+        .dispatch = iree_hal_task_command_buffer_dispatch,
+        .dispatch_indirect = iree_hal_task_command_buffer_dispatch_indirect,
+};
diff --git a/runtime/src/iree/hal/local/task_command_buffer.h b/runtime/src/iree/hal/local/task_command_buffer.h
new file mode 100644
index 0000000..5e18fbd
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_command_buffer.h
@@ -0,0 +1,58 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_TASK_COMMAND_BUFFER_H_
+#define IREE_HAL_LOCAL_TASK_COMMAND_BUFFER_H_
+
+#include "iree/base/api.h"
+#include "iree/base/internal/arena.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/task_queue_state.h"
+#include "iree/task/scope.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a command buffer for |device| that records commands as iree/task/
+// tasks within |scope|. The command buffer itself is allocated from
+// |host_allocator| and returned in |out_command_buffer|.
+// NOTE(review): |block_pool| presumably backs task/inline-data allocations
+// made during recording and must out-live the command buffer — confirm
+// against task_command_buffer.c.
+iree_status_t iree_hal_task_command_buffer_create(
+ iree_hal_device_t* device, iree_task_scope_t* scope,
+ iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity,
+ iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
+ iree_hal_command_buffer_t** out_command_buffer);
+
+// Returns true if |command_buffer| is a task system command buffer.
+bool iree_hal_task_command_buffer_isa(
+ iree_hal_command_buffer_t* command_buffer);
+
+// Issues a recorded command buffer using the serial |queue_state|.
+// |queue_state| is used to track the synchronization scope of the queue from
+// prior commands such as signaled events and will be mutated as events are
+// reset or new events are signaled.
+//
+// |retire_task| will be scheduled once all commands issued from the command
+// buffer retire and can be used as a fence point.
+//
+// Any new tasks that are allocated as part of the issue operation (such as
+// barrier tasks to handle event synchronization) will be acquired from |arena|.
+// The lifetime of |arena| must be at least that of |retire_task| ensuring that
+// all of the allocated commands issued have completed and their memory in the
+// arena can be recycled.
+//
+// |pending_submission| will receive the ready list of commands and must be
+// submitted to the executor (or discarded on failure) by the caller.
+iree_status_t iree_hal_task_command_buffer_issue(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_task_queue_state_t* queue_state, iree_task_t* retire_task,
+ iree_arena_allocator_t* arena, iree_task_submission_t* pending_submission);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_TASK_COMMAND_BUFFER_H_
diff --git a/runtime/src/iree/hal/local/task_device.c b/runtime/src/iree/hal/local/task_device.c
new file mode 100644
index 0000000..6170367
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_device.c
@@ -0,0 +1,377 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/task_device.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/internal/arena.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/local_descriptor_set.h"
+#include "iree/hal/local/local_descriptor_set_layout.h"
+#include "iree/hal/local/local_executable_cache.h"
+#include "iree/hal/local/local_executable_layout.h"
+#include "iree/hal/local/task_command_buffer.h"
+#include "iree/hal/local/task_event.h"
+#include "iree/hal/local/task_queue.h"
+#include "iree/hal/local/task_semaphore.h"
+#include "iree/hal/utils/buffer_transfer.h"
+
+// A local CPU device that schedules work through an iree/task/ executor.
+// The device, its queue array, its loader pointer array, and the identifier
+// string are packed into a single allocation (see iree_hal_task_device_create).
+typedef struct iree_hal_task_device_t {
+ iree_hal_resource_t resource;
+ // Device identifier string; storage lives in the trailing bytes of this
+ // allocation.
+ iree_string_view_t identifier;
+
+ // Block pool used for small allocations like tasks and submissions.
+ iree_arena_block_pool_t small_block_pool;
+
+ // Block pool used for command buffers with a larger block size (as command
+ // buffers can contain inlined data uploads).
+ iree_arena_block_pool_t large_block_pool;
+
+ // Task executor shared by all queues; retained for the device lifetime.
+ iree_task_executor_t* executor;
+
+ // Executable loaders available to the executable cache; each is retained.
+ iree_host_size_t loader_count;
+ iree_hal_executable_loader_t** loaders;
+
+ // Allocator this device struct was allocated from and that serves host
+ // allocations made on its behalf.
+ iree_allocator_t host_allocator;
+ // Device memory allocator; retained.
+ iree_hal_allocator_t* device_allocator;
+
+ // Queues, one per synchronization scope; flexible array member sized by
+ // params->queue_count at creation.
+ iree_host_size_t queue_count;
+ iree_hal_task_queue_t queues[];
+} iree_hal_task_device_t;
+
+static const iree_hal_device_vtable_t iree_hal_task_device_vtable;
+
+// Downcasts |base_value| to the task device implementation type, asserting
+// (in builds where IREE_HAL_ASSERT_TYPE is active) that the vtable matches.
+static iree_hal_task_device_t* iree_hal_task_device_cast(
+ iree_hal_device_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_task_device_vtable);
+ return (iree_hal_task_device_t*)base_value;
+}
+
+// Initializes |out_params| to the defaults: 32KiB arena blocks and 8 queues.
+void iree_hal_task_device_params_initialize(
+ iree_hal_task_device_params_t* out_params) {
+ out_params->arena_block_size = 32 * 1024;
+ out_params->queue_count = 8;
+}
+
+// Validates device creation |params|, returning IREE_STATUS_INVALID_ARGUMENT
+// if the arena block size is under 4096 bytes or no queues were requested.
+static iree_status_t iree_hal_task_device_check_params(
+ const iree_hal_task_device_params_t* params) {
+ if (params->arena_block_size < 4096) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "arena block size too small (< 4096 bytes)");
+ }
+ if (params->queue_count == 0) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "at least one queue is required");
+ }
+ return iree_ok_status();
+}
+
+// Creates a task device sharing |executor| across |params->queue_count|
+// queues. Everything — device struct, queue array, loader pointer array, and
+// the |identifier| characters — is placed into one allocation from
+// |host_allocator|. |executor|, |device_allocator|, and each loader are
+// retained; on failure any partially-constructed device is released.
+iree_status_t iree_hal_task_device_create(
+ iree_string_view_t identifier, const iree_hal_task_device_params_t* params,
+ iree_task_executor_t* executor, iree_host_size_t loader_count,
+ iree_hal_executable_loader_t** loaders,
+ iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
+ iree_hal_device_t** out_device) {
+ IREE_ASSERT_ARGUMENT(params);
+ IREE_ASSERT_ARGUMENT(!loader_count || loaders);
+ IREE_ASSERT_ARGUMENT(device_allocator);
+ IREE_ASSERT_ARGUMENT(out_device);
+ *out_device = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(z0,
+ iree_hal_task_device_check_params(params));
+
+ // Layout: [struct | queues[] | loaders[] | identifier chars].
+ iree_hal_task_device_t* device = NULL;
+ iree_host_size_t struct_size = sizeof(*device) +
+ params->queue_count * sizeof(*device->queues) +
+ loader_count * sizeof(*device->loaders);
+ iree_host_size_t total_size = struct_size + identifier.size;
+ iree_status_t status =
+ iree_allocator_malloc(host_allocator, total_size, (void**)&device);
+ if (iree_status_is_ok(status)) {
+ memset(device, 0, total_size);
+ iree_hal_resource_initialize(&iree_hal_task_device_vtable,
+ &device->resource);
+ // Copy the identifier into the trailing storage after the arrays.
+ iree_string_view_append_to_buffer(identifier, &device->identifier,
+ (char*)device + struct_size);
+ device->host_allocator = host_allocator;
+ device->device_allocator = device_allocator;
+ iree_hal_allocator_retain(device_allocator);
+
+ // Small pool for tasks/submissions, large pool (params-sized) for command
+ // buffer recordings which may include inlined data uploads.
+ iree_arena_block_pool_initialize(4096, host_allocator,
+ &device->small_block_pool);
+ iree_arena_block_pool_initialize(params->arena_block_size, host_allocator,
+ &device->large_block_pool);
+
+ device->executor = executor;
+ iree_task_executor_retain(device->executor);
+
+ // The loader pointer array lives immediately after the inline queue array.
+ device->loader_count = loader_count;
+ device->loaders =
+ (iree_hal_executable_loader_t**)((uint8_t*)device + sizeof(*device) +
+ params->queue_count *
+ sizeof(*device->queues));
+ for (iree_host_size_t i = 0; i < device->loader_count; ++i) {
+ device->loaders[i] = loaders[i];
+ iree_hal_executable_loader_retain(device->loaders[i]);
+ }
+
+ // All queues share the executor and the small block pool.
+ device->queue_count = params->queue_count;
+ for (iree_host_size_t i = 0; i < device->queue_count; ++i) {
+ // TODO(benvanik): add a number to each queue ID.
+ iree_hal_task_queue_initialize(device->identifier, device->executor,
+ &device->small_block_pool,
+ &device->queues[i]);
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_device = (iree_hal_device_t*)device;
+ } else {
+ iree_hal_device_release((iree_hal_device_t*)device);
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Tears down the device: queues are deinitialized before the block pools they
+// draw from, then retained resources (loaders, executor, device allocator)
+// are released and the single backing allocation is freed.
+static void iree_hal_task_device_destroy(iree_hal_device_t* base_device) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ iree_allocator_t host_allocator = iree_hal_device_host_allocator(base_device);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ for (iree_host_size_t i = 0; i < device->queue_count; ++i) {
+ iree_hal_task_queue_deinitialize(&device->queues[i]);
+ }
+ for (iree_host_size_t i = 0; i < device->loader_count; ++i) {
+ iree_hal_executable_loader_release(device->loaders[i]);
+ }
+ iree_task_executor_release(device->executor);
+ iree_arena_block_pool_deinitialize(&device->large_block_pool);
+ iree_arena_block_pool_deinitialize(&device->small_block_pool);
+ iree_hal_allocator_release(device->device_allocator);
+ iree_allocator_free(host_allocator, device);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the identifier the device was created with.
+static iree_string_view_t iree_hal_task_device_id(
+ iree_hal_device_t* base_device) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ return device->identifier;
+}
+
+// Returns the allocator that owns the device struct itself.
+static iree_allocator_t iree_hal_task_device_host_allocator(
+ iree_hal_device_t* base_device) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ return device->host_allocator;
+}
+
+// Returns the (retained) device memory allocator.
+static iree_hal_allocator_t* iree_hal_task_device_allocator(
+ iree_hal_device_t* base_device) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ return device->device_allocator;
+}
+
+// Releases unused pooled memory back to the system: both arena block pools,
+// the executor, and finally the device allocator (whose status is returned).
+static iree_status_t iree_hal_task_device_trim(iree_hal_device_t* base_device) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ iree_arena_block_pool_trim(&device->small_block_pool);
+ iree_arena_block_pool_trim(&device->large_block_pool);
+ iree_task_executor_trim(device->executor);
+ return iree_hal_allocator_trim(device->device_allocator);
+}
+
+// Answers int32 configuration queries:
+// 'hal.executable.format' |key| -> 1 if any loader supports that format.
+// 'hal.device'/'concurrency' -> number of device queues.
+// 'hal.dispatch'/'concurrency' -> executor worker count.
+// Unknown category/key pairs return IREE_STATUS_NOT_FOUND.
+static iree_status_t iree_hal_task_device_query_i32(
+ iree_hal_device_t* base_device, iree_string_view_t category,
+ iree_string_view_t key, int32_t* out_value) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ *out_value = 0;
+
+ if (iree_string_view_equal(category,
+ iree_make_cstring_view("hal.executable.format"))) {
+ *out_value =
+ iree_hal_query_any_executable_loader_support(
+ device->loader_count, device->loaders, /*caching_mode=*/0, key)
+ ? 1
+ : 0;
+ return iree_ok_status();
+ } else if (iree_string_view_equal(category,
+ iree_make_cstring_view("hal.device"))) {
+ if (iree_string_view_equal(key, iree_make_cstring_view("concurrency"))) {
+ *out_value = (int32_t)device->queue_count;
+ return iree_ok_status();
+ }
+ } else if (iree_string_view_equal(category,
+ iree_make_cstring_view("hal.dispatch"))) {
+ if (iree_string_view_equal(key, iree_make_cstring_view("concurrency"))) {
+ *out_value = (int32_t)iree_task_executor_worker_count(device->executor);
+ return iree_ok_status();
+ }
+ }
+
+ return iree_make_status(
+ IREE_STATUS_NOT_FOUND,
+ "unknown device configuration key value '%.*s :: %.*s'",
+ (int)category.size, category.data, (int)key.size, key.data);
+}
+
+// Returns the queue index to submit work to based on the |queue_affinity|.
+//
+// If we wanted to have dedicated transfer queues we'd fork off based on
+// command_categories. For now all queues are general purpose.
+static iree_host_size_t iree_hal_task_device_select_queue(
+ iree_hal_task_device_t* device,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity) {
+ // TODO(benvanik): evaluate if we want to obscure this mapping a bit so that
+ // affinity really means "equivalent affinities map to equivalent queues" and
+ // not a specific queue index.
+ return queue_affinity % device->queue_count;
+}
+
+// Creates a task command buffer bound to the scope of the queue selected by
+// |queue_affinity|; recordings draw from the large block pool since command
+// buffers may carry inlined data uploads.
+static iree_status_t iree_hal_task_device_create_command_buffer(
+ iree_hal_device_t* base_device, iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity,
+ iree_hal_command_buffer_t** out_command_buffer) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ iree_host_size_t queue_index = iree_hal_task_device_select_queue(
+ device, command_categories, queue_affinity);
+ return iree_hal_task_command_buffer_create(
+ base_device, &device->queues[queue_index].scope, mode, command_categories,
+ queue_affinity, &device->large_block_pool, device->host_allocator,
+ out_command_buffer);
+}
+
+// Descriptor sets delegate to the shared local-CPU implementation.
+static iree_status_t iree_hal_task_device_create_descriptor_set(
+ iree_hal_device_t* base_device,
+ iree_hal_descriptor_set_layout_t* set_layout,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_binding_t* bindings,
+ iree_hal_descriptor_set_t** out_descriptor_set) {
+ return iree_hal_local_descriptor_set_create(set_layout, binding_count,
+ bindings, out_descriptor_set);
+}
+
+// Descriptor set layouts delegate to the shared local-CPU implementation.
+static iree_status_t iree_hal_task_device_create_descriptor_set_layout(
+ iree_hal_device_t* base_device,
+ iree_hal_descriptor_set_layout_usage_type_t usage_type,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_layout_binding_t* bindings,
+ iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
+ return iree_hal_local_descriptor_set_layout_create(
+ usage_type, binding_count, bindings,
+ iree_hal_device_host_allocator(base_device), out_descriptor_set_layout);
+}
+
+// Events delegate to the task event implementation (see task_event.c).
+static iree_status_t iree_hal_task_device_create_event(
+ iree_hal_device_t* base_device, iree_hal_event_t** out_event) {
+ return iree_hal_task_event_create(iree_hal_device_host_allocator(base_device),
+ out_event);
+}
+
+// Executable caches are the shared local-CPU implementation parameterized
+// with this device's loader set.
+static iree_status_t iree_hal_task_device_create_executable_cache(
+ iree_hal_device_t* base_device, iree_string_view_t identifier,
+ iree_loop_t loop, iree_hal_executable_cache_t** out_executable_cache) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ return iree_hal_local_executable_cache_create(
+ identifier, device->loader_count, device->loaders,
+ iree_hal_device_host_allocator(base_device), out_executable_cache);
+}
+
+// Executable layouts delegate to the shared local-CPU implementation.
+static iree_status_t iree_hal_task_device_create_executable_layout(
+ iree_hal_device_t* base_device, iree_host_size_t push_constants,
+ iree_host_size_t set_layout_count,
+ iree_hal_descriptor_set_layout_t** set_layouts,
+ iree_hal_executable_layout_t** out_executable_layout) {
+ return iree_hal_local_executable_layout_create(
+ push_constants, set_layout_count, set_layouts,
+ iree_hal_device_host_allocator(base_device), out_executable_layout);
+}
+
+// Semaphores use the task implementation sharing the executor's event pool.
+static iree_status_t iree_hal_task_device_create_semaphore(
+ iree_hal_device_t* base_device, uint64_t initial_value,
+ iree_hal_semaphore_t** out_semaphore) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ return iree_hal_task_semaphore_create(
+ iree_task_executor_event_pool(device->executor), initial_value,
+ device->host_allocator, out_semaphore);
+}
+
+// Submits |batches| to the queue selected by |queue_affinity|; the submission
+// is asynchronous and completion is observed via semaphores.
+static iree_status_t iree_hal_task_device_queue_submit(
+ iree_hal_device_t* base_device,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ iree_host_size_t queue_index = iree_hal_task_device_select_queue(
+ device, command_categories, queue_affinity);
+ return iree_hal_task_queue_submit(&device->queues[queue_index], batch_count,
+ batches);
+}
+
+// Convenience composition: submit the batches then block on |wait_semaphore|
+// reaching |wait_value| (or |timeout| elapsing).
+static iree_status_t iree_hal_task_device_submit_and_wait(
+ iree_hal_device_t* base_device,
+ iree_hal_command_category_t command_categories,
+ iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches,
+ iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+ iree_timeout_t timeout) {
+ // Submit...
+ IREE_RETURN_IF_ERROR(iree_hal_task_device_queue_submit(
+ base_device, command_categories, queue_affinity, batch_count, batches));
+
+ // ...and wait.
+ return iree_hal_semaphore_wait(wait_semaphore, wait_value, timeout);
+}
+
+// Blocks on multiple task semaphores per |wait_mode|, borrowing the executor's
+// event pool and the device's large block pool for transient wait state.
+static iree_status_t iree_hal_task_device_wait_semaphores(
+ iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode,
+ const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ return iree_hal_task_semaphore_multi_wait(
+ wait_mode, semaphore_list, timeout,
+ iree_task_executor_event_pool(device->executor),
+ &device->large_block_pool);
+}
+
+// Waits for every queue to drain in turn, stopping at the first failure
+// (e.g. deadline exceeded). NOTE: |timeout| is applied per-queue here, so the
+// total wall time may exceed a single timeout interval.
+static iree_status_t iree_hal_task_device_wait_idle(
+ iree_hal_device_t* base_device, iree_timeout_t timeout) {
+ iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = iree_ok_status();
+ for (iree_host_size_t i = 0; i < device->queue_count; ++i) {
+ status = iree_hal_task_queue_wait_idle(&device->queues[i], timeout);
+ if (!iree_status_is_ok(status)) break;
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Function table wiring the generic iree_hal_device_t API onto the task
+// device implementation; transfer_range reuses the generic mappable-range
+// helper from hal/utils/buffer_transfer.h.
+static const iree_hal_device_vtable_t iree_hal_task_device_vtable = {
+ .destroy = iree_hal_task_device_destroy,
+ .id = iree_hal_task_device_id,
+ .host_allocator = iree_hal_task_device_host_allocator,
+ .device_allocator = iree_hal_task_device_allocator,
+ .trim = iree_hal_task_device_trim,
+ .query_i32 = iree_hal_task_device_query_i32,
+ .create_command_buffer = iree_hal_task_device_create_command_buffer,
+ .create_descriptor_set = iree_hal_task_device_create_descriptor_set,
+ .create_descriptor_set_layout =
+ iree_hal_task_device_create_descriptor_set_layout,
+ .create_event = iree_hal_task_device_create_event,
+ .create_executable_cache = iree_hal_task_device_create_executable_cache,
+ .create_executable_layout = iree_hal_task_device_create_executable_layout,
+ .create_semaphore = iree_hal_task_device_create_semaphore,
+ .transfer_range = iree_hal_device_transfer_mappable_range,
+ .queue_submit = iree_hal_task_device_queue_submit,
+ .submit_and_wait = iree_hal_task_device_submit_and_wait,
+ .wait_semaphores = iree_hal_task_device_wait_semaphores,
+ .wait_idle = iree_hal_task_device_wait_idle,
+};
diff --git a/runtime/src/iree/hal/local/task_device.h b/runtime/src/iree/hal/local/task_device.h
new file mode 100644
index 0000000..d43c1cf
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_device.h
@@ -0,0 +1,51 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_TASK_DEVICE_H_
+#define IREE_HAL_LOCAL_TASK_DEVICE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/task/executor.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Parameters configuring an iree_hal_task_device_t.
+// Must be initialized with iree_hal_task_device_params_initialize prior to use.
+typedef struct iree_hal_task_device_params_t {
+ // Number of queues exposed on the device.
+ // Each queue acts as a separate synchronization scope where all work executes
+ // concurrently unless prohibited by semaphores.
+ iree_host_size_t queue_count;
+
+ // Total size of each block in the device shared block pool.
+ // Larger sizes will lower overhead and ensure the heap isn't hit for
+ // transient allocations while also increasing memory consumption.
+ iree_host_size_t arena_block_size;
+} iree_hal_task_device_params_t;
+
+// Initializes |out_params| to default values.
+void iree_hal_task_device_params_initialize(
+ iree_hal_task_device_params_t* out_params);
+
+// Creates a new iree/task/-based local CPU device that uses |executor| for
+// scheduling tasks. |loaders| is the set of executable loaders that are
+// available for loading in the device context.
+iree_status_t iree_hal_task_device_create(
+ iree_string_view_t identifier, const iree_hal_task_device_params_t* params,
+ iree_task_executor_t* executor, iree_host_size_t loader_count,
+ iree_hal_executable_loader_t** loaders,
+ iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
+ iree_hal_device_t** out_device);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_TASK_DEVICE_H_
diff --git a/runtime/src/iree/hal/local/task_driver.c b/runtime/src/iree/hal/local/task_driver.c
new file mode 100644
index 0000000..49218c4
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_driver.c
@@ -0,0 +1,134 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/task_driver.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+
+#define IREE_HAL_TASK_DEVICE_ID_DEFAULT 0
+
+// Driver that creates task devices sharing a single executor, loader set,
+// and device allocator. The loader pointer array and identifier string are
+// stored inline in the driver allocation.
+typedef struct iree_hal_task_driver_t {
+ iree_hal_resource_t resource;
+ iree_allocator_t host_allocator;
+ // Retained; passed on to each created device.
+ iree_hal_allocator_t* device_allocator;
+
+ // Identifier string; storage lives in the trailing bytes of the allocation.
+ iree_string_view_t identifier;
+ // Copied at creation and used for every device this driver creates.
+ iree_hal_task_device_params_t default_params;
+
+ // Retained executor shared across all created devices.
+ iree_task_executor_t* executor;
+
+ // Retained executable loaders; flexible array member.
+ iree_host_size_t loader_count;
+ iree_hal_executable_loader_t* loaders[];
+} iree_hal_task_driver_t;
+
+static const iree_hal_driver_vtable_t iree_hal_task_driver_vtable;
+
+// Downcasts |base_value| to the task driver type, asserting the vtable
+// matches where IREE_HAL_ASSERT_TYPE is active.
+static iree_hal_task_driver_t* iree_hal_task_driver_cast(
+ iree_hal_driver_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_task_driver_vtable);
+ return (iree_hal_task_driver_t*)base_value;
+}
+
+// Creates the driver in a single allocation holding the struct, the loader
+// pointer array (flexible member), and the |identifier| characters.
+// |default_params| is copied; |executor|, |device_allocator|, and each loader
+// are retained. On failure any partially-built driver is released.
+iree_status_t iree_hal_task_driver_create(
+ iree_string_view_t identifier,
+ const iree_hal_task_device_params_t* default_params,
+ iree_task_executor_t* executor, iree_host_size_t loader_count,
+ iree_hal_executable_loader_t** loaders,
+ iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver) {
+ IREE_ASSERT_ARGUMENT(default_params);
+ IREE_ASSERT_ARGUMENT(!loader_count || loaders);
+ IREE_ASSERT_ARGUMENT(device_allocator);
+ IREE_ASSERT_ARGUMENT(out_driver);
+ *out_driver = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Layout: [struct | loaders[] | identifier chars].
+ iree_hal_task_driver_t* driver = NULL;
+ iree_host_size_t struct_size =
+ sizeof(*driver) + loader_count * sizeof(*driver->loaders);
+ iree_host_size_t total_size = struct_size + identifier.size;
+ iree_status_t status =
+ iree_allocator_malloc(host_allocator, total_size, (void**)&driver);
+ if (iree_status_is_ok(status)) {
+ iree_hal_resource_initialize(&iree_hal_task_driver_vtable,
+ &driver->resource);
+ driver->host_allocator = host_allocator;
+ driver->device_allocator = device_allocator;
+ iree_hal_allocator_retain(device_allocator);
+
+ // Copy the identifier into the trailing storage after the loader array.
+ iree_string_view_append_to_buffer(identifier, &driver->identifier,
+ (char*)driver + struct_size);
+ memcpy(&driver->default_params, default_params,
+ sizeof(driver->default_params));
+
+ driver->executor = executor;
+ iree_task_executor_retain(driver->executor);
+
+ driver->loader_count = loader_count;
+ for (iree_host_size_t i = 0; i < driver->loader_count; ++i) {
+ driver->loaders[i] = loaders[i];
+ iree_hal_executable_loader_retain(driver->loaders[i]);
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_driver = (iree_hal_driver_t*)driver;
+ } else {
+ iree_hal_driver_release((iree_hal_driver_t*)driver);
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Releases the retained device allocator, loaders, and executor, then frees
+// the single backing allocation.
+static void iree_hal_task_driver_destroy(iree_hal_driver_t* base_driver) {
+ iree_hal_task_driver_t* driver = iree_hal_task_driver_cast(base_driver);
+ iree_allocator_t host_allocator = driver->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_allocator_release(driver->device_allocator);
+ for (iree_host_size_t i = 0; i < driver->loader_count; ++i) {
+ iree_hal_executable_loader_release(driver->loaders[i]);
+ }
+ iree_task_executor_release(driver->executor);
+ iree_allocator_free(host_allocator, driver);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Reports a single enumerable device ('default'); the info array is cloned
+// into |allocator| so the caller owns the returned memory.
+static iree_status_t iree_hal_task_driver_query_available_devices(
+ iree_hal_driver_t* base_driver, iree_allocator_t allocator,
+ iree_hal_device_info_t** out_device_infos,
+ iree_host_size_t* out_device_info_count) {
+ static const iree_hal_device_info_t device_infos[1] = {
+ {
+ .device_id = IREE_HAL_TASK_DEVICE_ID_DEFAULT,
+ .name = iree_string_view_literal("default"),
+ },
+ };
+ *out_device_info_count = IREE_ARRAYSIZE(device_infos);
+ return iree_allocator_clone(
+ allocator, iree_make_const_byte_span(device_infos, sizeof(device_infos)),
+ (void**)out_device_infos);
+}
+
+// Creates a device from the driver's stored defaults; |device_id| is
+// currently unused since only the single default device exists.
+static iree_status_t iree_hal_task_driver_create_device(
+ iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id,
+ iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
+ iree_hal_task_driver_t* driver = iree_hal_task_driver_cast(base_driver);
+ return iree_hal_task_device_create(
+ driver->identifier, &driver->default_params, driver->executor,
+ driver->loader_count, driver->loaders, driver->device_allocator,
+ host_allocator, out_device);
+}
+
+// Function table wiring the generic iree_hal_driver_t API onto this driver.
+static const iree_hal_driver_vtable_t iree_hal_task_driver_vtable = {
+ .destroy = iree_hal_task_driver_destroy,
+ .query_available_devices = iree_hal_task_driver_query_available_devices,
+ .create_device = iree_hal_task_driver_create_device,
+};
diff --git a/runtime/src/iree/hal/local/task_driver.h b/runtime/src/iree/hal/local/task_driver.h
new file mode 100644
index 0000000..4c36d2a
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_driver.h
@@ -0,0 +1,35 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_TASK_DRIVER_H_
+#define IREE_HAL_LOCAL_TASK_DRIVER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/hal/local/task_device.h"
+#include "iree/task/executor.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a new iree/task/-based local CPU driver that creates devices sharing
+// the same |executor| for scheduling tasks. |loaders| is the set of executable
+// loaders that are available for loading in each device context.
+iree_status_t iree_hal_task_driver_create(
+ iree_string_view_t identifier,
+ const iree_hal_task_device_params_t* default_params,
+ iree_task_executor_t* executor, iree_host_size_t loader_count,
+ iree_hal_executable_loader_t** loaders,
+ iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_TASK_DRIVER_H_
diff --git a/runtime/src/iree/hal/local/task_event.c b/runtime/src/iree/hal/local/task_event.c
new file mode 100644
index 0000000..ec806a6
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_event.c
@@ -0,0 +1,57 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/task_event.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+
+// Task event object. Holds no state beyond the resource header and its
+// allocator — event dependencies are handled elsewhere in the task system.
+typedef struct iree_hal_task_event_t {
+ iree_hal_resource_t resource;
+ // Allocator the event was created from, used to free it on destroy.
+ iree_allocator_t host_allocator;
+} iree_hal_task_event_t;
+
+static const iree_hal_event_vtable_t iree_hal_task_event_vtable;
+
+// Downcasts |base_value| to the task event type, asserting the vtable matches
+// where IREE_HAL_ASSERT_TYPE is active.
+static iree_hal_task_event_t* iree_hal_task_event_cast(
+ iree_hal_event_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_task_event_vtable);
+ return (iree_hal_task_event_t*)base_value;
+}
+
+// Allocates an empty event from |host_allocator| and returns it in
+// |out_event| (set to NULL on failure).
+iree_status_t iree_hal_task_event_create(iree_allocator_t host_allocator,
+ iree_hal_event_t** out_event) {
+ IREE_ASSERT_ARGUMENT(out_event);
+ *out_event = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_task_event_t* event = NULL;
+ iree_status_t status =
+ iree_allocator_malloc(host_allocator, sizeof(*event), (void**)&event);
+ if (iree_status_is_ok(status)) {
+ iree_hal_resource_initialize(&iree_hal_task_event_vtable, &event->resource);
+ event->host_allocator = host_allocator;
+ *out_event = (iree_hal_event_t*)event;
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Frees the event using the allocator captured at creation time.
+static void iree_hal_task_event_destroy(iree_hal_event_t* base_event) {
+ iree_hal_task_event_t* event = iree_hal_task_event_cast(base_event);
+ iree_allocator_t host_allocator = event->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_free(host_allocator, event);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Function table for the task event; destruction is the only specialized op.
+static const iree_hal_event_vtable_t iree_hal_task_event_vtable = {
+ .destroy = iree_hal_task_event_destroy,
+};
diff --git a/runtime/src/iree/hal/local/task_event.h b/runtime/src/iree/hal/local/task_event.h
new file mode 100644
index 0000000..91bbff7
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_event.h
@@ -0,0 +1,24 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_TASK_EVENT_H_
+#define IREE_HAL_LOCAL_TASK_EVENT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates an event allocated from |host_allocator|. The task implementation
+// stores no signal state on the event object itself (see task_event.c).
+iree_status_t iree_hal_task_event_create(iree_allocator_t host_allocator,
+ iree_hal_event_t** out_event);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_TASK_EVENT_H_
diff --git a/runtime/src/iree/hal/local/task_queue.c b/runtime/src/iree/hal/local/task_queue.c
new file mode 100644
index 0000000..23fcb43
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_queue.c
@@ -0,0 +1,557 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/task_queue.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/local/task_command_buffer.h"
+#include "iree/hal/local/task_semaphore.h"
+#include "iree/task/submission.h"
+
+// Each submission is turned into a DAG for execution:
+//
+// +--------------------+ To preserve the sequential issue order an edge is
+// | (previous issue) | added between the previous outstanding issue (if
+// +--------------------+ it exists) such that all issues run in the order
+// | they were submitted to the queue. Note that this
+// v is *only* the issue; the commands issued by two
+// +--------------------+ submissions may still overlap and are only
+// | sequence barrier | guaranteed to begin execution in order.
+// +--------------------+
+// |
+// | +--------------+
+// +-> | +--------------+ Unsatisfied waits are scheduled as wait tasks and
+// . +-| sema waits | block the issuing of commands until all have
+// . +--------------+ been satisfied. If the wait is immediately
+// . | | | | | following a signal from the same queue then it
+// +--------+-+-+-+-+ is elided - only cross-queue or external waits
+// | actually go down to system wait handles.
+// v
+// +--------------------+ Command buffers in the batch are issued in-order
+// | command issue | as if all commands had been recorded into the same
+// +--------------------+ command buffer (excluding recording state like
+// | push constants). The dependencies between commands
+// | +--------------+ are determined by the events and barriers recorded
+// +-> | +--------------+ in each command buffer.
+// . +-| commands |
+// . +--------------+
+// . | | | | |
+// +--------+-+-+-+-+
+// |
+// v
+// +--------------------+ After all commands within the batch complete the
+// | semaphore signals | submission is retired and all semaphores are
+// +--------------------+ signaled. Note that this may happen *before* other
+// | earlier submissions complete if there were no
+// ... dependencies between the commands in each batch.
+//
+// Could this be simplified? Probably. Improvements to the task system to allow
+// for efficient multiwaits and better stitching of independent DAGs would help.
+
+//===----------------------------------------------------------------------===//
+// Utilities
+//===----------------------------------------------------------------------===//
+
+// Clones a list of semaphores into an |arena| and initializes |out_target_list|
+// to reference the newly-cloned data. Each semaphore is retained; callers must
+// eventually balance with iree_hal_semaphore_list_release.
+static iree_status_t iree_hal_semaphore_list_clone(
+ const iree_hal_semaphore_list_t* source_list, iree_arena_allocator_t* arena,
+ iree_hal_semaphore_list_t* out_target_list) {
+ // One arena allocation holds both arrays: the semaphore pointers first and
+ // the payload values immediately after them.
+ iree_host_size_t semaphores_size =
+ source_list->count * sizeof(out_target_list->semaphores[0]);
+ iree_host_size_t payload_values_size =
+ source_list->count * sizeof(out_target_list->payload_values[0]);
+ iree_host_size_t total_size = semaphores_size + payload_values_size;
+ uint8_t* buffer = NULL;
+ IREE_RETURN_IF_ERROR(iree_arena_allocate(arena, total_size, (void**)&buffer));
+
+ out_target_list->count = source_list->count;
+ out_target_list->semaphores = (iree_hal_semaphore_t**)buffer;
+ out_target_list->payload_values = (uint64_t*)(buffer + semaphores_size);
+
+ for (iree_host_size_t i = 0; i < source_list->count; ++i) {
+ out_target_list->semaphores[i] = source_list->semaphores[i];
+ iree_hal_semaphore_retain(out_target_list->semaphores[i]);
+ out_target_list->payload_values[i] = source_list->payload_values[i];
+ }
+
+ return iree_ok_status();
+}
+
+// Releases the semaphore references retained by iree_hal_semaphore_list_clone.
+// The list storage itself is arena-owned and is not freed here.
+static void iree_hal_semaphore_list_release(iree_hal_semaphore_list_t* list) {
+ for (iree_host_size_t i = 0; i < list->count; ++i) {
+ iree_hal_semaphore_release(list->semaphores[i]);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_queue_wait_cmd_t
+//===----------------------------------------------------------------------===//
+
+// Task to fork out and wait on one or more semaphores.
+// This optimizes for same-queue semaphore chaining by ensuring that semaphores
+// used to stitch together subsequent submissions never have to go to the system
+// to wait as the implicit queue ordering ensures that the signals would have
+// happened prior to the sequence command being executed. Cross-queue semaphores
+// will still cause waits if they have not yet been signaled.
+typedef struct iree_hal_task_queue_wait_cmd_t {
+ // Call to iree_hal_task_queue_wait_cmd.
+ iree_task_call_t task;
+
+ // Arena used for the submission - additional tasks can be allocated from
+ // this.
+ iree_arena_allocator_t* arena;
+
+ // A list of semaphores to wait on prior to issuing the rest of the
+ // submission. Retained; released by iree_hal_task_queue_wait_cmd_cleanup.
+ iree_hal_semaphore_list_t wait_semaphores;
+} iree_hal_task_queue_wait_cmd_t;
+
+// Forks out multiple wait tasks prior to issuing the commands.
+// Each wait semaphore becomes a timepoint gating this task's completion_task;
+// if any timepoint enqueue fails the remaining semaphores are skipped.
+static iree_status_t iree_hal_task_queue_wait_cmd(
+ void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ iree_hal_task_queue_wait_cmd_t* cmd = (iree_hal_task_queue_wait_cmd_t*)task;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status = iree_ok_status();
+ for (iree_host_size_t i = 0; i < cmd->wait_semaphores.count; ++i) {
+ status = iree_hal_task_semaphore_enqueue_timepoint(
+ cmd->wait_semaphores.semaphores[i],
+ cmd->wait_semaphores.payload_values[i],
+ cmd->task.header.completion_task, cmd->arena, pending_submission);
+ if (IREE_UNLIKELY(!iree_status_is_ok(status))) break;
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Cleanup for iree_hal_task_queue_wait_cmd_t that releases the retained
+// semaphores. Invoked by the task system with the command's resulting
+// |status_code| (unused here).
+static void iree_hal_task_queue_wait_cmd_cleanup(
+ iree_task_t* task, iree_status_code_t status_code) {
+ iree_hal_task_queue_wait_cmd_t* cmd = (iree_hal_task_queue_wait_cmd_t*)task;
+ iree_hal_semaphore_list_release(&cmd->wait_semaphores);
+}
+
+// Allocates and initializes a iree_hal_task_queue_wait_cmd_t task.
+// All storage comes from |arena|, so early-error returns do not leak beyond
+// the arena's lifetime.
+static iree_status_t iree_hal_task_queue_wait_cmd_allocate(
+ iree_task_scope_t* scope, const iree_hal_semaphore_list_t* wait_semaphores,
+ iree_arena_allocator_t* arena, iree_hal_task_queue_wait_cmd_t** out_cmd) {
+ iree_hal_task_queue_wait_cmd_t* cmd = NULL;
+ IREE_RETURN_IF_ERROR(iree_arena_allocate(arena, sizeof(*cmd), (void**)&cmd));
+ iree_task_call_initialize(
+ scope, iree_task_make_call_closure(iree_hal_task_queue_wait_cmd, 0),
+ &cmd->task);
+ iree_task_set_cleanup_fn(&cmd->task.header,
+ iree_hal_task_queue_wait_cmd_cleanup);
+ cmd->arena = arena;
+
+ // Clone the wait semaphores from the batch - we retain them and their
+ // payloads.
+ IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_clone(wait_semaphores, arena,
+ &cmd->wait_semaphores));
+
+ *out_cmd = cmd;
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_queue_issue_cmd_t
+//===----------------------------------------------------------------------===//
+
+// Task to issue all the command buffers in the batch.
+// After this task completes the commands have been issued but have not yet
+// completed and the issued commands may complete in any order.
+typedef struct iree_hal_task_queue_issue_cmd_t {
+ // Call to iree_hal_task_queue_issue_cmd.
+ iree_task_call_t task;
+
+ // Arena used for the submission - additional tasks can be allocated from
+ // this.
+ iree_arena_allocator_t* arena;
+
+ // Nasty back reference to the queue so that we can clear the tail_issue_task
+ // if we are the last issue pending.
+ iree_hal_task_queue_t* queue;
+
+ // Command buffers to be issued in the order they appeared in the submission.
+ // Flexible array member sized at allocation time by command_buffer_count.
+ iree_host_size_t command_buffer_count;
+ iree_hal_command_buffer_t* command_buffers[];
+} iree_hal_task_queue_issue_cmd_t;
+
+// Issues a set of command buffers without waiting for them to complete.
+// Only task-system command buffers are supported; anything else fails with
+// UNIMPLEMENTED and aborts issuing the remaining buffers.
+static iree_status_t iree_hal_task_queue_issue_cmd(
+ void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ iree_hal_task_queue_issue_cmd_t* cmd = (iree_hal_task_queue_issue_cmd_t*)task;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status = iree_ok_status();
+
+ // NOTE: it's ok for there to be no command buffers - in that case the
+ // submission was purely for synchronization.
+ if (cmd->command_buffer_count > 0) {
+ for (iree_host_size_t i = 0; i < cmd->command_buffer_count; ++i) {
+ if (iree_hal_task_command_buffer_isa(cmd->command_buffers[i])) {
+ status = iree_hal_task_command_buffer_issue(
+ cmd->command_buffers[i], &cmd->queue->state,
+ cmd->task.header.completion_task, cmd->arena, pending_submission);
+ } else {
+ status = iree_make_status(
+ IREE_STATUS_UNIMPLEMENTED,
+ "unsupported command buffer type for task queue submission");
+ }
+ if (IREE_UNLIKELY(!iree_status_is_ok(status))) break;
+ }
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Cleanup for iree_hal_task_queue_issue_cmd_t that resets the queue state
+// tracking the last in-flight issue.
+static void iree_hal_task_queue_issue_cmd_cleanup(
+ iree_task_t* task, iree_status_code_t status_code) {
+ iree_hal_task_queue_issue_cmd_t* cmd = (iree_hal_task_queue_issue_cmd_t*)task;
+
+ // Reset queue tail issue task if it was us. A later submission may have
+ // already replaced it, in which case we leave it alone.
+ iree_slim_mutex_lock(&cmd->queue->mutex);
+ if (cmd->queue->tail_issue_task == task) {
+ cmd->queue->tail_issue_task = NULL;
+ }
+ iree_slim_mutex_unlock(&cmd->queue->mutex);
+}
+
+// Allocates and initializes a iree_hal_task_queue_issue_cmd_t task.
+// |retire_task| is chained as the completion task so retirement happens after
+// all issued commands finish. The command buffer pointers are copied into the
+// trailing flexible array of the command.
+static iree_status_t iree_hal_task_queue_issue_cmd_allocate(
+ iree_task_scope_t* scope, iree_hal_task_queue_t* queue,
+ iree_task_t* retire_task, iree_host_size_t command_buffer_count,
+ iree_hal_command_buffer_t** const command_buffers,
+ iree_arena_allocator_t* arena, iree_hal_task_queue_issue_cmd_t** out_cmd) {
+ iree_hal_task_queue_issue_cmd_t* cmd = NULL;
+ iree_host_size_t total_cmd_size =
+ sizeof(*cmd) + command_buffer_count * sizeof(*cmd->command_buffers);
+ IREE_RETURN_IF_ERROR(
+ iree_arena_allocate(arena, total_cmd_size, (void**)&cmd));
+ iree_task_call_initialize(
+ scope, iree_task_make_call_closure(iree_hal_task_queue_issue_cmd, 0),
+ &cmd->task);
+ iree_task_set_completion_task(&cmd->task.header, retire_task);
+ iree_task_set_cleanup_fn(&cmd->task.header,
+ iree_hal_task_queue_issue_cmd_cleanup);
+ cmd->arena = arena;
+ cmd->queue = queue;
+
+ cmd->command_buffer_count = command_buffer_count;
+ memcpy(cmd->command_buffers, command_buffers,
+ cmd->command_buffer_count * sizeof(*cmd->command_buffers));
+
+ *out_cmd = cmd;
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_queue_retire_cmd_t
+//===----------------------------------------------------------------------===//
+
+// Task to retire the submission and free the transient memory allocated for
+// it. The task is issued only once all commands from all command buffers in
+// the submission complete. Semaphores will be signaled and dependent
+// submissions may be issued.
+typedef struct iree_hal_task_queue_retire_cmd_t {
+ // Call to iree_hal_task_queue_retire_cmd.
+ iree_task_call_t task;
+
+ // Original arena used for all transient allocations required for the
+ // submission. All queue-related commands are allocated from this, **including
+ // this retire command**. Held by value: deinitializing it frees this struct.
+ iree_arena_allocator_t arena;
+
+ // A list of semaphores to signal upon retiring.
+ iree_hal_semaphore_list_t signal_semaphores;
+} iree_hal_task_queue_retire_cmd_t;
+
+// Retires a submission by signaling semaphores to their desired value and
+// disposing of the temporary arena memory used for the submission.
+// Arena disposal and failure propagation both happen in the cleanup callback.
+static iree_status_t iree_hal_task_queue_retire_cmd(
+ void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ iree_hal_task_queue_retire_cmd_t* cmd =
+ (iree_hal_task_queue_retire_cmd_t*)task;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Signal all semaphores to their new values.
+ // Note that if any signal fails then the whole command will fail and all
+ // semaphores will be signaled to the failure state.
+ iree_status_t status = iree_ok_status();
+ for (iree_host_size_t i = 0; i < cmd->signal_semaphores.count; ++i) {
+ status =
+ iree_hal_semaphore_signal(cmd->signal_semaphores.semaphores[i],
+ cmd->signal_semaphores.payload_values[i]);
+ if (IREE_UNLIKELY(!iree_status_is_ok(status))) break;
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Cleanup for iree_hal_task_queue_retire_cmd_t that ensures that the arena
+// holding the submission is properly disposed and that semaphores are signaled
+// (or signaled to failure if the command failed).
+static void iree_hal_task_queue_retire_cmd_cleanup(
+ iree_task_t* task, iree_status_code_t status_code) {
+ iree_hal_task_queue_retire_cmd_t* cmd =
+ (iree_hal_task_queue_retire_cmd_t*)task;
+
+ // If the command failed then fail all semaphores to ensure future
+ // submissions fail as well (including those on other queues).
+ if (IREE_UNLIKELY(status_code != IREE_STATUS_OK)) {
+ for (iree_host_size_t i = 0; i < cmd->signal_semaphores.count; ++i) {
+ iree_hal_semaphore_fail(cmd->signal_semaphores.semaphores[i],
+ iree_status_from_code(status_code));
+ }
+ }
+
+ // Release all semaphores.
+ iree_hal_semaphore_list_release(&cmd->signal_semaphores);
+
+ // Drop all memory used by the submission (**including cmd**).
+ // The arena must be copied to the stack first because deinitializing it
+ // frees the block that contains |cmd| itself.
+ iree_arena_allocator_t arena = cmd->arena;
+ cmd = NULL;
+ iree_arena_deinitialize(&arena);
+}
+
+// Allocates and initializes a iree_hal_task_queue_retire_cmd_t task.
+// The command will own an arena that can be used for other submission-related
+// allocations. On failure the arena (and everything allocated from it) is
+// disposed before returning.
+static iree_status_t iree_hal_task_queue_retire_cmd_allocate(
+ iree_task_scope_t* scope,
+ const iree_hal_semaphore_list_t* signal_semaphores,
+ iree_arena_block_pool_t* block_pool,
+ iree_hal_task_queue_retire_cmd_t** out_cmd) {
+ // Make an arena we'll use for allocating the command itself.
+ iree_arena_allocator_t arena;
+ iree_arena_initialize(block_pool, &arena);
+
+ // Allocate the command from the arena.
+ iree_hal_task_queue_retire_cmd_t* cmd = NULL;
+ iree_status_t status =
+ iree_arena_allocate(&arena, sizeof(*cmd), (void**)&cmd);
+ if (iree_status_is_ok(status)) {
+ iree_task_call_initialize(
+ scope, iree_task_make_call_closure(iree_hal_task_queue_retire_cmd, 0),
+ &cmd->task);
+ iree_task_set_cleanup_fn(&cmd->task.header,
+ iree_hal_task_queue_retire_cmd_cleanup);
+ }
+
+ // Clone the signal semaphores from the batch - we retain them and their
+ // payloads.
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_semaphore_list_clone(signal_semaphores, &arena,
+ &cmd->signal_semaphores);
+ }
+
+ if (iree_status_is_ok(status)) {
+ // Transfer ownership of the arena to command.
+ memcpy(&cmd->arena, &arena, sizeof(cmd->arena));
+ *out_cmd = cmd;
+ } else {
+ iree_arena_deinitialize(&arena);
+ }
+ return status;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_queue_t
+//===----------------------------------------------------------------------===//
+
+// Initializes |out_queue| in place: retains the shared |executor|, keeps a
+// pointer to |block_pool| for transient allocations, and creates the task
+// scope named by |identifier|. Never fails.
+void iree_hal_task_queue_initialize(iree_string_view_t identifier,
+ iree_task_executor_t* executor,
+ iree_arena_block_pool_t* block_pool,
+ iree_hal_task_queue_t* out_queue) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, identifier.data, identifier.size);
+
+ memset(out_queue, 0, sizeof(*out_queue));
+
+ out_queue->executor = executor;
+ iree_task_executor_retain(out_queue->executor);
+ out_queue->block_pool = block_pool;
+
+ iree_task_scope_initialize(identifier, &out_queue->scope);
+
+ iree_slim_mutex_initialize(&out_queue->mutex);
+ iree_hal_task_queue_state_initialize(&out_queue->state);
+ out_queue->tail_issue_task = NULL;
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Deinitializes |queue|, blocking (unbounded) until all outstanding scope work
+// completes, then tears down state in reverse initialization order.
+void iree_hal_task_queue_deinitialize(iree_hal_task_queue_t* queue) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Best-effort drain; any wait error is intentionally ignored on teardown.
+ iree_status_ignore(
+ iree_task_scope_wait_idle(&queue->scope, IREE_TIME_INFINITE_FUTURE));
+
+ // After idling no issue should remain pending.
+ iree_slim_mutex_lock(&queue->mutex);
+ IREE_ASSERT(!queue->tail_issue_task);
+ iree_slim_mutex_unlock(&queue->mutex);
+
+ iree_hal_task_queue_state_deinitialize(&queue->state);
+ iree_slim_mutex_deinitialize(&queue->mutex);
+ iree_task_scope_deinitialize(&queue->scope);
+ iree_task_executor_release(queue->executor);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Builds and submits the task DAG for a single submission |batch|:
+// optional semaphore waits -> command buffer issue -> retire/signal
+// (see the diagram at the top of this file).
+static iree_status_t iree_hal_task_queue_submit_batch(
+ iree_hal_task_queue_t* queue, const iree_hal_submission_batch_t* batch) {
+ // Task to retire the submission and free the transient memory allocated for
+ // it (including the command itself). We allocate this first so it can get an
+ // arena which we will use to allocate all other commands.
+ iree_hal_task_queue_retire_cmd_t* retire_cmd = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_task_queue_retire_cmd_allocate(
+ &queue->scope, &batch->signal_semaphores, queue->block_pool,
+ &retire_cmd));
+
+ // NOTE: if we fail from here on we must drop the retire_cmd arena.
+ iree_status_t status = iree_ok_status();
+
+ // A fence we'll use to detect when the entire submission has completed.
+ // TODO(benvanik): fold into the retire command.
+ iree_task_fence_t* fence = NULL;
+ status =
+ iree_task_executor_acquire_fence(queue->executor, &queue->scope, &fence);
+ // NOTE(review): this runs even when fence acquisition failed, in which case
+ // |fence| is NULL and &fence->header is computed from NULL - confirm the
+ // failure path cannot observe it before the bail-out below.
+ iree_task_set_completion_task(&retire_cmd->task.header, &fence->header);
+
+ // Task to fork and wait for unsatisfied semaphore dependencies.
+ // This is optional and only required if we have previous submissions still
+ // in-flight - if the queue is empty then we can directly schedule the waits.
+ iree_hal_task_queue_wait_cmd_t* wait_cmd = NULL;
+ if (iree_status_is_ok(status) && batch->wait_semaphores.count > 0) {
+ status = iree_hal_task_queue_wait_cmd_allocate(
+ &queue->scope, &batch->wait_semaphores, &retire_cmd->arena, &wait_cmd);
+ }
+
+ // Task to issue all the command buffers in the batch.
+ // After this task completes the commands have been issued but have not yet
+ // completed and the issued commands may complete in any order.
+ iree_hal_task_queue_issue_cmd_t* issue_cmd = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_task_queue_issue_cmd_allocate(
+ &queue->scope, queue, &retire_cmd->task.header,
+ batch->command_buffer_count, batch->command_buffers, &retire_cmd->arena,
+ &issue_cmd);
+ }
+
+ // Last chance for failure - from here on we are submitting.
+ // Dropping the arena also frees all commands allocated from it above.
+ if (IREE_UNLIKELY(!iree_status_is_ok(status))) {
+ iree_arena_deinitialize(&retire_cmd->arena);
+ return status;
+ }
+
+ iree_task_submission_t submission;
+ iree_task_submission_initialize(&submission);
+
+ // Sequencing: wait on semaphores or go directly into the executor queue.
+ if (wait_cmd != NULL) {
+ // Ensure that we only issue command buffers after all waits have completed.
+ iree_task_set_completion_task(&wait_cmd->task.header,
+ &issue_cmd->task.header);
+ iree_task_submission_enqueue(&submission, &wait_cmd->task.header);
+ } else {
+ // No waits needed; directly enqueue.
+ iree_task_submission_enqueue(&submission, &issue_cmd->task.header);
+ }
+
+ iree_slim_mutex_lock(&queue->mutex);
+
+ // If there is an in-flight issue pending then we need to chain onto that
+ // so that we ensure FIFO submission order is preserved. Note that we are only
+ // waiting for the issue to complete and *not* all of the commands that are
+ // issued.
+ if (queue->tail_issue_task != NULL) {
+ iree_task_set_completion_task(queue->tail_issue_task,
+ &issue_cmd->task.header);
+ }
+ queue->tail_issue_task = &issue_cmd->task.header;
+
+ iree_slim_mutex_unlock(&queue->mutex);
+
+ // Submit the tasks immediately. The executor may queue them up until we
+ // force the flush after all batches have been processed.
+ iree_task_executor_submit(queue->executor, &submission);
+ return iree_ok_status();
+}
+
+// Submits |batch_count| batches in order, stopping at the first failure.
+static iree_status_t iree_hal_task_queue_submit_batches(
+ iree_hal_task_queue_t* queue, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches) {
+ // For now we process each batch independently. To elide additional semaphore
+ // work and prevent unneeded coordinator scheduling logic we could instead
+ // build the whole DAG prior to submitting.
+ for (iree_host_size_t i = 0; i < batch_count; ++i) {
+ const iree_hal_submission_batch_t* batch = &batches[i];
+ IREE_RETURN_IF_ERROR(iree_hal_task_queue_submit_batch(queue, batch));
+ }
+ return iree_ok_status();
+}
+
+// Enqueues all |batches| and, on success, flushes the executor so that queued
+// work begins processing. Does not wait for completion.
+iree_status_t iree_hal_task_queue_submit(
+ iree_hal_task_queue_t* queue, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status =
+ iree_hal_task_queue_submit_batches(queue, batch_count, batches);
+ if (iree_status_is_ok(status)) {
+ iree_task_executor_flush(queue->executor);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Submits |batches| and blocks until the queue is idle or |timeout| elapses.
+// NOTE(review): |wait_semaphore|/|wait_value| are not consumed here - the
+// implementation waits for full queue idle instead; confirm intended.
+iree_status_t iree_hal_task_queue_submit_and_wait(
+ iree_hal_task_queue_t* queue, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches,
+ iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+ iree_timeout_t timeout) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Convert relative timeouts up front so time spent submitting counts.
+ iree_convert_timeout_to_absolute(&timeout);
+
+ // Queue all of the batches.
+ iree_status_t status =
+ iree_hal_task_queue_submit_batches(queue, batch_count, batches);
+ if (iree_status_is_ok(status)) {
+ // Flush the pending submissions and begin processing, then wait until idle.
+ // TODO(benvanik): get a wait_handle we can pass to
+ // iree_task_executor_donate_caller - it'll flush + do work.
+ iree_task_executor_flush(queue->executor);
+ status = iree_hal_task_queue_wait_idle(queue, timeout);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Blocks until all tasks in the queue's scope complete or |timeout| elapses.
+iree_status_t iree_hal_task_queue_wait_idle(iree_hal_task_queue_t* queue,
+ iree_timeout_t timeout) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+ iree_status_t status = iree_task_scope_wait_idle(&queue->scope, deadline_ns);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/hal/local/task_queue.h b/runtime/src/iree/hal/local/task_queue.h
new file mode 100644
index 0000000..7a60191
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_queue.h
@@ -0,0 +1,79 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_TASK_QUEUE_H_
+#define IREE_HAL_LOCAL_TASK_QUEUE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/arena.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/task_queue_state.h"
+#include "iree/task/executor.h"
+#include "iree/task/scope.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A single HAL submission queue that schedules work onto a shared executor.
+typedef struct iree_hal_task_queue_t {
+ // Shared executor that the queue submits tasks to.
+ iree_task_executor_t* executor;
+
+ // Shared block pool for allocating submission transients (tasks/events/etc).
+ iree_arena_block_pool_t* block_pool;
+
+ // Scope used for all tasks in the queue.
+ // This allows for easy waits on all outstanding queue tasks as well as
+ // differentiation of tasks within the executor.
+ iree_task_scope_t scope;
+
+ // Guards queue state. Submissions and waits may come from any user thread and
+ // we do a bit of bookkeeping during command buffer issue that will come from
+ // an executor thread.
+ iree_slim_mutex_t mutex;
+
+ // State tracking used during command buffer issue.
+ // The intra-queue synchronization (barriers/events) carries across command
+ // buffers and this is used to rendezvous the tasks in each set.
+ iree_hal_task_queue_state_t state;
+
+ // The last active iree_hal_task_queue_issue_cmd_t submitted to the queue.
+ // If this is NULL then there are no issues pending - though there may still
+ // be active work that was previously issued. This is used to chain together
+ // issues in FIFO order such that all submissions *issue* in order but not
+ // *execute* in order.
+ iree_task_t* tail_issue_task;
+} iree_hal_task_queue_t;
+
+// Initializes |out_queue| to submit work to |executor| under a scope named
+// |identifier|, allocating submission transients from |block_pool|.
+void iree_hal_task_queue_initialize(iree_string_view_t identifier,
+ iree_task_executor_t* executor,
+ iree_arena_block_pool_t* block_pool,
+ iree_hal_task_queue_t* out_queue);
+
+// Waits for outstanding queue work to complete and releases queue resources.
+void iree_hal_task_queue_deinitialize(iree_hal_task_queue_t* queue);
+
+// Enqueues |batch_count| submission batches and begins processing them.
+// Does not wait for the submitted work to complete.
+iree_status_t iree_hal_task_queue_submit(
+ iree_hal_task_queue_t* queue, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches);
+
+// Enqueues |batch_count| submission batches and blocks until the queue is
+// idle or |timeout| elapses.
+iree_status_t iree_hal_task_queue_submit_and_wait(
+ iree_hal_task_queue_t* queue, iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches,
+ iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+ iree_timeout_t timeout);
+
+// Blocks until all queued work completes or |timeout| elapses.
+iree_status_t iree_hal_task_queue_wait_idle(iree_hal_task_queue_t* queue,
+ iree_timeout_t timeout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_TASK_QUEUE_H_
diff --git a/runtime/src/iree/hal/local/task_queue_state.c b/runtime/src/iree/hal/local/task_queue_state.c
new file mode 100644
index 0000000..34ce329
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_queue_state.c
@@ -0,0 +1,17 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/task_queue_state.h"
+
+#include <string.h>
+
+// Zero-initializes |out_queue_state|; see TODO(#4518) for planned event state.
+void iree_hal_task_queue_state_initialize(
+ iree_hal_task_queue_state_t* out_queue_state) {
+ memset(out_queue_state, 0, sizeof(*out_queue_state));
+}
+
+// No-op today: the state currently holds no resources to release.
+void iree_hal_task_queue_state_deinitialize(
+ iree_hal_task_queue_state_t* queue_state) {}
diff --git a/runtime/src/iree/hal/local/task_queue_state.h b/runtime/src/iree/hal/local/task_queue_state.h
new file mode 100644
index 0000000..40efc90
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_queue_state.h
@@ -0,0 +1,41 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_TASK_QUEUE_STATE_H_
+#define IREE_HAL_LOCAL_TASK_QUEUE_STATE_H_
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/hal/api.h"
+#include "iree/task/scope.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// State tracking for an individual queue.
+//
+// Thread-compatible: only intended to be used by a queue with the submission
+// lock held.
+typedef struct iree_hal_task_queue_state_t {
+ // TODO(#4518): track event state.
+ int reserved;
+} iree_hal_task_queue_state_t;
+
+// Initializes |out_queue_state| to its default empty state.
+void iree_hal_task_queue_state_initialize(
+ iree_hal_task_queue_state_t* out_queue_state);
+
+// Deinitializes queue state and cleans up any tracking intermediates.
+void iree_hal_task_queue_state_deinitialize(
+ iree_hal_task_queue_state_t* queue_state);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_TASK_QUEUE_STATE_H_
diff --git a/runtime/src/iree/hal/local/task_semaphore.c b/runtime/src/iree/hal/local/task_semaphore.c
new file mode 100644
index 0000000..a783b5c
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_semaphore.c
@@ -0,0 +1,505 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/task_semaphore.h"
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/wait_handle.h"
+#include "iree/base/tracing.h"
+
+// Sentinel payload value indicating the semaphore has failed and an error
+// status is set.
+#define IREE_HAL_TASK_SEMAPHORE_FAILURE_VALUE UINT64_MAX
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_timepoint_t
+//===----------------------------------------------------------------------===//
+
+// Represents a point in the timeline that someone is waiting to be reached.
+// When the semaphore is signaled to at least the specified value then the
+// given event will be signaled and the timepoint discarded.
+//
+// Instances are owned and retained by the caller that requested them - usually
+// in the arena associated with the submission, but could be on the stack of a
+// synchronously waiting thread.
+typedef struct iree_hal_task_timepoint_t {
+ struct iree_hal_task_timepoint_t* next;
+ struct iree_hal_task_timepoint_t* prev;
+ uint64_t payload_value;
+ iree_event_t event;
+} iree_hal_task_timepoint_t;
+
+// A doubly-linked FIFO list of timepoints.
+// The order of the timepoints does *not* match increasing payload values but
+// instead the order they were added to the list.
+//
+// Note that the timepoints are not owned by the list - this just nicely
+// stitches together timepoints for the semaphore.
+typedef struct iree_hal_task_timepoint_list_t {
+ iree_hal_task_timepoint_t* head;
+ iree_hal_task_timepoint_t* tail;
+} iree_hal_task_timepoint_list_t;
+
+static void iree_hal_task_timepoint_list_initialize(
+    iree_hal_task_timepoint_list_t* out_list) {
+  // An empty list has both endpoints unset.
+  out_list->head = NULL;
+  out_list->tail = NULL;
+}
+
+// Moves |source_list| into |out_target_list|.
+// |source_list| will be reset and the prior contents of |out_target_list| will
+// be discarded.
+static void iree_hal_task_timepoint_list_move(
+    iree_hal_task_timepoint_list_t* source_list,
+    iree_hal_task_timepoint_list_t* out_target_list) {
+  // Transfer both endpoints by value and leave the source empty.
+  *out_target_list = *source_list;
+  source_list->head = NULL;
+  source_list->tail = NULL;
+}
+
+// Appends a timepoint to the end of the timepoint list.
+static void iree_hal_task_timepoint_list_append(
+    iree_hal_task_timepoint_list_t* list,
+    iree_hal_task_timepoint_t* timepoint) {
+  // The node always becomes the new tail; link it behind the current tail.
+  timepoint->next = NULL;
+  timepoint->prev = list->tail;
+  if (list->tail == NULL) {
+    // List was empty: the node is also the head.
+    list->head = timepoint;
+  } else {
+    list->tail->next = timepoint;
+  }
+  list->tail = timepoint;
+}
+
+// Erases a timepoint from the list, unlinking it from both neighbors.
+// The timepoint's own links are cleared so it can be safely re-appended.
+static void iree_hal_task_timepoint_list_erase(
+    iree_hal_task_timepoint_list_t* list,
+    iree_hal_task_timepoint_t* timepoint) {
+  if (timepoint->prev != NULL) timepoint->prev->next = timepoint->next;
+  // FIX: the successor's back-pointer must also be relinked; previously a
+  // middle-of-list erase left next->prev dangling at the erased node,
+  // corrupting the list on a subsequent erase of that neighbor.
+  if (timepoint->next != NULL) timepoint->next->prev = timepoint->prev;
+  if (timepoint == list->head) list->head = timepoint->next;
+  if (timepoint == list->tail) list->tail = timepoint->prev;
+  timepoint->prev = NULL;
+  timepoint->next = NULL;
+}
+
+// Scans the |pending_list| for all timepoints that are satisfied by the
+// timeline having reached |payload_value|. Each satisfied timepoint will be
+// moved to |out_ready_list|.
+static void iree_hal_task_timepoint_list_take_ready(
+    iree_hal_task_timepoint_list_t* pending_list, uint64_t payload_value,
+    iree_hal_task_timepoint_list_t* out_ready_list) {
+  iree_hal_task_timepoint_list_initialize(out_ready_list);
+  iree_hal_task_timepoint_t* next = pending_list->head;
+  while (next != NULL) {
+    iree_hal_task_timepoint_t* timepoint = next;
+    // Advance before unlinking: erase/append below mutate the node's links.
+    next = timepoint->next;
+    bool is_satisfied = timepoint->payload_value <= payload_value;
+    if (!is_satisfied) continue;
+
+    // Remove from pending list.
+    iree_hal_task_timepoint_list_erase(pending_list, timepoint);
+
+    // Add to ready list.
+    iree_hal_task_timepoint_list_append(out_ready_list, timepoint);
+  }
+}
+
+// Notifies all of the timepoints in the |ready_list| that their condition has
+// been satisfied. |ready_list| will be reset as ownership of the events is
+// held by the originator.
+static void iree_hal_task_timepoint_list_notify_ready(
+    iree_hal_task_timepoint_list_t* ready_list) {
+  for (iree_hal_task_timepoint_t* timepoint = ready_list->head;
+       timepoint != NULL;) {
+    // Capture the successor before signaling: once the event is set the
+    // waiter that owns the timepoint may reclaim its memory.
+    iree_hal_task_timepoint_t* following = timepoint->next;
+    timepoint->next = NULL;
+    timepoint->prev = NULL;
+    iree_event_set(&timepoint->event);
+    timepoint = following;
+  }
+  // Ownership of the events stays with their originators; just empty the list.
+  iree_hal_task_timepoint_list_initialize(ready_list);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_task_semaphore_t
+//===----------------------------------------------------------------------===//
+
+// Timeline semaphore implemented over the task system and a shared event pool.
+typedef struct iree_hal_task_semaphore_t {
+  iree_hal_resource_t resource;
+  iree_allocator_t host_allocator;
+  iree_event_pool_t* event_pool;
+
+  // Guards all mutable fields. We expect low contention on semaphores and since
+  // iree_slim_mutex_t is (effectively) just a CAS this keeps things simpler
+  // than trying to make the entire structure lock-free.
+  iree_slim_mutex_t mutex;
+
+  // Current signaled value. May be IREE_HAL_TASK_SEMAPHORE_FAILURE_VALUE to
+  // indicate that the semaphore has been signaled for failure and
+  // |failure_status| contains the error.
+  uint64_t current_value;
+
+  // OK or the status passed to iree_hal_semaphore_fail. Owned by the semaphore.
+  iree_status_t failure_status;
+
+  // In-process notification signaled when the semaphore value changes. This is
+  // used exclusively for wait-ones to avoid going to the kernel for a full wait
+  // handle operation.
+  iree_notification_t notification;
+
+  // A list of all reserved timepoints waiting for the semaphore to reach a
+  // certain payload value.
+  iree_hal_task_timepoint_list_t timepoint_list;
+} iree_hal_task_semaphore_t;
+
+// Forward declared so the functions above/below can reference it; the table
+// itself is defined at the end of the file.
+static const iree_hal_semaphore_vtable_t iree_hal_task_semaphore_vtable;
+
+// Downcasts the opaque base pointer to the concrete task semaphore type,
+// asserting (debug builds only) that the vtable matches.
+static iree_hal_task_semaphore_t* iree_hal_task_semaphore_cast(
+    iree_hal_semaphore_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_task_semaphore_vtable);
+  return (iree_hal_task_semaphore_t*)base_value;
+}
+
+iree_status_t iree_hal_task_semaphore_create(
+    iree_event_pool_t* event_pool, uint64_t initial_value,
+    iree_allocator_t host_allocator, iree_hal_semaphore_t** out_semaphore) {
+  IREE_ASSERT_ARGUMENT(event_pool);
+  IREE_ASSERT_ARGUMENT(out_semaphore);
+  *out_semaphore = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Single allocation holds the whole semaphore; fields are initialized only
+  // if the allocation succeeds.
+  iree_hal_task_semaphore_t* semaphore = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(*semaphore), (void**)&semaphore);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_task_semaphore_vtable,
+                                 &semaphore->resource);
+    semaphore->host_allocator = host_allocator;
+    // NOTE(review): |event_pool| is captured by pointer without retention -
+    // presumably the owning device keeps it live for the semaphore's lifetime;
+    // confirm against callers.
+    semaphore->event_pool = event_pool;
+
+    iree_slim_mutex_initialize(&semaphore->mutex);
+    semaphore->current_value = initial_value;
+    semaphore->failure_status = iree_ok_status();
+    iree_notification_initialize(&semaphore->notification);
+    iree_hal_task_timepoint_list_initialize(&semaphore->timepoint_list);
+
+    *out_semaphore = (iree_hal_semaphore_t*)semaphore;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+static void iree_hal_task_semaphore_destroy(
+    iree_hal_semaphore_t* base_semaphore) {
+  iree_hal_task_semaphore_t* semaphore =
+      iree_hal_task_semaphore_cast(base_semaphore);
+  // Capture the allocator first: the copy stored inside |semaphore| is freed
+  // along with the struct below.
+  iree_allocator_t host_allocator = semaphore->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_free(semaphore->failure_status);
+  iree_notification_deinitialize(&semaphore->notification);
+  iree_slim_mutex_deinitialize(&semaphore->mutex);
+  iree_allocator_free(host_allocator, semaphore);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the current payload value; if the semaphore has failed the stored
+// failure status is cloned for the caller.
+static iree_status_t iree_hal_task_semaphore_query(
+    iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) {
+  iree_hal_task_semaphore_t* semaphore =
+      iree_hal_task_semaphore_cast(base_semaphore);
+  iree_status_t status = iree_ok_status();
+
+  iree_slim_mutex_lock(&semaphore->mutex);
+  *out_value = semaphore->current_value;
+  if (semaphore->current_value >= IREE_HAL_TASK_SEMAPHORE_FAILURE_VALUE) {
+    // Failure sentinel reached: surface a clone of the sticky error.
+    status = iree_status_clone(semaphore->failure_status);
+  }
+  iree_slim_mutex_unlock(&semaphore->mutex);
+
+  return status;
+}
+
+static iree_status_t iree_hal_task_semaphore_signal(
+    iree_hal_semaphore_t* base_semaphore, uint64_t new_value) {
+  iree_hal_task_semaphore_t* semaphore =
+      iree_hal_task_semaphore_cast(base_semaphore);
+
+  iree_slim_mutex_lock(&semaphore->mutex);
+
+  // Values must be strictly increasing. Note this also rejects signaling a
+  // failed semaphore: the failure sentinel is UINT64_MAX so any |new_value|
+  // compares <= to it.
+  if (new_value <= semaphore->current_value) {
+    uint64_t current_value IREE_ATTRIBUTE_UNUSED = semaphore->current_value;
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "semaphore values must be monotonically "
+                            "increasing; current_value=%" PRIu64
+                            ", new_value=%" PRIu64,
+                            current_value, new_value);
+  }
+
+  semaphore->current_value = new_value;
+
+  // Scan for all timepoints that are now satisfied and move them to our local
+  // ready list. This way we can notify them without needing to continue holding
+  // the semaphore lock.
+  iree_hal_task_timepoint_list_t ready_list;
+  iree_hal_task_timepoint_list_take_ready(&semaphore->timepoint_list, new_value,
+                                          &ready_list);
+
+  iree_notification_post(&semaphore->notification, IREE_ALL_WAITERS);
+  iree_slim_mutex_unlock(&semaphore->mutex);
+
+  // Notify all waiters - note that this must happen outside the lock.
+  iree_hal_task_timepoint_list_notify_ready(&ready_list);
+
+  return iree_ok_status();
+}
+
+// Transitions the semaphore to the sticky failure state. Takes ownership of
+// |status|; only the first failure is preserved.
+static void iree_hal_task_semaphore_fail(iree_hal_semaphore_t* base_semaphore,
+                                         iree_status_t status) {
+  iree_hal_task_semaphore_t* semaphore =
+      iree_hal_task_semaphore_cast(base_semaphore);
+
+  iree_slim_mutex_lock(&semaphore->mutex);
+
+  // Try to set our local status - we only preserve the first failure so only
+  // do this if we are going from a valid semaphore to a failed one.
+  if (!iree_status_is_ok(semaphore->failure_status)) {
+    // Previous status was not OK; drop our new status.
+    IREE_IGNORE_ERROR(status);
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return;
+  }
+
+  // Signal to our failure sentinel value.
+  semaphore->current_value = IREE_HAL_TASK_SEMAPHORE_FAILURE_VALUE;
+  semaphore->failure_status = status;
+
+  // Take the whole timepoint list as we'll be signaling all of them. Since
+  // we hold the lock no other timepoints can be created while we are cleaning
+  // up.
+  iree_hal_task_timepoint_list_t ready_list;
+  iree_hal_task_timepoint_list_move(&semaphore->timepoint_list, &ready_list);
+
+  iree_notification_post(&semaphore->notification, IREE_ALL_WAITERS);
+  iree_slim_mutex_unlock(&semaphore->mutex);
+
+  // Notify all waiters - note that this must happen outside the lock.
+  iree_hal_task_timepoint_list_notify_ready(&ready_list);
+}
+
+// Acquires a timepoint waiting for the given value.
+// |out_timepoint| is owned by the caller and must be kept live until the
+// timepoint has been reached (or it is cancelled by the caller).
+// Caller must hold |semaphore->mutex| (all call sites in this file do) as the
+// timepoint is linked into the semaphore's pending list.
+static iree_status_t iree_hal_task_semaphore_acquire_timepoint(
+    iree_hal_task_semaphore_t* semaphore, uint64_t minimum_value,
+    iree_hal_task_timepoint_t* out_timepoint) {
+  memset(out_timepoint, 0, sizeof(*out_timepoint));
+  out_timepoint->payload_value = minimum_value;
+  // On event acquisition failure the timepoint is left unlinked.
+  IREE_RETURN_IF_ERROR(
+      iree_event_pool_acquire(semaphore->event_pool, 1, &out_timepoint->event));
+  iree_hal_task_timepoint_list_append(&semaphore->timepoint_list,
+                                      out_timepoint);
+  return iree_ok_status();
+}
+
+// Wait command: an embedded wait task plus the state needed to clean it up.
+// |task| must remain the first member so the iree_task_t* handed to the
+// cleanup callback can be cast back to this struct.
+typedef struct iree_hal_task_semaphore_wait_cmd_t {
+  iree_task_wait_t task;
+  // Semaphore being waited on; stored without an added reference here.
+  iree_hal_task_semaphore_t* semaphore;
+  // Timepoint linked into |semaphore|'s pending list until satisfied/aborted.
+  iree_hal_task_timepoint_t timepoint;
+} iree_hal_task_semaphore_wait_cmd_t;
+
+// Cleans up a wait task by returning the event used to the pool and - if the
+// task failed - ensuring we scrub it from the timepoint list.
+static void iree_hal_task_semaphore_wait_cmd_cleanup(
+    iree_task_t* task, iree_status_code_t status_code) {
+  iree_hal_task_semaphore_wait_cmd_t* cmd =
+      (iree_hal_task_semaphore_wait_cmd_t*)task;
+  iree_event_pool_release(cmd->semaphore->event_pool, 1, &cmd->timepoint.event);
+  // On success the timepoint was already unlinked by
+  // iree_hal_task_timepoint_list_take_ready when it was satisfied.
+  if (IREE_UNLIKELY(status_code != IREE_STATUS_OK)) {
+    // Abort the timepoint. Note that this is not designed to be fast as
+    // semaphore failure is an exceptional case.
+    iree_slim_mutex_lock(&cmd->semaphore->mutex);
+    iree_hal_task_timepoint_list_erase(&cmd->semaphore->timepoint_list,
+                                       &cmd->timepoint);
+    iree_slim_mutex_unlock(&cmd->semaphore->mutex);
+  }
+}
+
+iree_status_t iree_hal_task_semaphore_enqueue_timepoint(
+    iree_hal_semaphore_t* base_semaphore, uint64_t minimum_value,
+    iree_task_t* issue_task, iree_arena_allocator_t* arena,
+    iree_task_submission_t* submission) {
+  iree_hal_task_semaphore_t* semaphore =
+      iree_hal_task_semaphore_cast(base_semaphore);
+
+  iree_slim_mutex_lock(&semaphore->mutex);
+
+  iree_status_t status = iree_ok_status();
+  if (semaphore->current_value >= minimum_value) {
+    // Fast path: already satisfied.
+  } else {
+    // Slow path: acquire a system wait handle and perform a full wait.
+    // The command lives in |arena| (tied to the submission) and its event is
+    // returned to the pool by the cleanup callback.
+    iree_hal_task_semaphore_wait_cmd_t* cmd = NULL;
+    status = iree_arena_allocate(arena, sizeof(*cmd), (void**)&cmd);
+    if (iree_status_is_ok(status)) {
+      status = iree_hal_task_semaphore_acquire_timepoint(
+          semaphore, minimum_value, &cmd->timepoint);
+    }
+    if (iree_status_is_ok(status)) {
+      iree_task_wait_initialize(issue_task->scope,
+                                iree_event_await(&cmd->timepoint.event),
+                                IREE_TIME_INFINITE_FUTURE, &cmd->task);
+      iree_task_set_cleanup_fn(&cmd->task.header,
+                               iree_hal_task_semaphore_wait_cmd_cleanup);
+      // |issue_task| will only run once the wait task completes.
+      iree_task_set_completion_task(&cmd->task.header, issue_task);
+      cmd->semaphore = semaphore;
+      iree_task_submission_enqueue(submission, &cmd->task.header);
+    }
+  }
+
+  iree_slim_mutex_unlock(&semaphore->mutex);
+  return status;
+}
+
+static iree_status_t iree_hal_task_semaphore_wait(
+    iree_hal_semaphore_t* base_semaphore, uint64_t value,
+    iree_timeout_t timeout) {
+  iree_hal_task_semaphore_t* semaphore =
+      iree_hal_task_semaphore_cast(base_semaphore);
+
+  iree_slim_mutex_lock(&semaphore->mutex);
+
+  if (!iree_status_is_ok(semaphore->failure_status)) {
+    // Fastest path: failed; return an error to tell callers to query for it.
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return iree_status_from_code(IREE_STATUS_ABORTED);
+  } else if (semaphore->current_value >= value) {
+    // Fast path: already satisfied.
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return iree_ok_status();
+  } else if (iree_timeout_is_immediate(timeout)) {
+    // Not satisfied but a poll, so can avoid the expensive wait handle work.
+    iree_slim_mutex_unlock(&semaphore->mutex);
+    return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  }
+
+  iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+  // Slow path: acquire a timepoint while we hold the lock.
+  // |timepoint| lives on this stack frame; it must be unlinked before return.
+  iree_hal_task_timepoint_t timepoint;
+  iree_status_t status =
+      iree_hal_task_semaphore_acquire_timepoint(semaphore, value, &timepoint);
+
+  iree_slim_mutex_unlock(&semaphore->mutex);
+  if (IREE_UNLIKELY(!iree_status_is_ok(status))) return status;
+
+  // Wait until the timepoint resolves.
+  // If satisfied the timepoint is automatically cleaned up and we are done. If
+  // the deadline is reached before satisfied then we have to clean it up.
+  status = iree_wait_one(&timepoint.event, deadline_ns);
+  if (!iree_status_is_ok(status)) {
+    // Unlink the stack-allocated timepoint before it goes out of scope.
+    iree_slim_mutex_lock(&semaphore->mutex);
+    iree_hal_task_timepoint_list_erase(&semaphore->timepoint_list, &timepoint);
+    iree_slim_mutex_unlock(&semaphore->mutex);
+  }
+  iree_event_pool_release(semaphore->event_pool, 1, &timepoint.event);
+  return status;
+}
+
+iree_status_t iree_hal_task_semaphore_multi_wait(
+    iree_hal_wait_mode_t wait_mode,
+    const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout,
+    iree_event_pool_t* event_pool, iree_arena_block_pool_t* block_pool) {
+  IREE_ASSERT_ARGUMENT(semaphore_list);
+  if (semaphore_list->count == 0) {
+    return iree_ok_status();
+  } else if (semaphore_list->count == 1) {
+    // Fast-path for a single semaphore.
+    return iree_hal_semaphore_wait(semaphore_list->semaphores[0],
+                                   semaphore_list->payload_values[0], timeout);
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+  // Avoid heap allocations by using the device block pool for the wait set.
+  iree_arena_allocator_t arena;
+  iree_arena_initialize(block_pool, &arena);
+  iree_wait_set_t* wait_set = NULL;
+  iree_status_t status = iree_wait_set_allocate(
+      semaphore_list->count, iree_arena_allocator(&arena), &wait_set);
+
+  // Acquire a wait handle for each semaphore timepoint we are to wait on.
+  // TODO(benvanik): flip this API around so we can batch request events from
+  // the event pool. We should be acquiring all required time points in one
+  // call.
+  iree_host_size_t timepoint_count = 0;
+  iree_hal_task_timepoint_t* timepoints = NULL;
+  iree_host_size_t total_timepoint_size =
+      semaphore_list->count * sizeof(timepoints[0]);
+  // FIX: only proceed if the wait set allocation succeeded; previously its
+  // failure status was unconditionally overwritten here and the NULL
+  // |wait_set| would then be used below.
+  if (iree_status_is_ok(status)) {
+    status =
+        iree_arena_allocate(&arena, total_timepoint_size, (void**)&timepoints);
+  }
+  if (iree_status_is_ok(status)) {
+    memset(timepoints, 0, total_timepoint_size);
+    for (iree_host_size_t i = 0; i < semaphore_list->count; ++i) {
+      iree_hal_task_semaphore_t* semaphore =
+          iree_hal_task_semaphore_cast(semaphore_list->semaphores[i]);
+      iree_slim_mutex_lock(&semaphore->mutex);
+      if (semaphore->current_value >= semaphore_list->payload_values[i]) {
+        // Fast path: already satisfied.
+      } else {
+        // Slow path: get a native wait handle for the timepoint.
+        iree_hal_task_timepoint_t* timepoint = &timepoints[timepoint_count++];
+        status = iree_hal_task_semaphore_acquire_timepoint(
+            semaphore, semaphore_list->payload_values[i], timepoint);
+        if (iree_status_is_ok(status)) {
+          status = iree_wait_set_insert(wait_set, timepoint->event);
+        }
+      }
+      iree_slim_mutex_unlock(&semaphore->mutex);
+      if (!iree_status_is_ok(status)) break;
+    }
+  }
+
+  // Perform the wait.
+  if (iree_status_is_ok(status)) {
+    if (wait_mode == IREE_HAL_WAIT_MODE_ANY) {
+      status = iree_wait_any(wait_set, deadline_ns, /*out_wake_handle=*/NULL);
+    } else {
+      status = iree_wait_all(wait_set, deadline_ns);
+    }
+  }
+
+  if (timepoints != NULL) {
+    // TODO(benvanik): if we flip the API to multi-acquire events from the pool
+    // above then we can multi-release here too.
+    for (iree_host_size_t i = 0; i < timepoint_count; ++i) {
+      iree_event_pool_release(event_pool, 1, &timepoints[i].event);
+    }
+  }
+  iree_wait_set_free(wait_set);
+  iree_arena_deinitialize(&arena);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Vtable wiring the task semaphore into the HAL dispatch machinery; |destroy|
+// must stay at offset 0 (see IREE_HAL_ASSERT_VTABLE_LAYOUT in resource.h).
+static const iree_hal_semaphore_vtable_t iree_hal_task_semaphore_vtable = {
+    .destroy = iree_hal_task_semaphore_destroy,
+    .query = iree_hal_task_semaphore_query,
+    .signal = iree_hal_task_semaphore_signal,
+    .fail = iree_hal_task_semaphore_fail,
+    .wait = iree_hal_task_semaphore_wait,
+};
diff --git a/runtime/src/iree/hal/local/task_semaphore.h b/runtime/src/iree/hal/local/task_semaphore.h
new file mode 100644
index 0000000..f3a1060
--- /dev/null
+++ b/runtime/src/iree/hal/local/task_semaphore.h
@@ -0,0 +1,51 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_TASK_SEMAPHORE_H_
+#define IREE_HAL_LOCAL_TASK_SEMAPHORE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/arena.h"
+#include "iree/base/internal/event_pool.h"
+#include "iree/hal/api.h"
+#include "iree/task/submission.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a semaphore that integrates with the task system to allow for
+// pipelined wait and signal operations.
+iree_status_t iree_hal_task_semaphore_create(
+ iree_event_pool_t* event_pool, uint64_t initial_value,
+ iree_allocator_t host_allocator, iree_hal_semaphore_t** out_semaphore);
+
+// Reserves a new timepoint in the timeline for the given minimum payload value.
+// |issue_task| will wait until the timeline semaphore is signaled to at least
+// |minimum_value| before proceeding, with a possible wait task generated and
+// appended to the |submission|. Allocations for any intermediates will be made
+// from |arena| whose lifetime must be tied to the submission.
+iree_status_t iree_hal_task_semaphore_enqueue_timepoint(
+ iree_hal_semaphore_t* semaphore, uint64_t minimum_value,
+ iree_task_t* issue_task, iree_arena_allocator_t* arena,
+ iree_task_submission_t* submission);
+
+// Performs a multi-wait on one or more semaphores.
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the wait does not complete before
+// the deadline derived from |timeout| is reached.
+iree_status_t iree_hal_task_semaphore_multi_wait(
+ iree_hal_wait_mode_t wait_mode,
+ const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout,
+ iree_event_pool_t* event_pool, iree_arena_block_pool_t* block_pool);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_TASK_SEMAPHORE_H_
diff --git a/runtime/src/iree/hal/resource.h b/runtime/src/iree/hal/resource.h
new file mode 100644
index 0000000..0f7abbe
--- /dev/null
+++ b/runtime/src/iree/hal/resource.h
@@ -0,0 +1,116 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_RESOURCE_H_
+#define IREE_HAL_RESOURCE_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Abstract resource type whose lifetime is managed by reference counting.
+// Used mostly just to get a virtual dtor and vtable, though we could add nicer
+// logging by allowing resources to capture debug names, stack traces of
+// creation, etc.
+//
+// All resource types must have the iree_hal_resource_t at offset 0. This allows
+// the HAL code to cast any type pointer to a resource to gain access to the
+// ref count and vtable at predictable locations. Note that this allows for the
+// resource to be at >0 of the allocation but the pointers used with the HAL
+// (iree_hal_event_t*, etc) must point to the iree_hal_resource_t.
+typedef struct iree_hal_resource_t {
+ // Reference count used to manage resource lifetime. The vtable->destroy
+ // method will be called when the reference count falls to zero.
+ iree_atomic_ref_count_t ref_count;
+
+ // Opaque vtable for the resource object.
+ // Must start with iree_hal_resource_vtable_t at offset 0.
+ //
+ // NOTE: this field may be hidden in the future. Only use this for
+ // IREE_HAL_VTABLE_DISPATCH and not equality/direct dereferencing.
+ const void* vtable;
+
+ // TODO(benvanik): debug string/logging utilities.
+} iree_hal_resource_t;
+
+// Base vtable for all resources.
+// This provides the base functions required to generically manipulate resources
+// of various types.
+//
+// This must be aliased at offset 0 of all typed vtables:
+// typedef struct iree_hal_foo_vtable_t {
+// void(IREE_API_PTR* destroy)(...);
+// void(IREE_API_PTR* foo_method)(...);
+// } iree_hal_foo_vtable_t;
+typedef struct iree_hal_resource_vtable_t {
+ // Destroys the resource upon the final reference being released.
+ // The resource pointer must be assumed invalid upon return from the function
+ // (even if in some implementations its returned to a pool and still live).
+ void(IREE_API_PTR* destroy)(iree_hal_resource_t* resource);
+} iree_hal_resource_vtable_t;
+
+// Verifies that the vtable has the right resource sub-vtable.
+#define IREE_HAL_ASSERT_VTABLE_LAYOUT(vtable_type) \
+ static_assert(offsetof(vtable_type, destroy) == 0, \
+ "iree_hal_resource_vtable_t must be at offset 0");
+
+// Initializes the base resource type: ref count starts at one reference and
+// the dispatch vtable is attached.
+static inline void iree_hal_resource_initialize(
+    const void* vtable, iree_hal_resource_t* out_resource) {
+  out_resource->vtable = vtable;
+  iree_atomic_ref_count_init(&out_resource->ref_count);
+}
+
+// Retains a resource for the caller; a NULL resource is a no-op.
+static inline void iree_hal_resource_retain(const void* any_resource) {
+  iree_hal_resource_t* resource = (iree_hal_resource_t*)any_resource;
+  if (IREE_LIKELY(resource != NULL)) {
+    iree_atomic_ref_count_inc(&resource->ref_count);
+  }
+}
+
+// Releases a resource and destroys it if there are no more references.
+// This routes through the vtable and can disable optimizations; always prefer
+// to use the type-specific release functions (such as iree_hal_buffer_release)
+// to allow for more optimizations and better compile-time type safety.
+static inline void iree_hal_resource_release(const void* any_resource) {
+  iree_hal_resource_t* resource = (iree_hal_resource_t*)any_resource;
+  // NOTE(review): this assumes iree_atomic_ref_count_dec returns the count
+  // prior to the decrement, so 1 means the caller held the final reference.
+  if (IREE_LIKELY(resource) &&
+      iree_atomic_ref_count_dec(&resource->ref_count) == 1) {
+    // Dispatch through the base vtable; destroy is at offset 0 of all typed
+    // vtables (see IREE_HAL_ASSERT_VTABLE_LAYOUT).
+    ((iree_hal_resource_vtable_t*)resource->vtable)->destroy(resource);
+  }
+}
+
+// Returns true if the |resource| has the given |vtable| type.
+// This is *not* a way to ensure that an instance is of a specific type but
+// instead that it has a compatible vtable. This is because LTO may very rarely
+// dedupe identical vtables and cause the pointer comparison to succeed even if
+// the spellings of the types differs.
+static inline bool iree_hal_resource_is(const void* resource,
+                                        const void* vtable) {
+  if (!resource) return false;
+  return ((const iree_hal_resource_t*)resource)->vtable == vtable;
+}
+
+// Asserts (**DEBUG ONLY**) that the |resource| has the given |vtable| type.
+// This is only useful to check for programmer error and may have false
+// positives - do not rely on it for handling untrusted user input.
+#define IREE_HAL_ASSERT_TYPE(resource, vtable) \
+ IREE_ASSERT_TRUE(iree_hal_resource_is(resource, vtable), \
+ "type does not match expected " #vtable)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_RESOURCE_H_
diff --git a/runtime/src/iree/hal/semaphore.c b/runtime/src/iree/hal/semaphore.c
new file mode 100644
index 0000000..50608e9
--- /dev/null
+++ b/runtime/src/iree/hal/semaphore.c
@@ -0,0 +1,73 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/semaphore.h"
+
+#include <stddef.h>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/detail.h"
+#include "iree/hal/device.h"
+#include "iree/hal/resource.h"
+
+#define _VTABLE_DISPATCH(semaphore, method_name) \
+ IREE_HAL_VTABLE_DISPATCH(semaphore, iree_hal_semaphore, method_name)
+
+IREE_HAL_API_RETAIN_RELEASE(semaphore);
+
+IREE_API_EXPORT iree_status_t
+iree_hal_semaphore_create(iree_hal_device_t* device, uint64_t initial_value,
+                          iree_hal_semaphore_t** out_semaphore) {
+  IREE_ASSERT_ARGUMENT(device);
+  IREE_ASSERT_ARGUMENT(out_semaphore);
+  *out_semaphore = NULL;  // Never expose a stale pointer on failure.
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // Dispatches through the device vtable (not the semaphore vtable) as no
+  // semaphore instance exists yet.
+  iree_status_t status =
+      IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device, create_semaphore)(
+          device, initial_value, out_semaphore);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+IREE_API_EXPORT iree_status_t
+iree_hal_semaphore_query(iree_hal_semaphore_t* semaphore, uint64_t* out_value) {
+  IREE_ASSERT_ARGUMENT(semaphore);
+  IREE_ASSERT_ARGUMENT(out_value);
+  *out_value = 0;  // Defined output even if the implementation fails.
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status =
+      _VTABLE_DISPATCH(semaphore, query)(semaphore, out_value);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Thin trace-annotated trampoline into the implementation vtable.
+IREE_API_EXPORT iree_status_t
+iree_hal_semaphore_signal(iree_hal_semaphore_t* semaphore, uint64_t new_value) {
+  IREE_ASSERT_ARGUMENT(semaphore);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status =
+      _VTABLE_DISPATCH(semaphore, signal)(semaphore, new_value);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Ownership of |status| transfers to the implementation (the task backend
+// either stores it or drops it when a failure is already recorded).
+IREE_API_EXPORT void iree_hal_semaphore_fail(iree_hal_semaphore_t* semaphore,
+                                             iree_status_t status) {
+  IREE_ASSERT_ARGUMENT(semaphore);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  _VTABLE_DISPATCH(semaphore, fail)(semaphore, status);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Thin trace-annotated trampoline into the implementation vtable.
+IREE_API_EXPORT iree_status_t iree_hal_semaphore_wait(
+    iree_hal_semaphore_t* semaphore, uint64_t value, iree_timeout_t timeout) {
+  IREE_ASSERT_ARGUMENT(semaphore);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status =
+      _VTABLE_DISPATCH(semaphore, wait)(semaphore, value, timeout);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/hal/semaphore.h b/runtime/src/iree/hal/semaphore.h
new file mode 100644
index 0000000..afc8959
--- /dev/null
+++ b/runtime/src/iree/hal/semaphore.h
@@ -0,0 +1,138 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_SEMAPHORE_H_
+#define IREE_HAL_SEMAPHORE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_device_t iree_hal_device_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_semaphore_t
+//===----------------------------------------------------------------------===//
+
+// Synchronization mechanism for host->device, device->host, host->host,
+// and device->device notification. Semaphores behave like Vulkan timeline
+// semaphores (or D3D12 fences) and contain a monotonically increasing
+// uint64_t payload. They may be waited on any number of times even if they
+// have already been signaled for a particular value. They may also be waited
+// on for a particular value prior to the signal for that value.
+//
+// A semaphore is updated to its new value after all prior commands have
+// completed but the delay between completion and the host being woken varies.
+// Some implementations may coalesce semaphores to avoid spurious waking while
+// others will immediately synchronize with the host.
+//
+// One use of semaphores is for resource lifetime management: all resources used
+// by a set of submission batches must be considered live until the semaphore
+// attached to the submission has signaled.
+//
+// Another use of semaphores is device->device synchronization for setting up
+// the DAG of command buffers across queue submissions. This allows devices to
+// perform non-trivial scheduling behavior without the need to wake the host.
+//
+// Semaphores may be set to a permanently failed state by implementations when
+// errors occur during asynchronous execution. Users are expected to propagate
+// the failures and possibly reset the entire device that produced the error.
+//
+// For more information on semaphores see the following docs describing how
+// timelines are generally used (specifically in the device->host case):
+// https://www.youtube.com/watch?v=SpE--Rf516Y
+// https://www.khronos.org/assets/uploads/developers/library/2018-xdc/Vulkan-Timeline-Semaphores-Part-1_Sep18.pdf
+// https://docs.microsoft.com/en-us/windows/win32/direct3d12/user-mode-heap-synchronization
+typedef struct iree_hal_semaphore_t iree_hal_semaphore_t;
+
+// Creates a semaphore that can be used with command queues owned by this
+// device. To use the semaphores with other devices or instances they must
+// first be exported.
+IREE_API_EXPORT iree_status_t
+iree_hal_semaphore_create(iree_hal_device_t* device, uint64_t initial_value,
+ iree_hal_semaphore_t** out_semaphore);
+
+// Retains the given |semaphore| for the caller.
+IREE_API_EXPORT void iree_hal_semaphore_retain(iree_hal_semaphore_t* semaphore);
+
+// Releases the given |semaphore| from the caller.
+IREE_API_EXPORT void iree_hal_semaphore_release(
+ iree_hal_semaphore_t* semaphore);
+
+// Queries the current payload of the semaphore and stores the result in
+// |out_value|. As the payload is monotonically increasing it is guaranteed that
+// the value is at least equal to the previous result of a
+// iree_hal_semaphore_query call and coherent with any waits for a
+// specified value via iree_device_wait_all_semaphores.
+//
+// Returns the status at the time the method is called without blocking and as
+// such is only valid after a semaphore has been signaled. The same failure
+// status will be returned regardless of when in the timeline the error
+// occurred.
+IREE_API_EXPORT iree_status_t
+iree_hal_semaphore_query(iree_hal_semaphore_t* semaphore, uint64_t* out_value);
+
+// Signals the |semaphore| to the given payload value.
+// The call is ignored if the current payload value exceeds |new_value|.
+IREE_API_EXPORT iree_status_t
+iree_hal_semaphore_signal(iree_hal_semaphore_t* semaphore, uint64_t new_value);
+
+// Signals the |semaphore| with a failure. The |status| will be returned from
+// iree_hal_semaphore_query and iree_hal_semaphore_signal for the lifetime
+// of the semaphore. Ownership of the status transfers to the semaphore and
+// callers must clone it if they wish to retain it.
+IREE_API_EXPORT void iree_hal_semaphore_fail(iree_hal_semaphore_t* semaphore,
+ iree_status_t status);
+
+// Blocks the caller until the semaphore reaches or exceeds the specified
+// payload value or the |timeout| elapses.
+//
+// Returns success if the wait is successful and the semaphore has met or
+// exceeded the required payload value.
+//
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the |timeout| elapses without the
+// semaphore reaching the required value. If an asynchronous failure occurred
+// this will return the failure status that was set immediately.
+//
+// Returns IREE_STATUS_ABORTED if one or more semaphores has failed. Callers can
+// use iree_hal_semaphore_query on the semaphores to find the ones that have
+// failed and get the status.
+IREE_API_EXPORT iree_status_t iree_hal_semaphore_wait(
+ iree_hal_semaphore_t* semaphore, uint64_t value, iree_timeout_t timeout);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_semaphore_t implementation details
+//===----------------------------------------------------------------------===//
+
+// Function table backing iree_hal_semaphore_t; each entry implements the
+// correspondingly-named public iree_hal_semaphore_* API (dispatched via the
+// _VTABLE_DISPATCH helpers in semaphore.c).
+typedef struct iree_hal_semaphore_vtable_t {
+  // Destroys the semaphore; invoked via iree_hal_semaphore_destroy.
+  void(IREE_API_PTR* destroy)(iree_hal_semaphore_t* semaphore);
+
+  // Implements iree_hal_semaphore_query.
+  iree_status_t(IREE_API_PTR* query)(iree_hal_semaphore_t* semaphore,
+                                     uint64_t* out_value);
+  // Implements iree_hal_semaphore_signal.
+  iree_status_t(IREE_API_PTR* signal)(iree_hal_semaphore_t* semaphore,
+                                      uint64_t new_value);
+  // Implements iree_hal_semaphore_fail.
+  void(IREE_API_PTR* fail)(iree_hal_semaphore_t* semaphore,
+                           iree_status_t status);
+
+  // Implements iree_hal_semaphore_wait.
+  iree_status_t(IREE_API_PTR* wait)(iree_hal_semaphore_t* semaphore,
+                                    uint64_t value, iree_timeout_t timeout);
+} iree_hal_semaphore_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_semaphore_vtable_t);
+
+IREE_API_EXPORT void iree_hal_semaphore_destroy(
+ iree_hal_semaphore_t* semaphore);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_SEMAPHORE_H_
diff --git a/runtime/src/iree/hal/string_util.c b/runtime/src/iree/hal/string_util.c
new file mode 100644
index 0000000..0938301
--- /dev/null
+++ b/runtime/src/iree/hal/string_util.c
@@ -0,0 +1,599 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/string_util.h"
+
+#include <ctype.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/math.h"
+#include "iree/hal/buffer_view.h"
+
+// Parses an `AxBxC` shape string into |out_shape|. The full rank is always
+// reported via |out_shape_rank| so callers can retry with a larger buffer on
+// IREE_STATUS_OUT_OF_RANGE (capacity-query pattern).
+IREE_API_EXPORT iree_status_t iree_hal_parse_shape(
+    iree_string_view_t value, iree_host_size_t shape_capacity,
+    iree_hal_dim_t* out_shape, iree_host_size_t* out_shape_rank) {
+  IREE_ASSERT_ARGUMENT(out_shape_rank);
+  *out_shape_rank = 0;
+
+  if (iree_string_view_is_empty(value)) {
+    return iree_ok_status();  // empty shape
+  }
+
+  // Count the number of dimensions to see if we have capacity.
+  iree_host_size_t shape_rank = 1;  // always at least one if we are not empty
+  for (iree_host_size_t i = 0; i < value.size; ++i) {
+    if (value.data[i] == 'x') ++shape_rank;
+  }
+  if (out_shape_rank) {
+    *out_shape_rank = shape_rank;
+  }
+  if (shape_rank > shape_capacity) {
+    // NOTE: fast return for capacity queries.
+    return iree_status_from_code(IREE_STATUS_OUT_OF_RANGE);
+  }
+
+  // Walk the 'x'-delimited tokens. NOTE: iree_string_view_split updates |rhs|
+  // in place, so each iteration consumes one leading dimension token; an
+  // empty token (e.g. "1xx2") terminates the loop early.
+  iree_host_size_t dim_index = 0;
+  iree_string_view_t lhs;
+  iree_string_view_t rhs = value;
+  while (iree_string_view_split(rhs, 'x', &lhs, &rhs) &&
+         !iree_string_view_is_empty(lhs)) {
+    int32_t dim_value = 0;
+    if (!iree_string_view_atoi_int32(lhs, &dim_value)) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "shape[%zu] invalid value '%.*s' of '%.*s'",
+                              dim_index, (int)lhs.size, lhs.data,
+                              (int)value.size, value.data);
+    }
+    if (dim_value < 0) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "shape[%zu] unsupported value %d of '%.*s'",
+                              dim_index, dim_value, (int)value.size,
+                              value.data);
+    }
+    out_shape[dim_index++] = dim_value;
+  }
+  // A count mismatch means the token walk stopped early on a malformed
+  // dimension.
+  if (dim_index != shape_rank) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "invalid shape specification: '%.*s'",
+                            (int)value.size, value.data);
+  }
+  return iree_ok_status();
+}
+
+// Formats |shape| as `AxBxC`. Follows the snprintf convention: on truncation
+// the total required length is still accumulated and returned via
+// |out_buffer_length| alongside IREE_STATUS_OUT_OF_RANGE, enabling the
+// query-then-retry pattern.
+IREE_API_EXPORT iree_status_t
+iree_hal_format_shape(const iree_hal_dim_t* shape, iree_host_size_t shape_rank,
+                      iree_host_size_t buffer_capacity, char* buffer,
+                      iree_host_size_t* out_buffer_length) {
+  if (out_buffer_length) {
+    *out_buffer_length = 0;
+  }
+  iree_host_size_t buffer_length = 0;
+  for (iree_host_size_t i = 0; i < shape_rank; ++i) {
+    // Trailing dimension omits the 'x' separator.
+    int n = snprintf(buffer ? buffer + buffer_length : NULL,
+                     buffer ? buffer_capacity - buffer_length : 0,
+                     (i < shape_rank - 1) ? "%dx" : "%d", shape[i]);
+    if (IREE_UNLIKELY(n < 0)) {
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "snprintf failed to write dimension %zu", i);
+    } else if (buffer && n >= buffer_capacity - buffer_length) {
+      // Truncated: stop writing but keep counting the required length.
+      buffer = NULL;
+    }
+    buffer_length += n;
+  }
+  if (out_buffer_length) {
+    *out_buffer_length = buffer_length;
+  }
+  // A NULL |buffer| here indicates the output was (or would have been)
+  // truncated at some point above.
+  return buffer ? iree_ok_status()
+                : iree_status_from_code(IREE_STATUS_OUT_OF_RANGE);
+}
+
+// Parses an element type string of the form `<prefix><bit count>` (such as
+// "f32", "si8", or the opaque "x64"/"*64") produced by
+// iree_hal_format_element_type.
+IREE_API_EXPORT iree_status_t iree_hal_parse_element_type(
+    iree_string_view_t value, iree_hal_element_type_t* out_element_type) {
+  IREE_ASSERT_ARGUMENT(out_element_type);
+  *out_element_type = IREE_HAL_ELEMENT_TYPE_NONE;
+
+  // Strip the numerical-type prefix; whatever remains must be the bit width.
+  iree_string_view_t suffix = value;
+  iree_hal_numerical_type_t numerical_type;
+  if (iree_string_view_consume_prefix(&suffix, IREE_SV("i"))) {
+    numerical_type = IREE_HAL_NUMERICAL_TYPE_INTEGER;
+  } else if (iree_string_view_consume_prefix(&suffix, IREE_SV("si"))) {
+    numerical_type = IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED;
+  } else if (iree_string_view_consume_prefix(&suffix, IREE_SV("ui"))) {
+    numerical_type = IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED;
+  } else if (iree_string_view_consume_prefix(&suffix, IREE_SV("f"))) {
+    numerical_type = IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE;
+  } else if (iree_string_view_consume_prefix(&suffix, IREE_SV("bf"))) {
+    numerical_type = IREE_HAL_NUMERICAL_TYPE_FLOAT_BRAIN;
+  } else if (iree_string_view_consume_prefix(&suffix, IREE_SV("x")) ||
+             iree_string_view_consume_prefix(&suffix, IREE_SV("*"))) {
+    numerical_type = IREE_HAL_NUMERICAL_TYPE_UNKNOWN;
+  } else {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "unhandled element type prefix in '%.*s'",
+                            (int)value.size, value.data);
+  }
+
+  // The bit count must fit the 8-bit field of the packed element type.
+  uint32_t bit_count = 0;
+  if (!iree_string_view_atoi_uint32(suffix, &bit_count) || bit_count > 0xFFu) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "out of range bit count in '%.*s'", (int)value.size,
+                            value.data);
+  }
+
+  *out_element_type = iree_hal_make_element_type(numerical_type, bit_count);
+  return iree_ok_status();
+}
+
+// Formats |element_type| as `<prefix><bit count>` (e.g. "f32", "si8");
+// unknown numerical classes use the opaque "*" prefix. Follows the snprintf
+// truncation convention (OUT_OF_RANGE + required length on small buffers).
+IREE_API_EXPORT iree_status_t iree_hal_format_element_type(
+    iree_hal_element_type_t element_type, iree_host_size_t buffer_capacity,
+    char* buffer, iree_host_size_t* out_buffer_length) {
+  if (out_buffer_length) {
+    *out_buffer_length = 0;
+  }
+  const char* prefix = "*";
+  switch (iree_hal_element_numerical_type(element_type)) {
+    case IREE_HAL_NUMERICAL_TYPE_INTEGER:
+      prefix = "i";
+      break;
+    case IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED:
+      prefix = "si";
+      break;
+    case IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED:
+      prefix = "ui";
+      break;
+    case IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE:
+      prefix = "f";
+      break;
+    case IREE_HAL_NUMERICAL_TYPE_FLOAT_BRAIN:
+      prefix = "bf";
+      break;
+    default:
+      break;  // keep opaque "*"
+  }
+  int length = snprintf(buffer, buffer_capacity, "%s%d", prefix,
+                        (int32_t)iree_hal_element_bit_count(element_type));
+  if (length < 0) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, "snprintf failed");
+  }
+  if (out_buffer_length) {
+    *out_buffer_length = length;
+  }
+  // snprintf returning >= capacity indicates truncation.
+  return length >= buffer_capacity
+             ? iree_status_from_code(IREE_STATUS_OUT_OF_RANGE)
+             : iree_ok_status();
+}
+
+// Parses a string of two character pairs representing hex numbers into bytes.
+// |from| must contain at least 2*|num| characters and |to| must have room for
+// |num| bytes. NOTE: no validation is performed — characters outside
+// [0-9A-Fa-f] decode as 0 via the lookup table below.
+static void iree_hal_hex_string_to_bytes(const char* from, uint8_t* to,
+                                         ptrdiff_t num) {
+  // Maps each ASCII character to its hex nibble value (0 for non-hex).
+  /* clang-format off */
+  static const char kHexValue[256] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,  // '0'..'9'
+      0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 'A'..'F'
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 'a'..'f'
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
+  };
+  /* clang-format on */
+  // The & 0xFF guards against negative `char` values indexing out of bounds.
+  for (int i = 0; i < num; i++) {
+    to[i] = (kHexValue[from[i * 2] & 0xFF] << 4) +
+            (kHexValue[from[i * 2 + 1] & 0xFF]);
+  }
+}
+
+// Parses a single element string, assuming that the caller has validated that
+// |out_data| has enough storage space for the parsed element data.
+// Integer parses reject values outside the destination type's range instead
+// of silently truncating; unknown element types are parsed as a raw hex byte
+// dump of exactly the element byte width.
+static iree_status_t iree_hal_parse_element_unsafe(
+    iree_string_view_t data_str, iree_hal_element_type_t element_type,
+    uint8_t* out_data) {
+  switch (element_type) {
+    case IREE_HAL_ELEMENT_TYPE_INT_8:
+    case IREE_HAL_ELEMENT_TYPE_SINT_8: {
+      int32_t temp = 0;
+      // FIX: also reject values below INT8_MIN; previously only the upper
+      // bound was checked and e.g. "-200" would silently wrap in the cast.
+      if (!iree_string_view_atoi_int32(data_str, &temp) || temp > INT8_MAX ||
+          temp < INT8_MIN) {
+        return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+      }
+      *(int8_t*)out_data = (int8_t)temp;
+      return iree_ok_status();
+    }
+    case IREE_HAL_ELEMENT_TYPE_UINT_8: {
+      uint32_t temp = 0;
+      if (!iree_string_view_atoi_uint32(data_str, &temp) || temp > UINT8_MAX) {
+        return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+      }
+      *(uint8_t*)out_data = (uint8_t)temp;
+      return iree_ok_status();
+    }
+    case IREE_HAL_ELEMENT_TYPE_INT_16:
+    case IREE_HAL_ELEMENT_TYPE_SINT_16: {
+      int32_t temp = 0;
+      // FIX: also reject values below INT16_MIN (see SINT_8 above).
+      if (!iree_string_view_atoi_int32(data_str, &temp) || temp > INT16_MAX ||
+          temp < INT16_MIN) {
+        return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+      }
+      *(int16_t*)out_data = (int16_t)temp;
+      return iree_ok_status();
+    }
+    case IREE_HAL_ELEMENT_TYPE_UINT_16: {
+      uint32_t temp = 0;
+      if (!iree_string_view_atoi_uint32(data_str, &temp) || temp > UINT16_MAX) {
+        return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+      }
+      *(uint16_t*)out_data = (uint16_t)temp;
+      return iree_ok_status();
+    }
+    // 32/64-bit integers parse directly into the output storage.
+    case IREE_HAL_ELEMENT_TYPE_INT_32:
+    case IREE_HAL_ELEMENT_TYPE_SINT_32:
+      return iree_string_view_atoi_int32(data_str, (int32_t*)out_data)
+                 ? iree_ok_status()
+                 : iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+    case IREE_HAL_ELEMENT_TYPE_UINT_32:
+      return iree_string_view_atoi_uint32(data_str, (uint32_t*)out_data)
+                 ? iree_ok_status()
+                 : iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+    case IREE_HAL_ELEMENT_TYPE_INT_64:
+    case IREE_HAL_ELEMENT_TYPE_SINT_64:
+      return iree_string_view_atoi_int64(data_str, (int64_t*)out_data)
+                 ? iree_ok_status()
+                 : iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+    case IREE_HAL_ELEMENT_TYPE_UINT_64:
+      return iree_string_view_atoi_uint64(data_str, (uint64_t*)out_data)
+                 ? iree_ok_status()
+                 : iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_16: {
+      // f16 has no native C type: parse as f32 and narrow.
+      float temp = 0;
+      if (!iree_string_view_atof(data_str, &temp)) {
+        return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+      }
+      *(uint16_t*)out_data = iree_math_f32_to_f16(temp);
+      return iree_ok_status();
+    }
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_32:
+      return iree_string_view_atof(data_str, (float*)out_data)
+                 ? iree_ok_status()
+                 : iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_64:
+      return iree_string_view_atod(data_str, (double*)out_data)
+                 ? iree_ok_status()
+                 : iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+    default: {
+      // Treat any unknown format as binary: expect exactly two hex chars per
+      // byte of the element width.
+      iree_host_size_t element_size =
+          iree_hal_element_dense_byte_count(element_type);
+      if (data_str.size != element_size * 2) {
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "binary hex element count mismatch: buffer "
+                                "length=%zu < expected=%zu",
+                                data_str.size, element_size * 2);
+      }
+      iree_hal_hex_string_to_bytes(data_str.data, out_data, element_size);
+      return iree_ok_status();
+    }
+  }
+}
+
+// Parses one element after verifying the destination span is large enough to
+// hold it, then delegates to the unchecked parser.
+IREE_API_EXPORT iree_status_t iree_hal_parse_element(
+    iree_string_view_t data_str, iree_hal_element_type_t element_type,
+    iree_byte_span_t data_ptr) {
+  const iree_host_size_t element_size =
+      iree_hal_element_dense_byte_count(element_type);
+  if (data_ptr.data_length >= element_size) {
+    return iree_hal_parse_element_unsafe(data_str, element_type, data_ptr.data);
+  }
+  return iree_make_status(
+      IREE_STATUS_INVALID_ARGUMENT,
+      "output data buffer overflow: data_length=%zu < element_size=%zu",
+      data_ptr.data_length, element_size);
+}
+
+// Converts a sequence of bytes into hex number strings.
+// Writes exactly 2*|num| uppercase hex characters to |dest| with no NUL
+// terminator; callers must size |dest| accordingly.
+static void iree_hal_bytes_to_hex_string(const uint8_t* src, char* dest,
+                                         ptrdiff_t num) {
+  // 256 two-character entries; byte value N maps to kHexTable[N*2..N*2+1].
+  static const char kHexTable[513] =
+      "000102030405060708090A0B0C0D0E0F"
+      "101112131415161718191A1B1C1D1E1F"
+      "202122232425262728292A2B2C2D2E2F"
+      "303132333435363738393A3B3C3D3E3F"
+      "404142434445464748494A4B4C4D4E4F"
+      "505152535455565758595A5B5C5D5E5F"
+      "606162636465666768696A6B6C6D6E6F"
+      "707172737475767778797A7B7C7D7E7F"
+      "808182838485868788898A8B8C8D8E8F"
+      "909192939495969798999A9B9C9D9E9F"
+      "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
+      "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
+      "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
+      "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
+      "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
+      "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
+  for (const uint8_t* src_ptr = src; src_ptr != (src + num);
+       ++src_ptr, dest += 2) {
+    const char* hex_p = &kHexTable[*src_ptr * 2];
+    memcpy(dest, hex_p, 2);
+  }
+}
+
+// Formats one element of |element_type| from |data| as text (hex dump for
+// unknown types). Follows the snprintf truncation convention: the required
+// length is returned via |out_buffer_length| with OUT_OF_RANGE when |buffer|
+// is too small (or NULL for a pure capacity query).
+IREE_API_EXPORT iree_status_t iree_hal_format_element(
+    iree_const_byte_span_t data, iree_hal_element_type_t element_type,
+    iree_host_size_t buffer_capacity, char* buffer,
+    iree_host_size_t* out_buffer_length) {
+  iree_host_size_t element_size =
+      iree_hal_element_dense_byte_count(element_type);
+  if (data.data_length < element_size) {
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "data buffer underflow: data_length=%zu < element_size=%zu",
+        data.data_length, element_size);
+  }
+  int n = 0;
+  switch (element_type) {
+    case IREE_HAL_ELEMENT_TYPE_INT_8:
+    case IREE_HAL_ELEMENT_TYPE_SINT_8:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%" PRIi8,
+                   *(const int8_t*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_UINT_8:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%" PRIu8,
+                   *(const uint8_t*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_INT_16:
+    case IREE_HAL_ELEMENT_TYPE_SINT_16:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%" PRIi16,
+                   *(const int16_t*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_UINT_16:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%" PRIu16,
+                   *(const uint16_t*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_INT_32:
+    case IREE_HAL_ELEMENT_TYPE_SINT_32:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%" PRIi32,
+                   *(const int32_t*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_UINT_32:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%" PRIu32,
+                   *(const uint32_t*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_INT_64:
+    case IREE_HAL_ELEMENT_TYPE_SINT_64:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%" PRIi64,
+                   *(const int64_t*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_UINT_64:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%" PRIu64,
+                   *(const uint64_t*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_16:
+      // f16 has no native C type: widen to f32 for formatting.
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%G",
+                   iree_math_f16_to_f32(*(const uint16_t*)data.data));
+      break;
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_32:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%G",
+                   *(const float*)data.data);
+      break;
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_64:
+      n = snprintf(buffer, buffer ? buffer_capacity : 0, "%G",
+                   *(const double*)data.data);
+      break;
+    default: {
+      // Treat any unknown format as binary: two hex chars per byte plus a
+      // manually-written NUL terminator.
+      n = 2 * (int)element_size;
+      if (buffer && buffer_capacity > n) {
+        iree_hal_bytes_to_hex_string(data.data, buffer, element_size);
+        buffer[n] = 0;
+      }
+    }
+  }
+  if (n < 0) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, "snprintf failed");
+  } else if (buffer && n >= buffer_capacity) {
+    // Truncated: report the required length below via OUT_OF_RANGE.
+    buffer = NULL;
+  }
+  if (out_buffer_length) {
+    *out_buffer_length = n;
+  }
+  return buffer ? iree_ok_status()
+                : iree_status_from_code(IREE_STATUS_OUT_OF_RANGE);
+}
+
+// Parses a whitespace/comma/bracket-delimited list of elements into
+// |data_ptr|. An empty string zero-fills the buffer; a single element splats
+// across all elements; otherwise the element count must exactly fill the
+// buffer capacity.
+IREE_API_EXPORT iree_status_t iree_hal_parse_buffer_elements(
+    iree_string_view_t data_str, iree_hal_element_type_t element_type,
+    iree_byte_span_t data_ptr) {
+  iree_host_size_t element_size =
+      iree_hal_element_dense_byte_count(element_type);
+  iree_host_size_t element_capacity = data_ptr.data_length / element_size;
+  if (iree_string_view_is_empty(data_str)) {
+    memset(data_ptr.data, 0, data_ptr.data_length);
+    return iree_ok_status();
+  }
+  // Simple tokenizer: |token_start| == NPOS means we are between tokens;
+  // otherwise it marks where the current token began.
+  size_t src_i = 0;
+  size_t dst_i = 0;
+  size_t token_start = IREE_STRING_VIEW_NPOS;
+  while (src_i < data_str.size) {
+    char c = data_str.data[src_i++];
+    bool is_separator = isspace(c) || c == ',' || c == '[' || c == ']';
+    if (token_start == IREE_STRING_VIEW_NPOS) {
+      if (!is_separator) {
+        token_start = src_i - 1;  // begin a new token
+      }
+      continue;
+    } else if (token_start != IREE_STRING_VIEW_NPOS && !is_separator) {
+      continue;  // still inside the current token
+    }
+    // Separator hit: parse the completed token [token_start, src_i-1).
+    if (dst_i >= element_capacity) {
+      return iree_make_status(
+          IREE_STATUS_OUT_OF_RANGE,
+          "output data buffer overflow: element_capacity=%zu < dst_i=%zu+",
+          element_capacity, dst_i);
+    }
+    IREE_RETURN_IF_ERROR(iree_hal_parse_element_unsafe(
+        iree_make_string_view(data_str.data + token_start,
+                              src_i - 2 - token_start + 1),
+        element_type, data_ptr.data + dst_i * element_size));
+    ++dst_i;
+    token_start = IREE_STRING_VIEW_NPOS;
+  }
+  // Flush a trailing token that ran to the end of the string.
+  if (token_start != IREE_STRING_VIEW_NPOS) {
+    if (dst_i >= element_capacity) {
+      return iree_make_status(
+          IREE_STATUS_OUT_OF_RANGE,
+          "output data overflow: element_capacity=%zu < dst_i=%zu",
+          element_capacity, dst_i);
+    }
+    IREE_RETURN_IF_ERROR(iree_hal_parse_element_unsafe(
+        iree_make_string_view(data_str.data + token_start,
+                              data_str.size - token_start),
+        element_type, data_ptr.data + dst_i * element_size));
+    ++dst_i;
+  }
+  if (dst_i == 1 && element_capacity > 1) {
+    // Splat the single value we got to the entire buffer.
+    uint8_t* p = data_ptr.data + element_size;
+    for (int i = 1; i < element_capacity; ++i, p += element_size) {
+      memcpy(p, data_ptr.data, element_size);
+    }
+  } else if (dst_i < element_capacity) {
+    // More than one but fewer than capacity elements is an error (no partial
+    // fill semantics).
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "input data string underflow: dst_i=%zu < element_capacity=%zu", dst_i,
+        element_capacity);
+  }
+  return iree_ok_status();
+}
+
+// Appends |c| to the in-scope |buffer| (when non-NULL), re-terminating after
+// each write. On overflow |buffer| is set to NULL so later appends only
+// count; |buffer_length| always accumulates the total required length.
+// Expects |buffer|, |buffer_length|, and |buffer_capacity| in scope.
+#define APPEND_CHAR(c)                             \
+  {                                                \
+    if (buffer) {                                  \
+      if (buffer_length < buffer_capacity - 1) {   \
+        buffer[buffer_length] = c;                 \
+        buffer[buffer_length + 1] = '\0';          \
+      } else {                                     \
+        buffer = NULL;                             \
+      }                                            \
+    }                                              \
+    ++buffer_length;                               \
+  }
+
+// Recursive worker for iree_hal_format_buffer_elements. Emits `[...]` per
+// outer dimension and space-separated elements at the leaf dimension,
+// decrementing |*max_element_count| as elements are printed. OUT_OF_RANGE
+// from inner calls is coalesced (formatting continues length-only) so the
+// total required length is still reported.
+static iree_status_t iree_hal_format_buffer_elements_recursive(
+    iree_const_byte_span_t data, const iree_hal_dim_t* shape,
+    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+    iree_host_size_t* max_element_count, iree_host_size_t buffer_capacity,
+    char* buffer, iree_host_size_t* out_buffer_length) {
+  iree_host_size_t buffer_length = 0;
+  if (shape_rank == 0) {
+    // Scalar value; recurse to get on to the leaf dimension path.
+    const iree_hal_dim_t one = 1;
+    return iree_hal_format_buffer_elements_recursive(
+        data, &one, 1, element_type, max_element_count, buffer_capacity, buffer,
+        out_buffer_length);
+  } else if (shape_rank > 1) {
+    // Nested dimension; recurse into the next innermost dimension.
+    iree_hal_dim_t dim_length = 1;
+    for (iree_host_size_t i = 1; i < shape_rank; ++i) {
+      dim_length *= shape[i];
+    }
+    iree_device_size_t dim_stride =
+        dim_length * iree_hal_element_dense_byte_count(element_type);
+    if (data.data_length < dim_stride * shape[0]) {
+      return iree_make_status(
+          IREE_STATUS_OUT_OF_RANGE,
+          "input data underflow: data_length=%zu < expected=%zu",
+          data.data_length, (iree_host_size_t)(dim_stride * shape[0]));
+    }
+    iree_const_byte_span_t subdata;
+    subdata.data = data.data;
+    subdata.data_length = dim_stride;
+    for (iree_hal_dim_t i = 0; i < shape[0]; ++i) {
+      APPEND_CHAR('[');
+      iree_host_size_t actual_length = 0;
+      iree_status_t status = iree_hal_format_buffer_elements_recursive(
+          subdata, shape + 1, shape_rank - 1, element_type, max_element_count,
+          buffer ? buffer_capacity - buffer_length : 0,
+          buffer ? buffer + buffer_length : NULL, &actual_length);
+      buffer_length += actual_length;
+      if (iree_status_is_out_of_range(status)) {
+        // Truncated below: keep counting length but stop writing.
+        buffer = NULL;
+      } else if (!iree_status_is_ok(status)) {
+        return status;
+      }
+      subdata.data += dim_stride;
+      APPEND_CHAR(']');
+    }
+  } else {
+    // Leaf dimension; output data.
+    iree_host_size_t max_count =
+        iree_min(*max_element_count, (iree_host_size_t)shape[0]);
+    iree_device_size_t element_stride =
+        iree_hal_element_dense_byte_count(element_type);
+    if (data.data_length < max_count * element_stride) {
+      return iree_make_status(
+          IREE_STATUS_OUT_OF_RANGE,
+          "input data underflow; data_length=%zu < expected=%zu",
+          data.data_length, (iree_host_size_t)(max_count * element_stride));
+    }
+    // Charge the printed elements against the shared budget.
+    *max_element_count -= max_count;
+    iree_const_byte_span_t subdata;
+    subdata.data = data.data;
+    subdata.data_length = element_stride;
+    for (iree_hal_dim_t i = 0; i < max_count; ++i) {
+      if (i > 0) APPEND_CHAR(' ');
+      iree_host_size_t actual_length = 0;
+      iree_status_t status = iree_hal_format_element(
+          subdata, element_type, buffer ? buffer_capacity - buffer_length : 0,
+          buffer ? buffer + buffer_length : NULL, &actual_length);
+      subdata.data += element_stride;
+      buffer_length += actual_length;
+      if (iree_status_is_out_of_range(status)) {
+        buffer = NULL;
+      } else if (!iree_status_is_ok(status)) {
+        return status;
+      }
+    }
+    // Elide the remainder of the dimension when the element budget ran out.
+    if (max_count < shape[0]) {
+      APPEND_CHAR('.');
+      APPEND_CHAR('.');
+      APPEND_CHAR('.');
+    }
+  }
+  if (out_buffer_length) {
+    *out_buffer_length = buffer_length;
+  }
+  return buffer ? iree_ok_status()
+                : iree_status_from_code(IREE_STATUS_OUT_OF_RANGE);
+}
+
+// Formats a whole shaped buffer as text (e.g. `[1 2 3][4 5 6]` for 2x3).
+// Outputs are cleared up front so results are deterministic on failure; the
+// recursive worker mutates the remaining-element budget in place.
+IREE_API_EXPORT iree_status_t iree_hal_format_buffer_elements(
+    iree_const_byte_span_t data, const iree_hal_dim_t* shape,
+    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+    iree_host_size_t max_element_count, iree_host_size_t buffer_capacity,
+    char* buffer, iree_host_size_t* out_buffer_length) {
+  if (out_buffer_length) *out_buffer_length = 0;
+  if (buffer && buffer_capacity) buffer[0] = '\0';
+  return iree_hal_format_buffer_elements_recursive(
+      data, shape, shape_rank, element_type, &max_element_count,
+      buffer_capacity, buffer, out_buffer_length);
+}
diff --git a/runtime/src/iree/hal/string_util.h b/runtime/src/iree/hal/string_util.h
new file mode 100644
index 0000000..3e8b1bf
--- /dev/null
+++ b/runtime/src/iree/hal/string_util.h
@@ -0,0 +1,104 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_STRING_UTIL_H_
+#define IREE_HAL_STRING_UTIL_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/buffer.h"
+#include "iree/hal/buffer_view.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Parses a serialized set of shape dimensions using the canonical shape format
+// (the same as produced by iree_hal_format_shape).
+IREE_API_EXPORT iree_status_t iree_hal_parse_shape(
+ iree_string_view_t value, iree_host_size_t shape_capacity,
+ iree_hal_dim_t* out_shape, iree_host_size_t* out_shape_rank);
+
+// Converts shape dimensions into a `4x5x6` format.
+//
+// Follows the standard API string formatting rules. See iree/base/api.h.
+IREE_API_EXPORT iree_status_t
+iree_hal_format_shape(const iree_hal_dim_t* shape, iree_host_size_t shape_rank,
+ iree_host_size_t buffer_capacity, char* buffer,
+ iree_host_size_t* out_buffer_length);
+
+// Parses a serialized iree_hal_element_type_t and sets |out_element_type| if
+// it is valid. The format is the same as produced by
+// iree_hal_format_element_type.
+IREE_API_EXPORT iree_status_t iree_hal_parse_element_type(
+ iree_string_view_t value, iree_hal_element_type_t* out_element_type);
+
+// Converts an iree_hal_element_type_t enum value to a canonical string
+// representation, like `IREE_HAL_ELEMENT_TYPE_FLOAT_16` to `f16`.
+// |buffer_capacity| defines the size of |buffer| in bytes and
+// |out_buffer_length| will return the string length in characters.
+//
+// Follows the standard API string formatting rules. See iree/base/api.h.
+IREE_API_EXPORT iree_status_t iree_hal_format_element_type(
+ iree_hal_element_type_t element_type, iree_host_size_t buffer_capacity,
+ char* buffer, iree_host_size_t* out_buffer_length);
+
+// Parses a serialized element of |element_type| to its in-memory form.
+// |data_ptr| must be at least large enough to contain the bytes of the element.
+// For example, "1.2" of type IREE_HAL_ELEMENT_TYPE_FLOAT_32 will write the 4
+// byte float value of 1.2 to |data_ptr|.
+IREE_API_EXPORT iree_status_t iree_hal_parse_element(
+ iree_string_view_t data_str, iree_hal_element_type_t element_type,
+ iree_byte_span_t data_ptr);
+
+// Converts a single element of |element_type| to a string.
+//
+// |buffer_capacity| defines the size of |buffer| in bytes and
+// |out_buffer_length| will return the string length in characters. Returns
+// IREE_STATUS_OUT_OF_RANGE if the buffer capacity is insufficient to hold the
+// formatted elements and |out_buffer_length| will contain the required size.
+//
+// Follows the standard API string formatting rules. See iree/base/api.h.
+IREE_API_EXPORT iree_status_t iree_hal_format_element(
+ iree_const_byte_span_t data, iree_hal_element_type_t element_type,
+ iree_host_size_t buffer_capacity, char* buffer,
+ iree_host_size_t* out_buffer_length);
+
+// Parses a serialized set of elements of the given |element_type|.
+// The resulting parsed data is written to |data_ptr|, which must be at least
+// large enough to contain the parsed elements. The format is the same as
+// produced by iree_hal_format_buffer_elements. Supports additional inputs of
+// empty to denote a 0 fill and a single element to denote a splat.
+IREE_API_EXPORT iree_status_t iree_hal_parse_buffer_elements(
+ iree_string_view_t data_str, iree_hal_element_type_t element_type,
+ iree_byte_span_t data_ptr);
+
+// Converts a shaped buffer of |element_type| elements to a string.
+// This will include []'s to denote each dimension, for example for a shape of
+// 2x3 the elements will be formatted as `[1 2 3][4 5 6]`.
+//
+// |max_element_count| can be used to limit the total number of elements printed
+// when the count may be large. Elided elements will be replaced with `...`.
+//
+// |buffer_capacity| defines the size of |buffer| in bytes and
+// |out_buffer_length| will return the string length in characters. Returns
+// IREE_STATUS_OUT_OF_RANGE if the buffer capacity is insufficient to hold the
+// formatted elements and |out_buffer_length| will contain the required size.
+//
+// Follows the standard API string formatting rules. See iree/base/api.h.
+IREE_API_EXPORT iree_status_t iree_hal_format_buffer_elements(
+ iree_const_byte_span_t data, const iree_hal_dim_t* shape,
+ iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
+ iree_host_size_t max_element_count, iree_host_size_t buffer_capacity,
+ char* buffer, iree_host_size_t* out_buffer_length);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_STRING_UTIL_H_
diff --git a/runtime/src/iree/hal/string_util_test.cc b/runtime/src/iree/hal/string_util_test.cc
new file mode 100644
index 0000000..453f99c
--- /dev/null
+++ b/runtime/src/iree/hal/string_util_test.cc
@@ -0,0 +1,1049 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/span.h"
+#include "iree/base/status_cc.h"
+#include "iree/hal/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace {
+
+using ::iree::testing::status::IsOkAndHolds;
+using ::iree::testing::status::StatusIs;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+
+// TODO(benvanik): move these utils to C++ bindings.
+using Shape = std::vector<iree_hal_dim_t>;
+
+// Parses a serialized set of shape dimensions using the canonical shape format
+// (the same as produced by FormatShape).
+StatusOr<Shape> ParseShape(const std::string& value) {
+ Shape shape(6);
+ iree_host_size_t actual_rank = 0;
+ iree_status_t status;
+ do {
+ status =
+ iree_hal_parse_shape(iree_string_view_t{value.data(), value.size()},
+ shape.size(), shape.data(), &actual_rank);
+ shape.resize(actual_rank);
+ } while (iree_status_is_out_of_range(status));
+ IREE_RETURN_IF_ERROR(std::move(status));
+ return std::move(shape);
+}
+
+// Converts shape dimensions into a `4x5x6` format.
+StatusOr<std::string> FormatShape(iree::span<const iree_hal_dim_t> value) {
+ std::string buffer(16, '\0');
+ iree_host_size_t actual_length = 0;
+ iree_status_t status;
+ do {
+ status =
+ iree_hal_format_shape(value.data(), value.size(), buffer.size() + 1,
+ &buffer[0], &actual_length);
+ buffer.resize(actual_length);
+ } while (iree_status_is_out_of_range(status));
+ IREE_RETURN_IF_ERROR(std::move(status));
+ return std::move(buffer);
+}
+
+// Parses a serialized iree_hal_element_type_t. The format is the same as
+// produced by FormatElementType.
+StatusOr<iree_hal_element_type_t> ParseElementType(const std::string& value) {
+ iree_hal_element_type_t element_type = IREE_HAL_ELEMENT_TYPE_NONE;
+ iree_status_t status = iree_hal_parse_element_type(
+ iree_string_view_t{value.data(), value.size()}, &element_type);
+ IREE_RETURN_IF_ERROR(status, "Failed to parse element type '%.*s'",
+ (int)value.size(), value.data());
+ return element_type;
+}
+
+// Converts an iree_hal_element_type_t enum value to a canonical string
+// representation, like `IREE_HAL_ELEMENT_TYPE_FLOAT_16` to `f16`.
+StatusOr<std::string> FormatElementType(iree_hal_element_type_t value) {
+ std::string buffer(16, '\0');
+ iree_host_size_t actual_length = 0;
+ iree_status_t status;
+ do {
+ status = iree_hal_format_element_type(value, buffer.size() + 1, &buffer[0],
+ &actual_length);
+ buffer.resize(actual_length);
+ } while (iree_status_is_out_of_range(status));
+ IREE_RETURN_IF_ERROR(status);
+ return std::move(buffer);
+}
+
+// Parses a serialized element of |element_type| to its in-memory form.
+// |buffer| be at least large enough to contain the bytes of the element.
+// For example, "1.2" of type IREE_HAL_ELEMENT_TYPE_FLOAT32 will write the 4
+// byte float value of 1.2 to |buffer|.
+template <typename T>
+Status ParseElement(const std::string& value,
+ iree_hal_element_type_t element_type,
+ iree::span<T> buffer) {
+ return iree_hal_parse_element(
+ iree_string_view_t{value.data(), value.size()}, element_type,
+ iree_byte_span_t{reinterpret_cast<uint8_t*>(buffer.data()),
+ buffer.size() * sizeof(T)});
+}
+
+// Converts a single element of |element_type| to a string.
+template <typename T>
+StatusOr<std::string> FormatElement(T value,
+ iree_hal_element_type_t element_type) {
+ std::string result(16, '\0');
+ iree_status_t status;
+ do {
+ iree_host_size_t actual_length = 0;
+ status = iree_hal_format_element(
+ iree_const_byte_span_t{reinterpret_cast<const uint8_t*>(&value),
+ sizeof(T)},
+ element_type, result.size() + 1, &result[0], &actual_length);
+ result.resize(actual_length);
+ } while (iree_status_is_out_of_range(status));
+ IREE_RETURN_IF_ERROR(status, "failed to format buffer element");
+ return std::move(result);
+}
+
+// Parses a serialized set of elements of the given |element_type|.
+// The resulting parsed data is written to |buffer|, which must be at least
+// large enough to contain the parsed elements. The format is the same as
+// produced by FormatBufferElements. Supports additional inputs of
+// empty to denote a 0 fill and a single element to denote a splat.
+template <typename T>
+Status ParseBufferElements(const std::string& value,
+ iree_hal_element_type_t element_type,
+ iree::span<T> buffer) {
+ IREE_RETURN_IF_ERROR(
+ iree_hal_parse_buffer_elements(
+ iree_string_view_t{value.data(), value.size()}, element_type,
+ iree_byte_span_t{reinterpret_cast<uint8_t*>(buffer.data()),
+ buffer.size() * sizeof(T)}),
+ "failed to parse buffer elements '%.*s'",
+ iree_min(256, (int)value.size()), value.data());
+ return OkStatus();
+}
+
+// Converts a shaped buffer of |element_type| elements to a string.
+// This will include []'s to denote each dimension, for example for a shape of
+// 2x3 the elements will be formatted as `[1 2 3][4 5 6]`.
+//
+// |max_element_count| can be used to limit the total number of elements printed
+// when the count may be large. Elided elements will be replaced with `...`.
+template <typename T>
+StatusOr<std::string> FormatBufferElements(iree::span<const T> data,
+ const Shape& shape,
+ iree_hal_element_type_t element_type,
+ size_t max_element_count) {
+ std::string result(255, '\0');
+ iree_status_t status;
+ do {
+ iree_host_size_t actual_length = 0;
+ status = iree_hal_format_buffer_elements(
+ iree_const_byte_span_t{reinterpret_cast<const uint8_t*>(data.data()),
+ data.size() * sizeof(T)},
+ shape.data(), shape.size(), element_type, max_element_count,
+ result.size() + 1, &result[0], &actual_length);
+ result.resize(actual_length);
+ } while (iree_status_is_out_of_range(status));
+ IREE_RETURN_IF_ERROR(std::move(status));
+ return std::move(result);
+}
+
// Maps a C type (eg float) to the HAL type (eg IREE_HAL_ELEMENT_TYPE_FLOAT32).
// The specializations below cover the fixed-width integer and floating-point
// types exercised by these tests; opaque element types have no C mapping and
// must be passed explicitly.
template <typename T>
struct ElementTypeFromCType;

template <>
struct ElementTypeFromCType<int8_t> {
  static constexpr iree_hal_element_type_t value = IREE_HAL_ELEMENT_TYPE_SINT_8;
};
template <>
struct ElementTypeFromCType<uint8_t> {
  static constexpr iree_hal_element_type_t value = IREE_HAL_ELEMENT_TYPE_UINT_8;
};
template <>
struct ElementTypeFromCType<int16_t> {
  static constexpr iree_hal_element_type_t value =
      IREE_HAL_ELEMENT_TYPE_SINT_16;
};
template <>
struct ElementTypeFromCType<uint16_t> {
  static constexpr iree_hal_element_type_t value =
      IREE_HAL_ELEMENT_TYPE_UINT_16;
};
template <>
struct ElementTypeFromCType<int32_t> {
  static constexpr iree_hal_element_type_t value =
      IREE_HAL_ELEMENT_TYPE_SINT_32;
};
template <>
struct ElementTypeFromCType<uint32_t> {
  static constexpr iree_hal_element_type_t value =
      IREE_HAL_ELEMENT_TYPE_UINT_32;
};
template <>
struct ElementTypeFromCType<int64_t> {
  static constexpr iree_hal_element_type_t value =
      IREE_HAL_ELEMENT_TYPE_SINT_64;
};
template <>
struct ElementTypeFromCType<uint64_t> {
  static constexpr iree_hal_element_type_t value =
      IREE_HAL_ELEMENT_TYPE_UINT_64;
};
template <>
struct ElementTypeFromCType<float> {
  static constexpr iree_hal_element_type_t value =
      IREE_HAL_ELEMENT_TYPE_FLOAT_32;
};
template <>
struct ElementTypeFromCType<double> {
  static constexpr iree_hal_element_type_t value =
      IREE_HAL_ELEMENT_TYPE_FLOAT_64;
};
+
+// Parses a serialized element of type T to its in-memory form.
+// For example, "1.2" of type float (IREE_HAL_ELEMENT_TYPE_FLOAT32) will return
+// 1.2f.
+template <typename T>
+inline StatusOr<T> ParseElement(const std::string& value) {
+ T result = T();
+ IREE_RETURN_IF_ERROR(ParseElement(value, ElementTypeFromCType<T>::value,
+ iree::span<T>(&result, 1)));
+ return result;
+}
+
+// Converts a single element of to a string value.
+template <typename T>
+inline StatusOr<std::string> FormatElement(T value) {
+ return FormatElement(value, ElementTypeFromCType<T>::value);
+}
+
+// Parses a serialized set of elements of type T.
+// The resulting parsed data is written to |buffer|, which must be at least
+// large enough to contain the parsed elements. The format is the same as
+// produced by FormatBufferElements. Supports additional inputs of
+// empty to denote a 0 fill and a single element to denote a splat.
+template <typename T>
+inline Status ParseBufferElements(const std::string& value,
+ iree::span<T> buffer) {
+ return ParseBufferElements(value, ElementTypeFromCType<T>::value, buffer);
+}
+
+// Parses a serialized set of elements of type T defined by |shape|.
+// The format is the same as produced by FormatBufferElements. Supports
+// additional inputs of empty to denote a 0 fill and a single element to denote
+// a splat.
+template <typename T>
+inline StatusOr<std::vector<T>> ParseBufferElements(const std::string& value,
+ const Shape& shape) {
+ iree_host_size_t element_count = 1;
+ for (size_t i = 0; i < shape.size(); ++i) {
+ element_count *= shape[i];
+ }
+ std::vector<T> result(element_count);
+ IREE_RETURN_IF_ERROR(ParseBufferElements(value, iree::span<T>(result)));
+ return std::move(result);
+}
+
+// Converts a shaped buffer of |element_type| elements to a string.
+// This will include []'s to denote each dimension, for example for a shape of
+// 2x3 the elements will be formatted as `[1 2 3][4 5 6]`.
+//
+// |max_element_count| can be used to limit the total number of elements printed
+// when the count may be large. Elided elements will be replaced with `...`.
+template <typename T>
+StatusOr<std::string> FormatBufferElements(
+ iree::span<const T> data, const Shape& shape,
+ size_t max_element_count = SIZE_MAX) {
+ return FormatBufferElements(data, shape, ElementTypeFromCType<T>::value,
+ max_element_count);
+}
+
// C API iree_*_retain/iree_*_release function pointer.
template <typename T>
using HandleRefFn = void(IREE_API_PTR*)(T*);

// C++ RAII wrapper for an IREE C reference object.
// Behaves the same as a thread-safe intrusive pointer.
template <typename T, HandleRefFn<T> retain_fn, HandleRefFn<T> release_fn>
class Handle {
 public:
  using handle_type = Handle<T, retain_fn, release_fn>;

  // Adopts |value| without incrementing its reference count (the handle takes
  // over an existing reference).
  static Handle Wrap(T* value) noexcept { return Handle(value, false); }

  Handle() noexcept = default;
  Handle(std::nullptr_t) noexcept {}
  // Retains: the caller keeps its own reference to |value|.
  Handle(T* value) noexcept : value_(value) { retain_fn(value_); }

  ~Handle() noexcept {
    if (value_) release_fn(value_);
  }

  // Copy: both handles end up holding their own reference.
  Handle(const Handle& rhs) noexcept : value_(rhs.value_) {
    if (value_) retain_fn(value_);
  }
  Handle& operator=(const Handle& rhs) noexcept {
    // Self-assignment (and same-pointer assignment) is a no-op so the release
    // below cannot destroy the object before it is re-retained.
    if (value_ != rhs.value_) {
      if (value_) release_fn(value_);
      value_ = rhs.get();
      if (value_) retain_fn(value_);
    }
    return *this;
  }

  // Move: steals the reference from |rhs|, leaving it empty.
  Handle(Handle&& rhs) noexcept : value_(rhs.release()) {}
  Handle& operator=(Handle&& rhs) noexcept {
    if (value_ != rhs.value_) {
      if (value_) release_fn(value_);
      value_ = rhs.release();
    }
    return *this;
  }

  // Gets the pointer referenced by this instance.
  constexpr T* get() const noexcept { return value_; }
  constexpr operator T*() const noexcept { return value_; }

  // Resets the object to nullptr and decrements the reference count, possibly
  // deleting it.
  void reset() noexcept {
    if (value_) {
      release_fn(value_);
      value_ = nullptr;
    }
  }

  // Returns the current pointer held by this object without having its
  // reference count decremented and resets the handle to empty. Returns
  // nullptr if the handle holds no value. To re-wrap in a handle use either
  // ctor(value) or assign().
  T* release() noexcept {
    auto* p = value_;
    value_ = nullptr;
    return p;
  }

  // Assigns a pointer.
  // The pointer will be accepted by the handle and its reference count will
  // not be incremented.
  void assign(T* value) noexcept {
    reset();
    value_ = value;
  }

  // Returns a pointer to the inner pointer storage.
  // This allows passing a pointer to the handle as an output argument to
  // C-style creation functions.
  // NOTE: does not reset() first — callers are expected to use this only on an
  // empty handle, as C creation functions overwrite the slot.
  constexpr T** operator&() noexcept { return &value_; }

  // Support boolean expression evaluation ala unique_ptr/shared_ptr:
  // https://en.cppreference.com/w/cpp/memory/shared_ptr/operator_bool
  typedef T* Handle::*unspecified_bool_type;
  constexpr operator unspecified_bool_type() const noexcept {
    return value_ ? &Handle::value_ : nullptr;
  }

  // Supports unary expression evaluation.
  constexpr bool operator!() const noexcept { return !value_; }

  // Swap support.
  void swap(Handle& rhs) noexcept { std::swap(value_, rhs.value_); }

 protected:
  // Adopting constructor used by Wrap(); the bool parameter only
  // disambiguates this overload from the retaining Handle(T*) constructor.
  Handle(T* value, bool) noexcept : value_(value) {}

 private:
  T* value_ = nullptr;
};
+
+// C++ wrapper for iree_hal_allocator_t.
+struct Allocator final
+ : public Handle<iree_hal_allocator_t, iree_hal_allocator_retain,
+ iree_hal_allocator_release> {
+ using handle_type::handle_type;
+
+ // Creates a host-local heap allocator that can be used when buffers are
+ // required that will not interact with a real hardware device (such as those
+ // used in file IO or tests). Buffers allocated with this will not be
+ // compatible with real device allocators and will likely incur a copy if
+ // used.
+ static StatusOr<Allocator> CreateHostLocal() {
+ Allocator allocator;
+ iree_status_t status = iree_hal_allocator_create_heap(
+ iree_make_cstring_view("host_local"), iree_allocator_system(),
+ iree_allocator_system(), &allocator);
+ IREE_RETURN_IF_ERROR(std::move(status));
+ return std::move(allocator);
+ }
+};
+
+// C++ wrapper for iree_hal_buffer_t.
+struct Buffer final : public Handle<iree_hal_buffer_t, iree_hal_buffer_retain,
+ iree_hal_buffer_release> {
+ using handle_type::handle_type;
+
+ // Returns the size in bytes of the buffer.
+ iree_device_size_t byte_length() const noexcept {
+ return iree_hal_buffer_byte_length(get());
+ }
+
+ // Returns a copy of the buffer contents interpreted as the given type in
+ // host-format.
+ template <typename T>
+ StatusOr<std::vector<T>> CloneData() noexcept {
+ iree_device_size_t total_byte_length = byte_length();
+ std::vector<T> result(total_byte_length / sizeof(T));
+ iree_status_t status =
+ iree_hal_buffer_map_read(get(), 0, result.data(), total_byte_length);
+ IREE_RETURN_IF_ERROR(std::move(status));
+ return std::move(result);
+ }
+};
+
// C++ wrapper for iree_hal_buffer_view_t.
struct BufferView final
    : public Handle<iree_hal_buffer_view_t, iree_hal_buffer_view_retain,
                    iree_hal_buffer_view_release> {
  using handle_type::handle_type;

  // Creates a buffer view with a reference to the given |buffer|.
  // Always uses dense row-major encoding.
  static StatusOr<BufferView> Create(Buffer buffer,
                                     iree::span<const iree_hal_dim_t> shape,
                                     iree_hal_element_type_t element_type) {
    iree_hal_encoding_type_t encoding_type =
        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR;
    BufferView buffer_view;
    iree_status_t status = iree_hal_buffer_view_create(
        buffer, shape.data(), shape.size(), element_type, encoding_type,
        iree_allocator_system(), &buffer_view);
    IREE_RETURN_IF_ERROR(std::move(status));
    return std::move(buffer_view);
  }

  // TODO(benvanik): subview.

  // Returns the buffer underlying the buffer view.
  // The returned Buffer retains its own reference.
  inline Buffer buffer() const noexcept {
    return Buffer(iree_hal_buffer_view_buffer(get()));
  }

  // Returns the dimensions of the shape.
  // CHECK-fails (rather than returning a status) on query failure.
  Shape shape() const noexcept {
    iree_status_t status;
    // Grow-and-retry: on OUT_OF_RANGE |actual_rank| holds the required rank.
    Shape shape(6);
    do {
      iree_host_size_t actual_rank = 0;
      status = iree_hal_buffer_view_shape(get(), shape.size(), shape.data(),
                                          &actual_rank);
      shape.resize(actual_rank);
    } while (iree_status_is_out_of_range(status));
    IREE_CHECK_OK(status);
    return shape;
  }

  // Returns the total number of elements stored in the view.
  inline iree_host_size_t element_count() const noexcept {
    return iree_hal_buffer_view_element_count(get());
  }

  // Returns the element type of the buffer.
  inline iree_hal_element_type_t element_type() const noexcept {
    return iree_hal_buffer_view_element_type(get());
  }

  // Returns the total size of the specified view in bytes.
  // Note that not all buffers are contiguous or densely packed.
  inline iree_device_size_t byte_length() const noexcept {
    return iree_hal_buffer_view_byte_length(get());
  }

  // TODO(benvanik): compute offset/range.

  // Parses a serialized set of buffer elements in the canonical tensor format
  // (the same as produced by Format). |allocator| provides the backing
  // storage for the parsed contents.
  static StatusOr<BufferView> Parse(const std::string& value,
                                    Allocator allocator) {
    BufferView buffer_view;
    iree_status_t status = iree_hal_buffer_view_parse(
        iree_string_view_t{value.data(), value.size()}, allocator,
        &buffer_view);
    IREE_RETURN_IF_ERROR(std::move(status));
    return std::move(buffer_view);
  }

  // Converts buffer view elements into a fully-specified string-form format
  // like `2x4xi16=[[1 2][3 4]]`.
  //
  // |max_element_count| can be used to limit the total number of elements
  // printed when the count may be large. Elided elements will be replaced with
  // `...`.
  StatusOr<std::string> ToString(size_t max_element_count = SIZE_MAX) const {
    // Grow-and-retry: |actual_length| holds the required size on OUT_OF_RANGE.
    std::string result(255, '\0');
    iree_status_t status;
    do {
      iree_host_size_t actual_length = 0;
      status = iree_hal_buffer_view_format(get(), max_element_count,
                                           result.size() + 1, &result[0],
                                           &actual_length);
      result.resize(actual_length);
    } while (iree_status_is_out_of_range(status));
    IREE_RETURN_IF_ERROR(std::move(status));
    return std::move(result);
  }
};
+
// Valid shapes parse regardless of rank or embedded whitespace.
TEST(ShapeStringUtilTest, ParseShape) {
  EXPECT_THAT(ParseShape(""), IsOkAndHolds(Eq(Shape{})));
  EXPECT_THAT(ParseShape("0"), IsOkAndHolds(Eq(Shape{0})));
  EXPECT_THAT(ParseShape("1"), IsOkAndHolds(Eq(Shape{1})));
  EXPECT_THAT(ParseShape("1x2"), IsOkAndHolds(Eq(Shape{1, 2})));
  EXPECT_THAT(ParseShape(" 1 x 2 "), IsOkAndHolds(Eq(Shape{1, 2})));
  EXPECT_THAT(ParseShape("1x2x3x4x5"), IsOkAndHolds(Eq(Shape{1, 2, 3, 4, 5})));
  // Rank 9 exceeds the initial guess of 6 and exercises the resize-retry path.
  EXPECT_THAT(ParseShape("1x2x3x4x5x6x7x8x9"),
              IsOkAndHolds(Eq(Shape{1, 2, 3, 4, 5, 6, 7, 8, 9})));
}
+
// Malformed dimension lists (letters, dangling/duplicate separators, negative
// dims) are rejected with INVALID_ARGUMENT.
TEST(ShapeStringUtilTest, ParseShapeInvalid) {
  EXPECT_THAT(ParseShape("abc"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("1xf"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("1xff23"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("1xf32"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("x"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("x1"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("1x"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("x1x2"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("1xx2"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("1x2x"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseShape("0x-1"), StatusIs(StatusCode::kInvalidArgument));
}
+
// Shapes format as `AxBxC`; the 19-dim case exceeds the initial 16-char buffer
// and exercises the grow-and-retry path.
TEST(ShapeStringUtilTest, FormatShape) {
  EXPECT_THAT(FormatShape(Shape{}), IsOkAndHolds(Eq("")));
  EXPECT_THAT(FormatShape(Shape{0}), IsOkAndHolds(Eq("0")));
  EXPECT_THAT(FormatShape(Shape{1}), IsOkAndHolds(Eq("1")));
  EXPECT_THAT(FormatShape(Shape{1, 2}), IsOkAndHolds(Eq("1x2")));
  EXPECT_THAT(FormatShape(Shape{1, 2, 3, 4, 5}), IsOkAndHolds(Eq("1x2x3x4x5")));
  EXPECT_THAT(
      FormatShape(Shape{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                        17, 18, 19}),
      IsOkAndHolds(Eq("1x2x3x4x5x6x7x8x9x10x11x12x13x14x15x16x17x18x19")));
}
+
// Canonical type strings (signed/unsigned/float/bfloat/opaque, and arbitrary
// bit widths like f4) parse to the matching element type enum values.
TEST(ElementTypeStringUtilTest, ParseElementType) {
  EXPECT_THAT(ParseElementType("i8"),
              IsOkAndHolds(Eq(IREE_HAL_ELEMENT_TYPE_INT_8)));
  EXPECT_THAT(ParseElementType("si8"),
              IsOkAndHolds(Eq(IREE_HAL_ELEMENT_TYPE_SINT_8)));
  EXPECT_THAT(ParseElementType("ui16"),
              IsOkAndHolds(Eq(IREE_HAL_ELEMENT_TYPE_UINT_16)));
  EXPECT_THAT(ParseElementType("f32"),
              IsOkAndHolds(Eq(IREE_HAL_ELEMENT_TYPE_FLOAT_32)));
  EXPECT_THAT(ParseElementType("f16"),
              IsOkAndHolds(Eq(IREE_HAL_ELEMENT_TYPE_FLOAT_16)));
  EXPECT_THAT(ParseElementType("bf16"),
              IsOkAndHolds(Eq(IREE_HAL_ELEMENT_TYPE_BFLOAT_16)));
  // Both `x` and `*` spell opaque types.
  EXPECT_THAT(ParseElementType("x64"),
              IsOkAndHolds(Eq(IREE_HAL_ELEMENT_TYPE_OPAQUE_64)));
  EXPECT_THAT(ParseElementType("*64"),
              IsOkAndHolds(Eq(IREE_HAL_ELEMENT_TYPE_OPAQUE_64)));
  EXPECT_THAT(ParseElementType("f4"),
              IsOkAndHolds(Eq(iree_hal_make_element_type(
                  IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE, 4))));
}
+
// Empty strings, missing prefixes, and out-of-range bit widths are rejected.
TEST(ElementTypeStringUtilTest, ParseElementTypeInvalid) {
  EXPECT_THAT(ParseElementType(""), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElementType("1"), StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElementType("*1234"),
              StatusIs(StatusCode::kInvalidArgument));
}
+
// Element types format back to their canonical short strings (the inverse of
// ParseElementType; note opaque formats as `*64`, not `x64`).
TEST(ElementTypeStringUtilTest, FormatElementType) {
  EXPECT_THAT(FormatElementType(IREE_HAL_ELEMENT_TYPE_INT_8),
              IsOkAndHolds(Eq("i8")));
  EXPECT_THAT(FormatElementType(IREE_HAL_ELEMENT_TYPE_SINT_8),
              IsOkAndHolds(Eq("si8")));
  EXPECT_THAT(FormatElementType(IREE_HAL_ELEMENT_TYPE_UINT_16),
              IsOkAndHolds(Eq("ui16")));
  EXPECT_THAT(FormatElementType(IREE_HAL_ELEMENT_TYPE_FLOAT_32),
              IsOkAndHolds(Eq("f32")));
  EXPECT_THAT(FormatElementType(IREE_HAL_ELEMENT_TYPE_BFLOAT_16),
              IsOkAndHolds(Eq("bf16")));
  EXPECT_THAT(FormatElementType(IREE_HAL_ELEMENT_TYPE_OPAQUE_64),
              IsOkAndHolds(Eq("*64")));
  EXPECT_THAT(FormatElementType(iree_hal_make_element_type(
                  IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE, 4)),
              IsOkAndHolds(Eq("f4")));
}
+
// Extreme (min/max) integer values and representative floats parse exactly for
// every supported element width.
TEST(ElementStringUtilTest, ParseElement) {
  EXPECT_THAT(ParseElement<int8_t>("-128"), IsOkAndHolds(Eq(INT8_MIN)));
  EXPECT_THAT(ParseElement<int8_t>("127"), IsOkAndHolds(Eq(INT8_MAX)));
  EXPECT_THAT(ParseElement<uint8_t>("255"), IsOkAndHolds(Eq(UINT8_MAX)));
  EXPECT_THAT(ParseElement<int16_t>("-32768"), IsOkAndHolds(Eq(INT16_MIN)));
  EXPECT_THAT(ParseElement<int16_t>("32767"), IsOkAndHolds(Eq(INT16_MAX)));
  EXPECT_THAT(ParseElement<uint16_t>("65535"), IsOkAndHolds(Eq(UINT16_MAX)));
  EXPECT_THAT(ParseElement<int32_t>("-2147483648"),
              IsOkAndHolds(Eq(INT32_MIN)));
  EXPECT_THAT(ParseElement<int32_t>("2147483647"), IsOkAndHolds(Eq(INT32_MAX)));
  EXPECT_THAT(ParseElement<uint32_t>("4294967295"),
              IsOkAndHolds(Eq(UINT32_MAX)));
  EXPECT_THAT(ParseElement<int64_t>("-9223372036854775808"),
              IsOkAndHolds(Eq(INT64_MIN)));
  EXPECT_THAT(ParseElement<int64_t>("9223372036854775807"),
              IsOkAndHolds(Eq(INT64_MAX)));
  EXPECT_THAT(ParseElement<uint64_t>("18446744073709551615"),
              IsOkAndHolds(Eq(UINT64_MAX)));
  EXPECT_THAT(ParseElement<float>("1.5"), IsOkAndHolds(Eq(1.5f)));
  EXPECT_THAT(ParseElement<double>("1.567890123456789"),
              IsOkAndHolds(Eq(1.567890123456789)));
  EXPECT_THAT(ParseElement<double>("-1.5e-10"), IsOkAndHolds(Eq(-1.5e-10)));
}
+
// Values outside a type's representable range are rejected with
// INVALID_ARGUMENT rather than wrapping silently.
TEST(ElementStringUtilTest, ParseElementOutOfRange) {
  EXPECT_THAT(ParseElement<int8_t>("255"),
              StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElement<uint8_t>("-128"),
              StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElement<int16_t>("65535"),
              StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElement<uint16_t>("-32768"),
              StatusIs(StatusCode::kInvalidArgument));
  // TODO(benvanik): these don't seem to work the same across all stdlib
  // implementations. The current implementation works with MSVC but fails under
  // clang. The fact that these failed like they did at all may have just been
  // an artifact of abseil and I'm not too concerned about matching that
  // behavior exactly enough to spend any more time on it now.
  // EXPECT_THAT(ParseElement<int32_t>("4294967295"),
  //             StatusIs(StatusCode::kInvalidArgument));
  // EXPECT_THAT(ParseElement<uint32_t>("4294967296"),
  //             StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElement<int32_t>("18446744073709551615"),
              StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElement<uint32_t>("-9223372036854775808"),
              StatusIs(StatusCode::kInvalidArgument));
}
+
+TEST(ElementStringUtilTest, ParseElementInvalid) {
+ EXPECT_THAT(ParseElement<int8_t>(""), StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<uint8_t>(""),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<int16_t>(""),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<uint16_t>(""),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<int32_t>(""),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<uint32_t>(""),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<int32_t>(""),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<uint32_t>(""),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<float>(""), StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<double>(""), StatusIs(StatusCode::kInvalidArgument));
+
+ EXPECT_THAT(ParseElement<int8_t>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<uint8_t>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<int16_t>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<uint16_t>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<int32_t>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<uint32_t>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<int32_t>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<uint32_t>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<float>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseElement<double>("asdfasdf"),
+ StatusIs(StatusCode::kInvalidArgument));
+
+ EXPECT_THAT(ParseElement<int8_t>("🌮"),
+ StatusIs(StatusCode::kInvalidArgument));
+}
+
// Opaque element types parse as hex byte strings in memory order (so the byte
// values in the expectations appear little-endian relative to the hex text).
TEST(ElementStringUtilTest, ParseOpaqueElement) {
  std::vector<uint8_t> buffer1(1);
  IREE_EXPECT_OK(ParseElement("FF", IREE_HAL_ELEMENT_TYPE_OPAQUE_8,
                              iree::span<uint8_t>(buffer1)));
  EXPECT_THAT(buffer1, Eq(std::vector<uint8_t>{0xFF}));

  std::vector<uint16_t> buffer2(1);
  IREE_EXPECT_OK(ParseElement("FFCD", IREE_HAL_ELEMENT_TYPE_OPAQUE_16,
                              iree::span<uint16_t>(buffer2)));
  EXPECT_THAT(buffer2, Eq(std::vector<uint16_t>{0xCDFFu}));

  std::vector<uint32_t> buffer4(1);
  IREE_EXPECT_OK(ParseElement("FFCDAABB", IREE_HAL_ELEMENT_TYPE_OPAQUE_32,
                              iree::span<uint32_t>(buffer4)));
  EXPECT_THAT(buffer4, Eq(std::vector<uint32_t>{0xBBAACDFFu}));

  std::vector<uint64_t> buffer8(1);
  IREE_EXPECT_OK(ParseElement("FFCDAABBCCDDEEFF",
                              IREE_HAL_ELEMENT_TYPE_OPAQUE_64,
                              iree::span<uint64_t>(buffer8)));
  EXPECT_THAT(buffer8, Eq(std::vector<uint64_t>{0xFFEEDDCCBBAACDFFull}));
}
+
// Opaque hex parsing rejects size mismatches: empty targets, empty input, and
// odd/short/long digit counts.
TEST(ElementStringUtilTest, ParseOpaqueElementInvalid) {
  std::vector<uint8_t> buffer0(0);
  EXPECT_THAT(ParseElement("", IREE_HAL_ELEMENT_TYPE_OPAQUE_8,
                           iree::span<uint8_t>(buffer0)),
              StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElement("FF", IREE_HAL_ELEMENT_TYPE_OPAQUE_8,
                           iree::span<uint8_t>(buffer0)),
              StatusIs(StatusCode::kInvalidArgument));

  std::vector<uint8_t> buffer1(1);
  EXPECT_THAT(ParseElement("", IREE_HAL_ELEMENT_TYPE_OPAQUE_8,
                           iree::span<uint8_t>(buffer1)),
              StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElement("F", IREE_HAL_ELEMENT_TYPE_OPAQUE_8,
                           iree::span<uint8_t>(buffer1)),
              StatusIs(StatusCode::kInvalidArgument));
  EXPECT_THAT(ParseElement("FFC", IREE_HAL_ELEMENT_TYPE_OPAQUE_8,
                           iree::span<uint8_t>(buffer1)),
              StatusIs(StatusCode::kInvalidArgument));
}
+
// Extreme integer values round-trip to their decimal strings; floats use a
// short precision (1123.57) and uppercase exponent (-1.5E-10) format.
TEST(ElementStringUtilTest, FormatElement) {
  EXPECT_THAT(FormatElement<int8_t>(INT8_MIN), IsOkAndHolds(Eq("-128")));
  EXPECT_THAT(FormatElement<int8_t>(INT8_MAX), IsOkAndHolds(Eq("127")));
  EXPECT_THAT(FormatElement<uint8_t>(UINT8_MAX), IsOkAndHolds(Eq("255")));
  EXPECT_THAT(FormatElement<int16_t>(INT16_MIN), IsOkAndHolds(Eq("-32768")));
  EXPECT_THAT(FormatElement<int16_t>(INT16_MAX), IsOkAndHolds(Eq("32767")));
  EXPECT_THAT(FormatElement<uint16_t>(UINT16_MAX), IsOkAndHolds(Eq("65535")));
  EXPECT_THAT(FormatElement<int32_t>(INT32_MIN),
              IsOkAndHolds(Eq("-2147483648")));
  EXPECT_THAT(FormatElement<int32_t>(INT32_MAX),
              IsOkAndHolds(Eq("2147483647")));
  EXPECT_THAT(FormatElement<uint32_t>(UINT32_MAX),
              IsOkAndHolds(Eq("4294967295")));
  EXPECT_THAT(FormatElement<int64_t>(INT64_MIN),
              IsOkAndHolds(Eq("-9223372036854775808")));
  EXPECT_THAT(FormatElement<int64_t>(INT64_MAX),
              IsOkAndHolds(Eq("9223372036854775807")));
  EXPECT_THAT(FormatElement<uint64_t>(UINT64_MAX),
              IsOkAndHolds(Eq("18446744073709551615")));
  EXPECT_THAT(FormatElement<float>(1.5f), IsOkAndHolds(Eq("1.5")));
  EXPECT_THAT(FormatElement<double>(1123.56789456789),
              IsOkAndHolds(Eq("1123.57")));
  EXPECT_THAT(FormatElement<double>(-1.5e-10), IsOkAndHolds(Eq("-1.5E-10")));
}
+
// Opaque types format as uppercase hex of the raw bytes in memory order
// (hence the byte-swapped appearance of the multi-byte expectations).
TEST(ElementStringUtilTest, FormatOpaqueElement) {
  EXPECT_THAT(FormatElement<uint8_t>(129, IREE_HAL_ELEMENT_TYPE_OPAQUE_8),
              IsOkAndHolds(Eq("81")));
  EXPECT_THAT(FormatElement<int16_t>(-12345, IREE_HAL_ELEMENT_TYPE_OPAQUE_16),
              IsOkAndHolds(Eq("C7CF")));
  EXPECT_THAT(FormatElement<int32_t>(0, IREE_HAL_ELEMENT_TYPE_OPAQUE_32),
              IsOkAndHolds(Eq("00000000")));
  EXPECT_THAT(FormatElement<uint64_t>(0x8899AABBCCDDEEFFull,
                                      IREE_HAL_ELEMENT_TYPE_OPAQUE_64),
              IsOkAndHolds(Eq("FFEEDDCCBBAA9988")));
}
+
// Buffer element parsing supports empty input (zero fill), a single value
// (splat), and one-value-per-element lists with optional [] nesting.
TEST(BufferElementsStringUtilTest, ParseBufferElements) {
  // Empty:
  std::vector<int8_t> buffer0(0);
  IREE_EXPECT_OK(ParseBufferElements<int8_t>("", iree::span<int8_t>(buffer0)));
  EXPECT_THAT(buffer0, Eq(std::vector<int8_t>{}));
  std::vector<int8_t> buffer8(8, 123);
  IREE_EXPECT_OK(ParseBufferElements<int8_t>("", iree::span<int8_t>(buffer8)));
  EXPECT_THAT(buffer8, Eq(std::vector<int8_t>{0, 0, 0, 0, 0, 0, 0, 0}));
  // Scalar:
  std::vector<int8_t> buffer1(1);
  IREE_EXPECT_OK(ParseBufferElements<int8_t>("1", iree::span<int8_t>(buffer1)));
  EXPECT_THAT(buffer1, Eq(std::vector<int8_t>{1}));
  // Splat:
  IREE_EXPECT_OK(ParseBufferElements<int8_t>("3", iree::span<int8_t>(buffer8)));
  EXPECT_THAT(buffer8, Eq(std::vector<int8_t>{3, 3, 3, 3, 3, 3, 3, 3}));
  // 1:1:
  IREE_EXPECT_OK(ParseBufferElements<int8_t>("2", iree::span<int8_t>(buffer1)));
  EXPECT_THAT(buffer1, Eq(std::vector<int8_t>{2}));
  std::vector<int16_t> buffer8i16(8);
  IREE_EXPECT_OK(ParseBufferElements<int16_t>("0 1 2 3 4 5 6 7",
                                              iree::span<int16_t>(buffer8i16)));
  EXPECT_THAT(buffer8i16, Eq(std::vector<int16_t>{0, 1, 2, 3, 4, 5, 6, 7}));
  std::vector<int32_t> buffer8i32(8);
  IREE_EXPECT_OK(ParseBufferElements<int32_t>("[0 1 2 3] [4 5 6 7]",
                                              iree::span<int32_t>(buffer8i32)));
  EXPECT_THAT(buffer8i32, Eq(std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7}));
}
+
// Opaque buffer elements parse as whitespace-separated hex groups, bytes in
// memory order.
TEST(BufferElementsStringUtilTest, ParseBufferElementsOpaque) {
  std::vector<uint16_t> buffer3i16(3);
  IREE_EXPECT_OK(ParseBufferElements("0011 2233 4455",
                                     IREE_HAL_ELEMENT_TYPE_OPAQUE_16,
                                     iree::span<uint16_t>(buffer3i16)));
  EXPECT_THAT(buffer3i16, Eq(std::vector<uint16_t>{0x1100, 0x3322, 0x5544}));
}
+
// Element-count mismatches report OUT_OF_RANGE; malformed values report
// INVALID_ARGUMENT (the zero-capacity case surfaces as OUT_OF_RANGE first).
TEST(BufferElementsStringUtilTest, ParseBufferElementsInvalid) {
  std::vector<int8_t> buffer0(0);
  EXPECT_THAT(ParseBufferElements("abc", iree::span<int8_t>(buffer0)),
              StatusIs(StatusCode::kOutOfRange));
  std::vector<int8_t> buffer1(1);
  EXPECT_THAT(ParseBufferElements("abc", iree::span<int8_t>(buffer1)),
              StatusIs(StatusCode::kInvalidArgument));
  std::vector<int8_t> buffer8(8);
  EXPECT_THAT(ParseBufferElements("1 2 3", iree::span<int8_t>(buffer8)),
              StatusIs(StatusCode::kOutOfRange));
  std::vector<int8_t> buffer4(4);
  EXPECT_THAT(ParseBufferElements("1 2 3 4 5", iree::span<int8_t>(buffer4)),
              StatusIs(StatusCode::kOutOfRange));
}
+
// The shape-taking overload allocates element_count(shape) elements and
// supports the same zero-fill/splat/1:1 input forms.
TEST(BufferElementsStringUtilTest, ParseBufferElementsShaped) {
  // Empty:
  EXPECT_THAT(ParseBufferElements<int8_t>("", Shape{2, 4}),
              IsOkAndHolds(Eq(std::vector<int8_t>{0, 0, 0, 0, 0, 0, 0, 0})));
  // Scalar (rank-0 shape still holds one element):
  EXPECT_THAT(ParseBufferElements<int8_t>("", Shape{}),
              IsOkAndHolds(Eq(std::vector<int8_t>{0})));
  EXPECT_THAT(ParseBufferElements<int8_t>("1", Shape{}),
              IsOkAndHolds(Eq(std::vector<int8_t>{1})));
  // Splat:
  EXPECT_THAT(ParseBufferElements<int8_t>("3", Shape{2, 4}),
              IsOkAndHolds(Eq(std::vector<int8_t>{3, 3, 3, 3, 3, 3, 3, 3})));
  // 1:1:
  EXPECT_THAT(ParseBufferElements<int8_t>("2", Shape{1}),
              IsOkAndHolds(Eq(std::vector<int8_t>{2})));
  EXPECT_THAT(ParseBufferElements<int16_t>("0 1 2 3 4 5 6 7", Shape{2, 4}),
              IsOkAndHolds(Eq(std::vector<int16_t>{0, 1, 2, 3, 4, 5, 6, 7})));
  EXPECT_THAT(ParseBufferElements<int32_t>("[0 1 2 3] [4 5 6 7]", Shape{2, 4}),
              IsOkAndHolds(Eq(std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7})));
}
+
+TEST(BufferElementsStringUtilTest, ParseBufferElementsShapedInvalid) {
+ EXPECT_THAT(ParseBufferElements<int8_t>("abc", Shape{}),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(ParseBufferElements<int8_t>("1 2 3", Shape{2, 4}),
+ StatusIs(StatusCode::kOutOfRange));
+ EXPECT_THAT(ParseBufferElements<int8_t>("1 2 3 4 5", Shape{2, 2}),
+ StatusIs(StatusCode::kOutOfRange));
+}
+
+TEST(BufferElementsStringUtilTest, FormatBufferElements) {
+ EXPECT_THAT(FormatBufferElements<int8_t>({1}, Shape{}), IsOkAndHolds("1"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1}, Shape{1}), IsOkAndHolds("1"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{4}),
+ IsOkAndHolds("1 2 3 4"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{2, 2}),
+ IsOkAndHolds("[1 2][3 4]"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{4, 1}),
+ IsOkAndHolds("[1][2][3][4]"));
+ EXPECT_THAT(
+ FormatBufferElements<int32_t>(std::vector<int32_t>(300, -99),
+ Shape{100, 3}),
+ IsOkAndHolds(
+ "[-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99]"));
+}
+
+TEST(BufferElementsStringUtilTest, FormatBufferElementsElided) {
+ EXPECT_THAT(FormatBufferElements<int8_t>({1}, Shape{}, 0),
+ IsOkAndHolds("..."));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1}, Shape{}, 1), IsOkAndHolds("1"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1}, Shape{}, 99123),
+ IsOkAndHolds("1"));
+
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{4}, 0),
+ IsOkAndHolds("..."));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{4}, 1),
+ IsOkAndHolds("1..."));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{4}, 3),
+ IsOkAndHolds("1 2 3..."));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{4}, 99123),
+ IsOkAndHolds("1 2 3 4"));
+
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{2, 2}, 0),
+ IsOkAndHolds("[...][...]"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{2, 2}, 1),
+ IsOkAndHolds("[1...][...]"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{2, 2}, 3),
+ IsOkAndHolds("[1 2][3...]"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{2, 2}, 99123),
+ IsOkAndHolds("[1 2][3 4]"));
+}
+
+TEST(BufferViewStringUtilTest, Parse) {
+ IREE_ASSERT_OK_AND_ASSIGN(auto allocator, Allocator::CreateHostLocal());
+
+ // Zero fill.
+ IREE_ASSERT_OK_AND_ASSIGN(auto bv0, BufferView::Parse("i8", allocator));
+ EXPECT_THAT(bv0.buffer().CloneData<int8_t>(),
+ IsOkAndHolds(Eq(std::vector<int8_t>{0})));
+
+ // Zero fill (empty value).
+ IREE_ASSERT_OK_AND_ASSIGN(auto bv1, BufferView::Parse("2x2xi8=", allocator));
+ EXPECT_THAT(bv1.buffer().CloneData<int8_t>(),
+ IsOkAndHolds(Eq(std::vector<int8_t>{0, 0, 0, 0})));
+
+ // Splat.
+ IREE_ASSERT_OK_AND_ASSIGN(auto bv2, BufferView::Parse("2x2xi8=3", allocator));
+ EXPECT_THAT(bv2.buffer().CloneData<int8_t>(),
+ IsOkAndHolds(Eq(std::vector<int8_t>{3, 3, 3, 3})));
+
+ // Flat list.
+ IREE_ASSERT_OK_AND_ASSIGN(auto bv3,
+ BufferView::Parse("2x2xi8=1 2 3 4", allocator));
+ EXPECT_THAT(bv3.buffer().CloneData<int8_t>(),
+ IsOkAndHolds(Eq(std::vector<int8_t>{1, 2, 3, 4})));
+
+ // Whitespace and separators shouldn't matter.
+ IREE_ASSERT_OK_AND_ASSIGN(
+ auto bv4, BufferView::Parse(" 2x2xi8 = 1,\n2 3\t,4", allocator));
+ EXPECT_THAT(bv4.buffer().CloneData<int8_t>(),
+ IsOkAndHolds(Eq(std::vector<int8_t>{1, 2, 3, 4})));
+
+ // Brackets are optional.
+ IREE_ASSERT_OK_AND_ASSIGN(
+ auto bv5, BufferView::Parse("4xi16=[[0][1][2]][3]", allocator));
+ EXPECT_THAT(bv5.buffer().CloneData<int16_t>(),
+ IsOkAndHolds(Eq(std::vector<int16_t>{0, 1, 2, 3})));
+}
+
+TEST(BufferViewStringUtilTest, ParseInvalid) {
+ IREE_ASSERT_OK_AND_ASSIGN(auto allocator, Allocator::CreateHostLocal());
+
+ // Incomplete.
+ EXPECT_THAT(BufferView::Parse("", allocator),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(BufferView::Parse("asdf", allocator),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(BufferView::Parse("9x8=", allocator),
+ StatusIs(StatusCode::kInvalidArgument));
+ EXPECT_THAT(BufferView::Parse("=4", allocator),
+ StatusIs(StatusCode::kInvalidArgument));
+
+ // Partial data.
+ EXPECT_THAT(BufferView::Parse("2x4xi32=5 3", allocator),
+ StatusIs(StatusCode::kOutOfRange));
+}
+
+TEST(BufferViewStringUtilTest, ToString) {
+ EXPECT_THAT(FormatBufferElements<int8_t>({1}, Shape{}), IsOkAndHolds("1"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1}, Shape{1}), IsOkAndHolds("1"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{4}),
+ IsOkAndHolds("1 2 3 4"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{2, 2}),
+ IsOkAndHolds("[1 2][3 4]"));
+ EXPECT_THAT(FormatBufferElements<int8_t>({1, 2, 3, 4}, Shape{4, 1}),
+ IsOkAndHolds("[1][2][3][4]"));
+ EXPECT_THAT(
+ FormatBufferElements<int32_t>(std::vector<int32_t>(300, -99),
+ Shape{100, 3}),
+ IsOkAndHolds(
+ "[-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99]"));
+}
+
+TEST(BufferViewStringUtilTest, RoundTrip) {
+ IREE_ASSERT_OK_AND_ASSIGN(auto allocator, Allocator::CreateHostLocal());
+ auto expect_round_trip = [&](std::string source_value) {
+ IREE_ASSERT_OK_AND_ASSIGN(auto buffer_view,
+ BufferView::Parse(source_value, allocator));
+ EXPECT_THAT(buffer_view.ToString(), IsOkAndHolds(source_value));
+ };
+
+ expect_round_trip("i8=-8");
+ expect_round_trip("si8=-8");
+ expect_round_trip("ui8=239");
+ expect_round_trip("4xi8=0 -1 2 3");
+ expect_round_trip("4xsi8=0 -1 2 3");
+ expect_round_trip("4xi16=0 -1 2 3");
+ expect_round_trip("4xui16=0 1 2 3");
+ expect_round_trip("2x2xi32=[0 1][2 3]");
+ expect_round_trip("4xf16=0 0.5 2 3");
+ expect_round_trip("4xf32=0 1.1 2 3");
+ expect_round_trip("4xf64=0 1.1 2 3");
+ expect_round_trip("1x2x3xi8=[[0 1 2][3 4 5]]");
+ expect_round_trip("2x*16=AABB CCDD");
+ expect_round_trip(
+ "100x3xi16=[-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 "
+ "-99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99][-99 -99 -99]");
+}
+
+} // namespace
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/utils/BUILD b/runtime/src/iree/hal/utils/BUILD
new file mode 100644
index 0000000..e01552a
--- /dev/null
+++ b/runtime/src/iree/hal/utils/BUILD
@@ -0,0 +1,77 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/bazel:cc_binary_benchmark.bzl", "cc_binary_benchmark")
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_runtime_cc_library(
+ name = "buffer_transfer",
+ srcs = ["buffer_transfer.c"],
+ hdrs = ["buffer_transfer.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/hal",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "deferred_command_buffer",
+ srcs = ["deferred_command_buffer.c"],
+ hdrs = ["deferred_command_buffer.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":resource_set",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal:arena",
+ "//runtime/src/iree/hal",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "resource_set",
+ srcs = ["resource_set.c"],
+ hdrs = ["resource_set.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal:arena",
+ "//runtime/src/iree/hal",
+ ],
+)
+
+cc_binary_benchmark(
+ name = "resource_set_benchmark",
+ srcs = ["resource_set_benchmark.c"],
+ deps = [
+ ":resource_set",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base/internal:prng",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/testing:benchmark",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "resource_set_test",
+ srcs = ["resource_set_test.cc"],
+ deps = [
+ ":resource_set",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
diff --git a/runtime/src/iree/hal/utils/CMakeLists.txt b/runtime/src/iree/hal/utils/CMakeLists.txt
new file mode 100644
index 0000000..1f589f5
--- /dev/null
+++ b/runtime/src/iree/hal/utils/CMakeLists.txt
@@ -0,0 +1,85 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/utils/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ buffer_transfer
+ HDRS
+ "buffer_transfer.h"
+ SRCS
+ "buffer_transfer.c"
+ DEPS
+ iree::base
+ iree::base::tracing
+ iree::hal
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ deferred_command_buffer
+ HDRS
+ "deferred_command_buffer.h"
+ SRCS
+ "deferred_command_buffer.c"
+ DEPS
+ ::resource_set
+ iree::base
+ iree::base::internal::arena
+ iree::base::tracing
+ iree::hal
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ resource_set
+ HDRS
+ "resource_set.h"
+ SRCS
+ "resource_set.c"
+ DEPS
+ iree::base
+ iree::base::internal::arena
+ iree::base::tracing
+ iree::hal
+ PUBLIC
+)
+
+iree_cc_binary_benchmark(
+ NAME
+ resource_set_benchmark
+ SRCS
+ "resource_set_benchmark.c"
+ DEPS
+ ::resource_set
+ iree::base
+ iree::base::internal::prng
+ iree::hal
+ iree::testing::benchmark
+ TESTONLY
+)
+
+iree_cc_test(
+ NAME
+ resource_set_test
+ SRCS
+ "resource_set_test.cc"
+ DEPS
+ ::resource_set
+ iree::base
+ iree::hal
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/utils/buffer_transfer.c b/runtime/src/iree/hal/utils/buffer_transfer.c
new file mode 100644
index 0000000..553d049
--- /dev/null
+++ b/runtime/src/iree/hal/utils/buffer_transfer.c
@@ -0,0 +1,364 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/utils/buffer_transfer.h"
+
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_device_transfer_range implementations
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT iree_status_t iree_hal_device_submit_transfer_range_and_wait(
+ iree_hal_device_t* device, iree_hal_transfer_buffer_t source,
+ iree_device_size_t source_offset, iree_hal_transfer_buffer_t target,
+ iree_device_size_t target_offset, iree_device_size_t data_length,
+ iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout) {
+ // If the source and target are both mappable into host memory (or are host
+ // memory) then we can use the fast zero-alloc path. This may actually be
+ // slower than doing a device queue transfer depending on the size of the data
+ // and where the memory lives. For example, if we have two device buffers in
+ // device-local host-visible memory we'd be performing the transfer by pulling
+ // all the memory to the CPU and pushing it back again.
+ // TODO(benvanik): check for device-local -> device-local and avoid mapping.
+ bool is_source_mappable =
+ !source.device_buffer ||
+ (iree_all_bits_set(iree_hal_buffer_memory_type(source.device_buffer),
+ IREE_HAL_MEMORY_TYPE_HOST_VISIBLE) &&
+ iree_all_bits_set(iree_hal_buffer_allowed_usage(source.device_buffer),
+ IREE_HAL_BUFFER_USAGE_MAPPING));
+ bool is_target_mappable =
+ !target.device_buffer ||
+ (iree_all_bits_set(iree_hal_buffer_memory_type(target.device_buffer),
+ IREE_HAL_MEMORY_TYPE_HOST_VISIBLE) &&
+ iree_all_bits_set(iree_hal_buffer_allowed_usage(target.device_buffer),
+ IREE_HAL_BUFFER_USAGE_MAPPING));
+ if (is_source_mappable && is_target_mappable) {
+ return iree_hal_device_transfer_mappable_range(
+ device, source, source_offset, target, target_offset, data_length,
+ flags, timeout);
+ }
+
+ // If the source is a host buffer under 64KB then we can do a more efficient
+ // (though still relatively costly) update instead of needing a staging
+ // buffer.
+ if (!source.device_buffer && target.device_buffer &&
+ data_length <= IREE_HAL_COMMAND_BUFFER_MAX_UPDATE_SIZE) {
+ const iree_hal_transfer_command_t transfer_command = {
+ .type = IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE,
+ .update =
+ {
+ .source_buffer = source.host_buffer.data,
+ .source_offset = source_offset,
+ .target_buffer = target.device_buffer,
+ .target_offset = target_offset,
+ .length = data_length,
+ },
+ };
+ return iree_hal_device_transfer_and_wait(device, /*wait_semaphore=*/NULL,
+ /*wait_value=*/0ull, 1,
+ &transfer_command, timeout);
+ }
+
+ iree_status_t status = iree_ok_status();
+
+ // Allocate the staging buffer for upload to the device.
+ iree_hal_buffer_t* source_buffer = source.device_buffer;
+ if (!source_buffer) {
+ // Allocate staging memory with a copy of the host data. We only initialize
+ // the portion being transferred.
+ // TODO(benvanik): use import if supported to avoid the allocation/copy.
+ // TODO(benvanik): make this device-local + host-visible? can be better for
+ // uploads as we know we are never going to read it back.
+ const iree_hal_buffer_params_t source_params = {
+ .type = IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
+ IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+ .usage = IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING,
+ };
+ status = iree_hal_allocator_allocate_buffer(
+ iree_hal_device_allocator(device), source_params, data_length,
+ iree_make_const_byte_span(source.host_buffer.data + source_offset,
+ data_length),
+ &source_buffer);
+ source_offset = 0;
+ }
+
+ // Allocate the staging buffer for download from the device.
+ iree_hal_buffer_t* target_buffer = target.device_buffer;
+ if (!target_buffer) {
+ // Allocate uninitialized staging memory for the transfer target.
+ // We only allocate enough for the portion we are transferring.
+ // TODO(benvanik): use import if supported to avoid the allocation/copy.
+ const iree_hal_buffer_params_t target_params = {
+ .type = IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
+ IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+ .usage = IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING,
+ };
+ status = iree_hal_allocator_allocate_buffer(
+ iree_hal_device_allocator(device), target_params, data_length,
+ iree_const_byte_span_empty(), &target_buffer);
+ target_offset = 0;
+ }
+
+ // Issue synchronous device copy.
+ if (iree_status_is_ok(status)) {
+ const iree_hal_transfer_command_t transfer_command = {
+ .type = IREE_HAL_TRANSFER_COMMAND_TYPE_COPY,
+ .copy =
+ {
+ .source_buffer = source_buffer,
+ .source_offset = source_offset,
+ .target_buffer = target_buffer,
+ .target_offset = target_offset,
+ .length = data_length,
+ },
+ };
+ status = iree_hal_device_transfer_and_wait(device, /*wait_semaphore=*/NULL,
+ /*wait_value=*/0ull, 1,
+ &transfer_command, timeout);
+ }
+
+ // Read back the staging buffer into memory, if needed.
+ if (iree_status_is_ok(status) && !target.device_buffer) {
+ status = iree_hal_buffer_map_read(target_buffer, 0, target.host_buffer.data,
+ data_length);
+ }
+
+ // Discard staging buffers, if they were required.
+ if (!source.device_buffer) iree_hal_buffer_release(source_buffer);
+ if (!target.device_buffer) iree_hal_buffer_release(target_buffer);
+
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_mappable_range(
+ iree_hal_device_t* device, iree_hal_transfer_buffer_t source,
+ iree_device_size_t source_offset, iree_hal_transfer_buffer_t target,
+ iree_device_size_t target_offset, iree_device_size_t data_length,
+ iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout) {
+ iree_status_t status = iree_ok_status();
+
+ iree_hal_buffer_mapping_t source_mapping = {{0}};
+ if (iree_status_is_ok(status)) {
+ if (source.device_buffer) {
+ status = iree_hal_buffer_map_range(
+ source.device_buffer, IREE_HAL_MAPPING_MODE_SCOPED,
+ IREE_HAL_MEMORY_ACCESS_READ, source_offset, data_length,
+ &source_mapping);
+ } else {
+ source_mapping = (iree_hal_buffer_mapping_t){
+ .contents = source.host_buffer,
+ };
+ }
+ }
+
+ iree_hal_buffer_mapping_t target_mapping = {{0}};
+ if (iree_status_is_ok(status)) {
+ if (target.device_buffer) {
+ status = iree_hal_buffer_map_range(
+ target.device_buffer, IREE_HAL_MAPPING_MODE_SCOPED,
+ IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE, target_offset, data_length,
+ &target_mapping);
+ } else {
+ target_mapping = (iree_hal_buffer_mapping_t){
+ .contents = target.host_buffer,
+ };
+ }
+ }
+
+ iree_device_size_t adjusted_data_length = 0;
+ if (iree_status_is_ok(status)) {
+ // Adjust the data length based on the min we have.
+ if (data_length == IREE_WHOLE_BUFFER) {
+ // Whole buffer copy requested - that could mean either, so take the min.
+ adjusted_data_length = iree_min(source_mapping.contents.data_length,
+ target_mapping.contents.data_length);
+ } else {
+ // Specific length requested - validate that we have matching lengths.
+ IREE_ASSERT_EQ(source_mapping.contents.data_length,
+ target_mapping.contents.data_length);
+ adjusted_data_length = target_mapping.contents.data_length;
+ }
+
+ // Perform the copy, assuming there's anything to do.
+ if (adjusted_data_length != 0) {
+ memcpy(target_mapping.contents.data, source_mapping.contents.data,
+ adjusted_data_length);
+ }
+ }
+
+ if (source.device_buffer) {
+ status =
+ iree_status_join(status, iree_hal_buffer_unmap_range(&source_mapping));
+ }
+ if (target.device_buffer) {
+ if (adjusted_data_length > 0 &&
+ !iree_all_bits_set(iree_hal_buffer_memory_type(target.device_buffer),
+ IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
+ status = iree_status_join(
+ status, iree_hal_buffer_flush_range(&target_mapping, 0,
+ adjusted_data_length));
+ }
+ status =
+ iree_status_join(status, iree_hal_buffer_unmap_range(&target_mapping));
+ }
+ return status;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_map_range implementations
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_emulated_buffer_mapping_t {
+ iree_hal_buffer_t* host_local_buffer;
+ iree_hal_buffer_mapping_t host_local_mapping;
+} iree_hal_emulated_buffer_mapping_t;
+
+IREE_API_EXPORT iree_status_t iree_hal_buffer_emulated_map_range(
+ iree_hal_device_t* device, iree_hal_buffer_t* buffer,
+ iree_hal_mapping_mode_t mapping_mode,
+ iree_hal_memory_access_t memory_access,
+ iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+ iree_hal_buffer_mapping_t* mapping) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(buffer);
+ IREE_ASSERT_ARGUMENT(mapping);
+
+ iree_hal_allocator_t* device_allocator = iree_hal_device_allocator(device);
+ iree_allocator_t host_allocator = iree_hal_device_host_allocator(device);
+
+ // We can't perform persistent mapping with this as we need to manage the
+ // staging buffer lifetime.
+ if (IREE_UNLIKELY(mapping_mode == IREE_HAL_MAPPING_MODE_PERSISTENT)) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "emulated buffer mapping only possible with scoped mappings");
+ }
+
+ // No implementation should be using this emulated method with memory that is
+ // allocated as mappable.
+ if (IREE_UNLIKELY(iree_all_bits_set(iree_hal_buffer_memory_type(buffer),
+ IREE_HAL_BUFFER_USAGE_MAPPING))) {
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "emulated buffer mapping should not be used with mappable buffers");
+ }
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)local_byte_length);
+
+ // NOTE: this is assuming that the host is going to be doing a lot of work
+ // on the mapped memory and wants read/write caching and such. If the user
+ // wants write combining on device memory and other things they should ensure
+ // this emulated mapping path is not hit.
+
+ // Create a transient struct we use to track the emulated operation.
+ // We could pack this into the mapping but this composes better - it's small
+ // and pooled by the host allocator anyway.
+ iree_hal_emulated_buffer_mapping_t* emulation_state = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc(host_allocator, sizeof(*emulation_state),
+ (void**)&emulation_state));
+
+ // Allocate the buffer we'll be using to stage our copy of the device memory.
+ // All devices should be able to satisfy this host-local + mapping request.
+ iree_status_t status = iree_hal_allocator_allocate_buffer(
+ device_allocator,
+ (iree_hal_buffer_params_t){
+ .type = IREE_HAL_MEMORY_TYPE_HOST_LOCAL,
+ .usage =
+ IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING,
+ },
+ local_byte_length, iree_const_byte_span_empty(),
+ &emulation_state->host_local_buffer);
+
+ // We need to capture a copy of the device buffer to work with; unless the
+ // user was nice and said they don't care about the contents with the DISCARD
+ // bit. Ideally we'd also enable invalidate_range to specify subranges we want
+ // to map.
+ if (iree_status_is_ok(status) &&
+ !iree_all_bits_set(memory_access, IREE_HAL_MEMORY_ACCESS_DISCARD)) {
+ // Download (device->host) the data.
+ status = iree_hal_device_transfer_range(
+ device, iree_hal_make_device_transfer_buffer(mapping->buffer),
+ local_byte_offset,
+ iree_hal_make_device_transfer_buffer(
+ emulation_state->host_local_buffer),
+ 0, local_byte_length, IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+ iree_infinite_timeout());
+ }
+
+ if (iree_status_is_ok(status)) {
+ // Map the scratch buffer: map-ception.
+ // Code-wise it looks like this may loop back onto this emulated path
+ // but no implementation should be using this emulation if they have host
+ // local IREE_HAL_BUFFER_USAGE_MAPPING memory - and we check that above.
+ status = iree_hal_buffer_map_range(emulation_state->host_local_buffer,
+ IREE_HAL_MAPPING_MODE_SCOPED,
+ memory_access, 0, local_byte_length,
+ &emulation_state->host_local_mapping);
+ }
+
+ // Retain the scratch buffer for the duration of the mapping.
+ if (iree_status_is_ok(status)) {
+ // Note that we are giving back the host-local mapped contents to the user -
+ // they don't need to know it's from our staging buffer.
+ mapping->contents = emulation_state->host_local_mapping.contents;
+ mapping->impl.reserved[0] = (uint64_t)((uintptr_t)emulation_state);
+ } else {
+ status = iree_status_join(
+ status,
+ iree_hal_buffer_unmap_range(&emulation_state->host_local_mapping));
+ iree_hal_buffer_release(emulation_state->host_local_buffer);
+ iree_allocator_free(host_allocator, emulation_state);
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_buffer_emulated_unmap_range(
+ iree_hal_device_t* device, iree_hal_buffer_t* buffer,
+ iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+ iree_hal_buffer_mapping_t* mapping) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(buffer);
+ IREE_ASSERT_ARGUMENT(mapping);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)local_byte_length);
+ iree_hal_emulated_buffer_mapping_t* emulation_state =
+ (iree_hal_emulated_buffer_mapping_t*)((uintptr_t)
+ mapping->impl.reserved[0]);
+ IREE_ASSERT_NE(emulation_state, NULL);
+
+ // Unmap the scratch buffer first to make it available for copying (if
+ // needed).
+ iree_status_t status =
+ iree_hal_buffer_unmap_range(&emulation_state->host_local_mapping);
+
+ // If we were writing then we'll need to flush the range.
+ // Ideally we'd keep track of this on the mapping itself based on the user's
+ // calls to flush_range to limit how much we need to transfer.
+ if (iree_status_is_ok(status) &&
+ iree_all_bits_set(mapping->impl.allowed_access,
+ IREE_HAL_MEMORY_ACCESS_WRITE)) {
+ // Upload (host->device) the data.
+ status = iree_hal_device_transfer_range(
+ device,
+ iree_hal_make_device_transfer_buffer(
+ emulation_state->host_local_buffer),
+ 0, iree_hal_make_device_transfer_buffer(mapping->buffer),
+ local_byte_offset, local_byte_length,
+ IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout());
+ }
+
+ // Deallocate the scratch buffer and our emulation state.
+ iree_hal_buffer_release(emulation_state->host_local_buffer);
+ iree_allocator_t host_allocator = iree_hal_device_host_allocator(device);
+ iree_allocator_free(host_allocator, emulation_state);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/hal/utils/buffer_transfer.h b/runtime/src/iree/hal/utils/buffer_transfer.h
new file mode 100644
index 0000000..2daac0a
--- /dev/null
+++ b/runtime/src/iree/hal/utils/buffer_transfer.h
@@ -0,0 +1,69 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_UTILS_BUFFER_TRANSFER_H_
+#define IREE_HAL_UTILS_BUFFER_TRANSFER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_hal_device_transfer_range implementations
+//===----------------------------------------------------------------------===//
+
+// Performs a full transfer operation on a device transfer queue.
+// This creates a transfer command buffer, submits it against the device, and
+// waits for it to complete synchronously. Implementations that can do this
+// cheaper are encouraged to do so.
+//
+// Precondition: source and target do not overlap.
+IREE_API_EXPORT iree_status_t iree_hal_device_submit_transfer_range_and_wait(
+ iree_hal_device_t* device, iree_hal_transfer_buffer_t source,
+ iree_device_size_t source_offset, iree_hal_transfer_buffer_t target,
+ iree_device_size_t target_offset, iree_device_size_t data_length,
+ iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout);
+
+// Generic implementation of iree_hal_device_transfer_range for when the buffers
+// are mappable. In certain implementations even if buffers are mappable it's
+// often cheaper to still use the full queue transfers: instead of wasting CPU
+// cycles copying the memory (and possible PCIe round-trips) letting the device
+// do it is effectively free.
+//
+// Precondition: source and target do not overlap.
+IREE_API_EXPORT iree_status_t iree_hal_device_transfer_mappable_range(
+ iree_hal_device_t* device, iree_hal_transfer_buffer_t source,
+ iree_device_size_t source_offset, iree_hal_transfer_buffer_t target,
+ iree_device_size_t target_offset, iree_device_size_t data_length,
+ iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_map_range implementations
+//===----------------------------------------------------------------------===//
+
+// Generic implementation of iree_hal_buffer_map_range and unmap_range for when
+// the buffer is not mappable and a full device transfer is required. This will
+// allocate additional host-local buffers and submit copy commands.
+// Implementations able to do this more efficiently should do so.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_emulated_map_range(
+ iree_hal_device_t* device, iree_hal_buffer_t* buffer,
+ iree_hal_mapping_mode_t mapping_mode,
+ iree_hal_memory_access_t memory_access,
+ iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+ iree_hal_buffer_mapping_t* mapping);
+IREE_API_EXPORT iree_status_t iree_hal_buffer_emulated_unmap_range(
+ iree_hal_device_t* device, iree_hal_buffer_t* buffer,
+ iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+ iree_hal_buffer_mapping_t* mapping);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_UTILS_BUFFER_TRANSFER_H_
diff --git a/runtime/src/iree/hal/utils/deferred_command_buffer.c b/runtime/src/iree/hal/utils/deferred_command_buffer.c
new file mode 100644
index 0000000..347a222
--- /dev/null
+++ b/runtime/src/iree/hal/utils/deferred_command_buffer.c
@@ -0,0 +1,883 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/utils/deferred_command_buffer.h"
+
+#include "iree/base/internal/arena.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/utils/resource_set.h"
+
+//===----------------------------------------------------------------------===//
+// Command recording structures
+//===----------------------------------------------------------------------===//
+
+// Discriminates the concrete command struct that follows an
+// iree_hal_cmd_header_t in the recorded command list.
+typedef enum iree_hal_command_type_e {
+  IREE_HAL_CMD_EXECUTION_BARRIER = 0,
+  IREE_HAL_CMD_SIGNAL_EVENT,
+  IREE_HAL_CMD_RESET_EVENT,
+  IREE_HAL_CMD_WAIT_EVENTS,
+  IREE_HAL_CMD_DISCARD_BUFFER,
+  IREE_HAL_CMD_FILL_BUFFER,
+  IREE_HAL_CMD_UPDATE_BUFFER,
+  IREE_HAL_CMD_COPY_BUFFER,
+  IREE_HAL_CMD_PUSH_CONSTANTS,
+  IREE_HAL_CMD_PUSH_DESCRIPTOR_SET,
+  IREE_HAL_CMD_BIND_DESCRIPTOR_SET,
+  IREE_HAL_CMD_DISPATCH,
+  IREE_HAL_CMD_DISPATCH_INDIRECT,
+} iree_hal_cmd_type_t;
+
+// Header prefixed to all commands, forming a linked-list.
+//
+// Each command is allocated from the arena and does *not* retain any resources.
+// We could elide some of these commands by keeping local state however that
+// requires knowing more about the target device (executable layouts, etc) and
+// prevents using this as a way to debug or benchmark command buffers. The
+// intent is that each command captures the exact information passed during the
+// call such that the target command buffer cannot tell they were deferred.
+//
+// As each command is variable sized we store pointers to the following command
+// to allow us to walk the list during replay. Storing just a size would be
+// insufficient as commands may be spread across many arena blocks from the
+// block pool.
+typedef struct iree_hal_cmd_header_t {
+  // Next command in the list or NULL if the end.
+  struct iree_hal_cmd_header_t* next;
+  // Type of the command that follows.
+  iree_hal_cmd_type_t type;
+} iree_hal_cmd_header_t;
+
+// Signature shared by all per-command replay functions so that replay can be
+// dispatched from a single table keyed on iree_hal_cmd_type_t.
+typedef iree_status_t (*iree_hal_cmd_apply_fn_t)(
+    iree_hal_command_buffer_t* target_command_buffer,
+    iree_hal_cmd_header_t* cmd_header);
+
+//===----------------------------------------------------------------------===//
+// Command list allocation and storage
+//===----------------------------------------------------------------------===//
+
+// A singly-linked list of commands allocated from an arena.
+typedef struct iree_hal_cmd_list_t {
+  // Arena used to hold the recorded commands using block_pool for storage.
+  // Will be reset as the command buffer is re-recorded.
+  iree_arena_allocator_t arena;
+
+  // Head of the command list.
+  iree_hal_cmd_header_t* head;
+  // Tail of the command list (may be head).
+  iree_hal_cmd_header_t* tail;
+} iree_hal_cmd_list_t;
+
+// Initializes a new command list that allocates from the given |block_pool|.
+// Upon return the command list is ready for recording.
+static void iree_hal_cmd_list_initialize(iree_arena_block_pool_t* block_pool,
+                                         iree_hal_cmd_list_t* out_cmd_list) {
+  iree_arena_initialize(block_pool, &out_cmd_list->arena);
+  out_cmd_list->head = NULL;
+  out_cmd_list->tail = NULL;
+}
+
+// Resets the command list and returns all arena blocks back to the block pool.
+// Upon return the command list is ready for recording.
+static void iree_hal_cmd_list_reset(iree_hal_cmd_list_t* cmd_list) {
+  // We could make reset retain a single block so as we know that we'll be
+  // adding more commands on this path and it would remove a round-trip through
+  // the pool.
+  iree_arena_reset(&cmd_list->arena);
+  cmd_list->head = NULL;
+  cmd_list->tail = NULL;
+}
+
+// Deinitializes the command list, preparing for destruction.
+// Currently identical to a reset: all storage lives in the arena.
+static void iree_hal_cmd_list_deinitialize(iree_hal_cmd_list_t* cmd_list) {
+  iree_hal_cmd_list_reset(cmd_list);
+}
+
+// Appends a new command to the command list and returns the base pointer to its
+// storage. Callers must cast to the appropriate type and populate all fields.
+// Only the header fields (next/type) are initialized here; any trailing
+// payload bytes of |command_size| are left uninitialized for the caller.
+static iree_status_t iree_hal_cmd_list_append_command(
+    iree_hal_cmd_list_t* cmd_list, iree_hal_cmd_type_t command_type,
+    iree_host_size_t command_size, void** out_cmd) {
+  iree_hal_cmd_header_t* header = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_arena_allocate(&cmd_list->arena, command_size, (void**)&header));
+  header->next = NULL;
+  header->type = command_type;
+  if (!cmd_list->head) {
+    cmd_list->head = header;
+  } else if (cmd_list->tail) {
+    cmd_list->tail->next = header;
+  }
+  cmd_list->tail = header;
+  *out_cmd = header;
+  return iree_ok_status();
+}
+
+// Clones a source buffer and returns the pointer into the arena.
+// The copy shares the arena lifetime and is freed on the next list reset.
+static iree_status_t iree_hal_cmd_list_clone_data(iree_hal_cmd_list_t* cmd_list,
+                                                  const void* source_data,
+                                                  iree_host_size_t data_length,
+                                                  void** out_target_data) {
+  void* target_data = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_arena_allocate(&cmd_list->arena, data_length, &target_data));
+  memcpy(target_data, source_data, data_length);
+  *out_target_data = target_data;
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_deferred_command_buffer_t implementation
+//===----------------------------------------------------------------------===//
+
+// Command buffer implementation that records commands into an arena-backed
+// list for later replay against a concrete target command buffer.
+typedef struct iree_hal_deferred_command_buffer_t {
+  iree_hal_command_buffer_t base;
+  iree_allocator_t host_allocator;
+
+  // Maintains a reference to all resources used within the command buffer.
+  // Reset on each begin.
+  iree_hal_resource_set_t* resource_set;
+
+  // All commands in encoding order.
+  iree_hal_cmd_list_t cmd_list;
+} iree_hal_deferred_command_buffer_t;
+
+// Defined at the bottom of the file; forward declared here so the type check
+// in the cast below can reference it.
+static const iree_hal_command_buffer_vtable_t
+    iree_hal_deferred_command_buffer_vtable;
+
+// Downcasts |base_value| after asserting it is a deferred command buffer.
+static iree_hal_deferred_command_buffer_t*
+iree_hal_deferred_command_buffer_cast(iree_hal_command_buffer_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_deferred_command_buffer_vtable);
+  return (iree_hal_deferred_command_buffer_t*)base_value;
+}
+
+// Creates a deferred command buffer that records into |block_pool|-backed
+// storage; |host_allocator| holds only the command buffer struct itself.
+IREE_API_EXPORT iree_status_t iree_hal_deferred_command_buffer_create(
+    iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  IREE_ASSERT_ARGUMENT(block_pool);
+  IREE_ASSERT_ARGUMENT(out_command_buffer);
+  *out_command_buffer = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_deferred_command_buffer_t* command_buffer = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(*command_buffer), (void**)&command_buffer);
+  if (iree_status_is_ok(status)) {
+    iree_hal_command_buffer_initialize(
+        device, mode, command_categories, IREE_HAL_QUEUE_AFFINITY_ANY,
+        &iree_hal_deferred_command_buffer_vtable, &command_buffer->base);
+    command_buffer->host_allocator = host_allocator;
+    iree_hal_cmd_list_initialize(block_pool, &command_buffer->cmd_list);
+
+    status = iree_hal_resource_set_allocate(block_pool,
+                                            &command_buffer->resource_set);
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_command_buffer = &command_buffer->base;
+  } else {
+    // NOTE(review): if the malloc above failed, |command_buffer| is NULL here
+    // and `&command_buffer->base` is a NULL base pointer — confirm
+    // iree_hal_command_buffer_destroy tolerates NULL before relying on this
+    // path.
+    iree_hal_command_buffer_destroy(&command_buffer->base);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases the recorded command storage back to the block pool and frees the
+// command buffer struct itself.
+static void iree_hal_deferred_command_buffer_destroy(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_allocator_t host_allocator = command_buffer->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_cmd_list_deinitialize(&command_buffer->cmd_list);
+  iree_hal_resource_set_free(command_buffer->resource_set);
+  iree_allocator_free(host_allocator, command_buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns |command_buffer| if it is a deferred command buffer; otherwise NULL.
+static void* iree_hal_deferred_command_buffer_dyn_cast(
+    iree_hal_command_buffer_t* command_buffer, const void* vtable) {
+  if (vtable == &iree_hal_deferred_command_buffer_vtable) {
+    IREE_HAL_ASSERT_TYPE(command_buffer, vtable);
+    return command_buffer;
+  }
+  return NULL;
+}
+
+// Begins (re-)recording: discards any previously recorded commands and drops
+// the references the command buffer was holding on their resources.
+static iree_status_t iree_hal_deferred_command_buffer_begin(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_reset(&command_buffer->cmd_list);
+  iree_hal_resource_set_reset(command_buffer->resource_set);
+  return iree_ok_status();
+}
+
+// Ends recording; nothing to finalize as commands are applied lazily.
+static iree_status_t iree_hal_deferred_command_buffer_end(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_EXECUTION_BARRIER
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_execution_barrier.
+// Barrier arrays are cloned into the arena so callers may free theirs.
+typedef struct iree_hal_cmd_execution_barrier_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_execution_stage_t source_stage_mask;
+  iree_hal_execution_stage_t target_stage_mask;
+  iree_hal_execution_barrier_flags_t flags;
+  iree_host_size_t memory_barrier_count;
+  const iree_hal_memory_barrier_t* memory_barriers;
+  iree_host_size_t buffer_barrier_count;
+  const iree_hal_buffer_barrier_t* buffer_barriers;
+} iree_hal_cmd_execution_barrier_t;
+
+// Records an execution barrier into the deferred command list.
+static iree_status_t iree_hal_deferred_command_buffer_execution_barrier(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_hal_execution_barrier_flags_t flags,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  iree_hal_cmd_list_t* cmd_list =
+      &iree_hal_deferred_command_buffer_cast(base_command_buffer)->cmd_list;
+  iree_hal_cmd_execution_barrier_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_EXECUTION_BARRIER, sizeof(*cmd), (void**)&cmd));
+  cmd->source_stage_mask = source_stage_mask;
+  cmd->target_stage_mask = target_stage_mask;
+  cmd->flags = flags;
+  cmd->memory_barrier_count = memory_barrier_count;
+  cmd->memory_barriers = NULL;
+  cmd->buffer_barrier_count = buffer_barrier_count;
+  cmd->buffer_barriers = NULL;
+  // Clone the caller-owned barrier arrays into the arena so the recorded
+  // command remains valid after this call returns.
+  if (memory_barrier_count > 0) {
+    IREE_RETURN_IF_ERROR(iree_hal_cmd_list_clone_data(
+        cmd_list, memory_barriers,
+        sizeof(memory_barriers[0]) * memory_barrier_count,
+        (void**)&cmd->memory_barriers));
+  }
+  if (buffer_barrier_count > 0) {
+    IREE_RETURN_IF_ERROR(iree_hal_cmd_list_clone_data(
+        cmd_list, buffer_barriers,
+        sizeof(buffer_barriers[0]) * buffer_barrier_count,
+        (void**)&cmd->buffer_barriers));
+  }
+  return iree_ok_status();
+}
+
+// Replays a recorded execution barrier against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_execution_barrier(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_execution_barrier_t* cmd) {
+  return iree_hal_command_buffer_execution_barrier(
+      target_command_buffer, cmd->source_stage_mask, cmd->target_stage_mask,
+      cmd->flags, cmd->memory_barrier_count, cmd->memory_barriers,
+      cmd->buffer_barrier_count, cmd->buffer_barriers);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_SIGNAL_EVENT
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_signal_event.
+typedef struct iree_hal_cmd_signal_event_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_event_t* event;
+  iree_hal_execution_stage_t source_stage_mask;
+} iree_hal_cmd_signal_event_t;
+
+// Records an event signal, retaining |event| in the resource set.
+static iree_status_t iree_hal_deferred_command_buffer_signal_event(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 1, &event));
+  iree_hal_cmd_signal_event_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_SIGNAL_EVENT, sizeof(*cmd), (void**)&cmd));
+  cmd->event = event;
+  cmd->source_stage_mask = source_stage_mask;
+  return iree_ok_status();
+}
+
+// Replays a recorded event signal against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_signal_event(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_signal_event_t* cmd) {
+  return iree_hal_command_buffer_signal_event(target_command_buffer, cmd->event,
+                                              cmd->source_stage_mask);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_RESET_EVENT
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_reset_event.
+typedef struct iree_hal_cmd_reset_event_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_event_t* event;
+  iree_hal_execution_stage_t source_stage_mask;
+} iree_hal_cmd_reset_event_t;
+
+// Records an event reset, retaining |event| in the resource set.
+static iree_status_t iree_hal_deferred_command_buffer_reset_event(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 1, &event));
+  iree_hal_cmd_reset_event_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_RESET_EVENT, sizeof(*cmd), (void**)&cmd));
+  cmd->event = event;
+  cmd->source_stage_mask = source_stage_mask;
+  return iree_ok_status();
+}
+
+// Replays a recorded event reset against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_reset_event(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_reset_event_t* cmd) {
+  return iree_hal_command_buffer_reset_event(target_command_buffer, cmd->event,
+                                             cmd->source_stage_mask);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_WAIT_EVENTS
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_wait_events.
+// The event pointer array is stored inline via the flexible array member;
+// barrier arrays are cloned into the arena.
+typedef struct iree_hal_cmd_wait_events_t {
+  iree_hal_cmd_header_t header;
+  iree_host_size_t event_count;
+  iree_hal_execution_stage_t source_stage_mask;
+  iree_hal_execution_stage_t target_stage_mask;
+  iree_host_size_t memory_barrier_count;
+  const iree_hal_memory_barrier_t* memory_barriers;
+  iree_host_size_t buffer_barrier_count;
+  const iree_hal_buffer_barrier_t* buffer_barriers;
+  iree_hal_event_t* events[];
+} iree_hal_cmd_wait_events_t;
+
+// Records an event wait, retaining all |events| in the resource set.
+static iree_status_t iree_hal_deferred_command_buffer_wait_events(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_host_size_t event_count, const iree_hal_event_t** events,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, event_count, events));
+  iree_hal_cmd_wait_events_t* cmd = NULL;
+  // Command storage is sized to carry the event pointers inline.
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_WAIT_EVENTS,
+      sizeof(*cmd) + sizeof(cmd->events[0]) * event_count, (void**)&cmd));
+  cmd->event_count = event_count;
+  cmd->source_stage_mask = source_stage_mask;
+  cmd->target_stage_mask = target_stage_mask;
+  cmd->memory_barrier_count = memory_barrier_count;
+  cmd->memory_barriers = NULL;
+  cmd->buffer_barrier_count = buffer_barrier_count;
+  cmd->buffer_barriers = NULL;
+  memcpy(cmd->events, events, sizeof(cmd->events[0]) * event_count);
+  if (memory_barrier_count > 0) {
+    IREE_RETURN_IF_ERROR(iree_hal_cmd_list_clone_data(
+        cmd_list, memory_barriers,
+        sizeof(memory_barriers[0]) * memory_barrier_count,
+        (void**)&cmd->memory_barriers));
+  }
+  if (buffer_barrier_count > 0) {
+    IREE_RETURN_IF_ERROR(iree_hal_cmd_list_clone_data(
+        cmd_list, buffer_barriers,
+        sizeof(buffer_barriers[0]) * buffer_barrier_count,
+        (void**)&cmd->buffer_barriers));
+  }
+  return iree_ok_status();
+}
+
+// Replays a recorded event wait against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_wait_events(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_wait_events_t* cmd) {
+  return iree_hal_command_buffer_wait_events(
+      target_command_buffer, cmd->event_count,
+      (const iree_hal_event_t**)cmd->events, cmd->source_stage_mask,
+      cmd->target_stage_mask, cmd->memory_barrier_count, cmd->memory_barriers,
+      cmd->buffer_barrier_count, cmd->buffer_barriers);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_DISCARD_BUFFER
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_discard_buffer.
+typedef struct iree_hal_cmd_discard_buffer_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_buffer_t* buffer;
+} iree_hal_cmd_discard_buffer_t;
+
+// Records a buffer discard, retaining |buffer| in the resource set.
+static iree_status_t iree_hal_deferred_command_buffer_discard_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_t* buffer) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 1, &buffer));
+  iree_hal_cmd_discard_buffer_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_DISCARD_BUFFER, sizeof(*cmd), (void**)&cmd));
+  cmd->buffer = buffer;
+  return iree_ok_status();
+}
+
+// Replays a recorded buffer discard against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_discard_buffer(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_discard_buffer_t* cmd) {
+  return iree_hal_command_buffer_discard_buffer(target_command_buffer,
+                                                cmd->buffer);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_FILL_BUFFER
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_fill_buffer.
+// The pattern is stored inline; only the first |pattern_length| bytes are
+// meaningful.
+typedef struct iree_hal_cmd_fill_buffer_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_buffer_t* target_buffer;
+  iree_device_size_t target_offset;
+  iree_device_size_t length;
+  uint64_t pattern;
+  iree_host_size_t pattern_length;
+} iree_hal_cmd_fill_buffer_t;
+
+// Records a buffer fill, retaining |target_buffer| in the resource set.
+// |pattern_length| may be at most sizeof(uint64_t) (8) bytes.
+static iree_status_t iree_hal_deferred_command_buffer_fill_buffer(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, const void* pattern,
+    iree_host_size_t pattern_length) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  iree_hal_cmd_fill_buffer_t* cmd = NULL;
+  // The predicate permits pattern_length == sizeof(cmd->pattern) (8 bytes);
+  // the message must agree with the predicate.
+  if (pattern_length > sizeof(cmd->pattern)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "fill patterns must be 8 bytes or fewer");
+  }
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &target_buffer));
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_FILL_BUFFER, sizeof(*cmd), (void**)&cmd));
+  cmd->target_buffer = target_buffer;
+  cmd->target_offset = target_offset;
+  cmd->length = length;
+  // Zero first so the trailing bytes of the inline pattern are deterministic
+  // when pattern_length < sizeof(cmd->pattern).
+  cmd->pattern = 0;
+  memcpy(&cmd->pattern, pattern, pattern_length);
+  cmd->pattern_length = pattern_length;
+  return iree_ok_status();
+}
+
+// Replays a recorded buffer fill against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_fill_buffer(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_fill_buffer_t* cmd) {
+  // Pass the inline pattern storage; the callee expects a const void*.
+  return iree_hal_command_buffer_fill_buffer(
+      target_command_buffer, cmd->target_buffer, cmd->target_offset,
+      cmd->length, (const void*)&cmd->pattern, cmd->pattern_length);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_UPDATE_BUFFER
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_update_buffer.
+// The source bytes are copied inline via the flexible array member so the
+// caller's memory need not outlive the call.
+typedef struct iree_hal_cmd_update_buffer_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_buffer_t* target_buffer;
+  iree_device_size_t target_offset;
+  iree_device_size_t length;
+  uint8_t source_buffer[];
+} iree_hal_cmd_update_buffer_t;
+
+// Records a buffer update, retaining |target_buffer| and snapshotting
+// |length| bytes from |source_buffer| + |source_offset| into the command.
+static iree_status_t iree_hal_deferred_command_buffer_update_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
+    iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+    iree_device_size_t target_offset, iree_device_size_t length) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &target_buffer));
+  iree_hal_cmd_update_buffer_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_UPDATE_BUFFER,
+      sizeof(*cmd) + sizeof(cmd->source_buffer[0]) * length, (void**)&cmd));
+  cmd->target_buffer = target_buffer;
+  cmd->target_offset = target_offset;
+  cmd->length = length;
+  memcpy(cmd->source_buffer, (const uint8_t*)source_buffer + source_offset,
+         sizeof(cmd->source_buffer[0]) * length);
+  return iree_ok_status();
+}
+
+// Replays a recorded buffer update against |target_command_buffer|.
+// The snapshot was taken at the recorded offset so source_offset is 0 here.
+static iree_status_t iree_hal_deferred_command_buffer_apply_update_buffer(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_update_buffer_t* cmd) {
+  return iree_hal_command_buffer_update_buffer(
+      target_command_buffer, cmd->source_buffer, 0, cmd->target_buffer,
+      cmd->target_offset, cmd->length);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_COPY_BUFFER
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_copy_buffer.
+typedef struct iree_hal_cmd_copy_buffer_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_buffer_t* source_buffer;
+  iree_device_size_t source_offset;
+  iree_hal_buffer_t* target_buffer;
+  iree_device_size_t target_offset;
+  iree_device_size_t length;
+} iree_hal_cmd_copy_buffer_t;
+
+// Records a buffer copy, retaining both buffers in the resource set.
+static iree_status_t iree_hal_deferred_command_buffer_copy_buffer(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  // Insert both buffers with a single resource set call.
+  const void* buffers[2] = {source_buffer, target_buffer};
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 2, buffers));
+  iree_hal_cmd_copy_buffer_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_COPY_BUFFER, sizeof(*cmd), (void**)&cmd));
+  cmd->source_buffer = source_buffer;
+  cmd->source_offset = source_offset;
+  cmd->target_buffer = target_buffer;
+  cmd->target_offset = target_offset;
+  cmd->length = length;
+  return iree_ok_status();
+}
+
+// Replays a recorded buffer copy against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_copy_buffer(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_copy_buffer_t* cmd) {
+  return iree_hal_command_buffer_copy_buffer(
+      target_command_buffer, cmd->source_buffer, cmd->source_offset,
+      cmd->target_buffer, cmd->target_offset, cmd->length);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_PUSH_CONSTANTS
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_push_constants.
+// The constant bytes are stored inline via the flexible array member.
+typedef struct iree_hal_cmd_push_constants_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_executable_layout_t* executable_layout;
+  iree_host_size_t offset;
+  iree_host_size_t values_length;
+  uint8_t values[];
+} iree_hal_cmd_push_constants_t;
+
+// Records a push-constant update, retaining |executable_layout| and copying
+// the value bytes inline.
+static iree_status_t iree_hal_deferred_command_buffer_push_constants(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+    const void* values, iree_host_size_t values_length) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &executable_layout));
+  iree_hal_cmd_push_constants_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_PUSH_CONSTANTS,
+      sizeof(*cmd) + sizeof(cmd->values[0]) * values_length, (void**)&cmd));
+  cmd->executable_layout = executable_layout;
+  cmd->offset = offset;
+  cmd->values_length = values_length;
+  memcpy(cmd->values, values, sizeof(cmd->values[0]) * values_length);
+  return iree_ok_status();
+}
+
+// Replays a recorded push-constant update against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_push_constants(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_push_constants_t* cmd) {
+  return iree_hal_command_buffer_push_constants(
+      target_command_buffer, cmd->executable_layout, cmd->offset, cmd->values,
+      cmd->values_length);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_PUSH_DESCRIPTOR_SET
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_push_descriptor_set.
+// The binding array is stored inline via the flexible array member.
+typedef struct iree_hal_cmd_push_descriptor_set_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_executable_layout_t* executable_layout;
+  uint32_t set;
+  iree_host_size_t binding_count;
+  iree_hal_descriptor_set_binding_t bindings[];
+} iree_hal_cmd_push_descriptor_set_t;
+
+// Records a descriptor set push, retaining the layout and each bound buffer.
+static iree_status_t iree_hal_deferred_command_buffer_push_descriptor_set(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &executable_layout));
+  // Each binding's buffer is retained individually.
+  for (iree_host_size_t i = 0; i < binding_count; ++i) {
+    IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+        command_buffer->resource_set, 1, &bindings[i].buffer));
+  }
+  iree_hal_cmd_push_descriptor_set_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_PUSH_DESCRIPTOR_SET,
+      sizeof(*cmd) + sizeof(cmd->bindings[0]) * binding_count, (void**)&cmd));
+  cmd->executable_layout = executable_layout;
+  cmd->set = set;
+  cmd->binding_count = binding_count;
+  memcpy(cmd->bindings, bindings, sizeof(cmd->bindings[0]) * binding_count);
+  return iree_ok_status();
+}
+
+// Replays a recorded descriptor set push against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_push_descriptor_set(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_push_descriptor_set_t* cmd) {
+  return iree_hal_command_buffer_push_descriptor_set(
+      target_command_buffer, cmd->executable_layout, cmd->set,
+      cmd->binding_count, cmd->bindings);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_BIND_DESCRIPTOR_SET
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_bind_descriptor_set.
+// Dynamic offsets are stored inline via the flexible array member.
+typedef struct iree_hal_cmd_bind_descriptor_set_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_executable_layout_t* executable_layout;
+  uint32_t set;
+  iree_hal_descriptor_set_t* descriptor_set;
+  iree_host_size_t dynamic_offset_count;
+  iree_device_size_t dynamic_offsets[];
+} iree_hal_cmd_bind_descriptor_set_t;
+
+// Records a descriptor set bind, retaining the layout and the set itself.
+static iree_status_t iree_hal_deferred_command_buffer_bind_descriptor_set(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_hal_descriptor_set_t* descriptor_set,
+    iree_host_size_t dynamic_offset_count,
+    const iree_device_size_t* dynamic_offsets) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  // Insert both resources with a single resource set call.
+  const void* resources[2] = {executable_layout, descriptor_set};
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 2, resources));
+  iree_hal_cmd_bind_descriptor_set_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_BIND_DESCRIPTOR_SET,
+      sizeof(*cmd) + sizeof(cmd->dynamic_offsets[0]) * dynamic_offset_count,
+      (void**)&cmd));
+  cmd->executable_layout = executable_layout;
+  cmd->set = set;
+  cmd->descriptor_set = descriptor_set;
+  cmd->dynamic_offset_count = dynamic_offset_count;
+  memcpy(cmd->dynamic_offsets, dynamic_offsets,
+         sizeof(cmd->dynamic_offsets[0]) * dynamic_offset_count);
+  return iree_ok_status();
+}
+
+// Replays a recorded descriptor set bind against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_bind_descriptor_set(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_bind_descriptor_set_t* cmd) {
+  return iree_hal_command_buffer_bind_descriptor_set(
+      target_command_buffer, cmd->executable_layout, cmd->set,
+      cmd->descriptor_set, cmd->dynamic_offset_count, cmd->dynamic_offsets);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_DISPATCH
+//===----------------------------------------------------------------------===//
+
+// Recorded arguments of iree_hal_command_buffer_dispatch.
+typedef struct iree_hal_cmd_dispatch_t {
+  iree_hal_cmd_header_t header;
+  iree_hal_executable_t* executable;
+  int32_t entry_point;
+  uint32_t workgroup_x;
+  uint32_t workgroup_y;
+  uint32_t workgroup_z;
+} iree_hal_cmd_dispatch_t;
+
+// Records a dispatch, retaining |executable| in the resource set.
+static iree_status_t iree_hal_deferred_command_buffer_dispatch(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &executable));
+  iree_hal_cmd_dispatch_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_DISPATCH, sizeof(*cmd), (void**)&cmd));
+  cmd->executable = executable;
+  cmd->entry_point = entry_point;
+  cmd->workgroup_x = workgroup_x;
+  cmd->workgroup_y = workgroup_y;
+  cmd->workgroup_z = workgroup_z;
+  return iree_ok_status();
+}
+
+// Replays a recorded dispatch against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_dispatch(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_dispatch_t* cmd) {
+  return iree_hal_command_buffer_dispatch(
+      target_command_buffer, cmd->executable, cmd->entry_point,
+      cmd->workgroup_x, cmd->workgroup_y, cmd->workgroup_z);
+}
+
+//===----------------------------------------------------------------------===//
+// IREE_HAL_CMD_DISPATCH_INDIRECT
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_cmd_dispatch_indirect_t {
+ iree_hal_cmd_header_t header;
+ iree_hal_executable_t* executable;
+ int32_t entry_point;
+ iree_hal_buffer_t* workgroups_buffer;
+ iree_device_size_t workgroups_offset;
+} iree_hal_cmd_dispatch_indirect_t;
+
+// Records an indirect dispatch into the deferred command list.
+// Both the executable and the workgroup count buffer are retained in the
+// resource set so they remain live until the set is reset/freed.
+static iree_status_t iree_hal_deferred_command_buffer_dispatch_indirect(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    iree_hal_buffer_t* workgroups_buffer,
+    iree_device_size_t workgroups_offset) {
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      iree_hal_deferred_command_buffer_cast(base_command_buffer);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+  // Retain resources before appending the command so that on failure we have
+  // not recorded a command referencing unretained resources.
+  const void* resources[2] = {executable, workgroups_buffer};
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 2, resources));
+  iree_hal_cmd_dispatch_indirect_t* cmd = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
+      cmd_list, IREE_HAL_CMD_DISPATCH_INDIRECT, sizeof(*cmd), (void**)&cmd));
+  cmd->executable = executable;
+  cmd->entry_point = entry_point;
+  cmd->workgroups_buffer = workgroups_buffer;
+  cmd->workgroups_offset = workgroups_offset;
+  return iree_ok_status();
+}
+
+// Replays a recorded indirect dispatch against |target_command_buffer|.
+static iree_status_t iree_hal_deferred_command_buffer_apply_dispatch_indirect(
+    iree_hal_command_buffer_t* target_command_buffer,
+    const iree_hal_cmd_dispatch_indirect_t* cmd) {
+  iree_hal_buffer_t* workgroups_buffer = cmd->workgroups_buffer;
+  iree_device_size_t workgroups_offset = cmd->workgroups_offset;
+  return iree_hal_command_buffer_dispatch_indirect(
+      target_command_buffer, cmd->executable, cmd->entry_point,
+      workgroups_buffer, workgroups_offset);
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic replay dispatch
+//===----------------------------------------------------------------------===//
+
+// Replay dispatch table indexed by the recorded command type.
+// Each entry must match a IREE_HAL_CMD_* enum value; the casts erase the
+// per-command argument struct type so all handlers share one signature.
+static const iree_hal_cmd_apply_fn_t iree_hal_cmd_apply_table[] = {
+    [IREE_HAL_CMD_EXECUTION_BARRIER] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_execution_barrier,
+    [IREE_HAL_CMD_SIGNAL_EVENT] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_signal_event,
+    [IREE_HAL_CMD_RESET_EVENT] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_reset_event,
+    [IREE_HAL_CMD_WAIT_EVENTS] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_wait_events,
+    [IREE_HAL_CMD_DISCARD_BUFFER] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_discard_buffer,
+    [IREE_HAL_CMD_FILL_BUFFER] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_fill_buffer,
+    [IREE_HAL_CMD_UPDATE_BUFFER] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_update_buffer,
+    [IREE_HAL_CMD_COPY_BUFFER] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_copy_buffer,
+    [IREE_HAL_CMD_PUSH_CONSTANTS] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_push_constants,
+    [IREE_HAL_CMD_PUSH_DESCRIPTOR_SET] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_push_descriptor_set,
+    [IREE_HAL_CMD_BIND_DESCRIPTOR_SET] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_bind_descriptor_set,
+    [IREE_HAL_CMD_DISPATCH] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_dispatch,
+    [IREE_HAL_CMD_DISPATCH_INDIRECT] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_dispatch_indirect,
+};
+
+// Replays all recorded commands against |target_command_buffer| between a
+// begin/end pair. Stops at the first failing command and returns its status.
+IREE_API_EXPORT iree_status_t iree_hal_deferred_command_buffer_apply(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_command_buffer_t* target_command_buffer) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // NOTE(review): dyn_cast returns NULL for non-deferred command buffers and
+  // the result is dereferenced unchecked below; callers presumably guarantee
+  // the type - confirm.
+  iree_hal_deferred_command_buffer_t* command_buffer =
+      (iree_hal_deferred_command_buffer_t*)iree_hal_command_buffer_dyn_cast(
+          base_command_buffer, &iree_hal_deferred_command_buffer_vtable);
+  iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
+
+  iree_status_t status = iree_hal_command_buffer_begin(target_command_buffer);
+  if (iree_status_is_ok(status)) {
+    // Walk the recorded command linked list and dispatch each command to its
+    // type-specific replay handler.
+    for (iree_hal_cmd_header_t* cmd = cmd_list->head; cmd != NULL;
+         cmd = cmd->next) {
+      status = iree_hal_cmd_apply_table[cmd->type](target_command_buffer, cmd);
+      if (!iree_status_is_ok(status)) break;
+    }
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_command_buffer_end(target_command_buffer);
+  }
+
+  // One-shot command buffers can't be replayed so we can drop the memory
+  // immediately. As command buffers must remain live for the duration of their
+  // execution this prevents us from hanging on to the commands we will never
+  // use again.
+  if (iree_status_is_ok(status) &&
+      iree_all_bits_set(command_buffer->base.mode,
+                        IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT)) {
+    iree_hal_cmd_list_reset(cmd_list);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Command buffer vtable routing each entry point to its recording function.
+static const iree_hal_command_buffer_vtable_t
+    iree_hal_deferred_command_buffer_vtable = {
+        .destroy = iree_hal_deferred_command_buffer_destroy,
+        .dyn_cast = iree_hal_deferred_command_buffer_dyn_cast,
+        .begin = iree_hal_deferred_command_buffer_begin,
+        .end = iree_hal_deferred_command_buffer_end,
+        .execution_barrier = iree_hal_deferred_command_buffer_execution_barrier,
+        .signal_event = iree_hal_deferred_command_buffer_signal_event,
+        .reset_event = iree_hal_deferred_command_buffer_reset_event,
+        .wait_events = iree_hal_deferred_command_buffer_wait_events,
+        .discard_buffer = iree_hal_deferred_command_buffer_discard_buffer,
+        .fill_buffer = iree_hal_deferred_command_buffer_fill_buffer,
+        .update_buffer = iree_hal_deferred_command_buffer_update_buffer,
+        .copy_buffer = iree_hal_deferred_command_buffer_copy_buffer,
+        .push_constants = iree_hal_deferred_command_buffer_push_constants,
+        .push_descriptor_set =
+            iree_hal_deferred_command_buffer_push_descriptor_set,
+        .bind_descriptor_set =
+            iree_hal_deferred_command_buffer_bind_descriptor_set,
+        .dispatch = iree_hal_deferred_command_buffer_dispatch,
+        .dispatch_indirect = iree_hal_deferred_command_buffer_dispatch_indirect,
+};
diff --git a/runtime/src/iree/hal/utils/deferred_command_buffer.h b/runtime/src/iree/hal/utils/deferred_command_buffer.h
new file mode 100644
index 0000000..f1686f3
--- /dev/null
+++ b/runtime/src/iree/hal/utils/deferred_command_buffer.h
@@ -0,0 +1,62 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_UTILS_DEFERRED_COMMAND_BUFFER_H_
+#define IREE_HAL_UTILS_DEFERRED_COMMAND_BUFFER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/command_buffer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_arena_block_pool_t iree_arena_block_pool_t;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_t deferred record/replay wrapper
+//===----------------------------------------------------------------------===//
+
+// Records an in-memory command buffer that can be replayed against a target
+// command buffer at a later time.
+//
+// Argument arrays (like push constants) and host buffers (like the source
+// buffer in iree_hal_command_buffer_update_buffer) that usually live on the
+// stack will be cloned. As with all command buffers the resources (buffers,
+// events, etc) referenced will not be retained and the caller must ensure that
+// all resource lifetimes outlive the command buffer.
+//
+// |block_pool| will be used to allocate the underlying storage and the blocks
+// will be retained until the command buffer is reset or released, or if
+// IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT is set after the first time the command
+// buffer is replayed. The block size of the pool can be whatever the caller
+// wants with the caveat being that smaller sizes may result in more oversized
+// allocations from the system. 16KB, 32KB, and 64KB are reasonable starting
+// points based on system availability.
+// NOTE: the |block_pool| must remain live for the lifetime of the command
+// buffers that use it.
+//
+// After recording iree_hal_deferred_command_buffer_apply can be used to replay
+// the sequence of commands against a target command buffer implementation.
+// The command buffer can be replayed multiple times.
+IREE_API_EXPORT iree_status_t iree_hal_deferred_command_buffer_create(
+ iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
+ iree_hal_command_category_t command_categories,
+ iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
+ iree_hal_command_buffer_t** out_command_buffer);
+
+// Replays a recorded |command_buffer| against a |target_command_buffer|.
+// If the command buffer was recorded in one-shot mode it will be reset upon
+// return.
+IREE_API_EXPORT iree_status_t iree_hal_deferred_command_buffer_apply(
+ iree_hal_command_buffer_t* command_buffer,
+ iree_hal_command_buffer_t* target_command_buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_UTILS_DEFERRED_COMMAND_BUFFER_H_
diff --git a/runtime/src/iree/hal/utils/resource_set.c b/runtime/src/iree/hal/utils/resource_set.c
new file mode 100644
index 0000000..14e5871
--- /dev/null
+++ b/runtime/src/iree/hal/utils/resource_set.c
@@ -0,0 +1,276 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/utils/resource_set.h"
+
+#include "iree/base/tracing.h"
+
+// Inlines the first chunk into the block using all of the remaining space.
+// This is a special case chunk that is released back to the pool with the
+// resource set and lets us avoid an additional allocation.
+static void iree_hal_resource_set_setup_inline_chunk(
+ iree_hal_resource_set_t* set) {
+ uint8_t* block_ptr = (uint8_t*)set + sizeof(*set);
+ iree_hal_resource_set_chunk_t* inlined_chunk =
+ (iree_hal_resource_set_chunk_t*)block_ptr;
+ inlined_chunk->flags = IREE_HAL_RESOURCE_SET_CHUNK_FLAG_INLINE;
+ inlined_chunk->capacity = (set->block_pool->total_block_size - sizeof(*set) -
+ sizeof(*inlined_chunk)) /
+ sizeof(iree_hal_resource_t*);
+ inlined_chunk->capacity = iree_min(inlined_chunk->capacity,
+ IREE_HAL_RESOURCE_SET_CHUNK_MAX_CAPACITY);
+ inlined_chunk->count = 0;
+ set->chunk_head = inlined_chunk;
+}
+
+// Allocates a resource set from |block_pool|; the set header, its inline
+// chunk, and all spill chunks share the pool's block size.
+IREE_API_EXPORT iree_status_t iree_hal_resource_set_allocate(
+    iree_arena_block_pool_t* block_pool, iree_hal_resource_set_t** out_set) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // We could allow larger sizes (would require widening the capacity/count
+  // fields in the chunk) but in real usage having even 64k is a bit too much.
+  IREE_ASSERT_LE(block_pool->total_block_size, 64 * 1024,
+                 "keep block sizes small for resource sets");
+
+  // Acquire block and place the set struct at the head.
+  // NOTE: |block| points at the arena's bookkeeping struct located after the
+  // usable bytes, so the usable region starts usable_block_size bytes before
+  // it (the same arithmetic is used when acquiring spill chunks below).
+  iree_arena_block_t* block = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_arena_block_pool_acquire(block_pool, &block));
+  uint8_t* block_ptr = (uint8_t*)block - block_pool->usable_block_size;
+  iree_hal_resource_set_t* set = (iree_hal_resource_set_t*)block_ptr;
+  memset(set, 0, sizeof(*set));
+  set->block_pool = block_pool;
+  iree_hal_resource_set_setup_inline_chunk(set);
+
+  *out_set = set;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Releases all resources retained by |set| and returns the backing blocks to
+// the pool. When |preserve_set| is true the block holding the set header (and
+// its inline chunk) is kept so the set can be reused; when false every block
+// including the set itself is released and |set| becomes invalid.
+static void iree_hal_resource_set_release_blocks(iree_hal_resource_set_t* set,
+                                                 bool preserve_set) {
+  // Release all resources in all chunks and stitch together the blocks in a
+  // linked list. We do this first so that we can release all of the chunks back
+  // to the block pool in one operation. Ideally we'd maintain the linked list
+  // in our chunks but there's some weirdness with prefix/suffix header/footers
+  // that isn't worth the complexity.
+  iree_arena_block_t* block_head = NULL;
+  iree_arena_block_t* block_tail = NULL;
+  iree_hal_resource_set_chunk_t* chunk = set->chunk_head;
+  while (chunk) {
+    // Release all resources in the chunk.
+    for (iree_host_size_t i = 0; i < chunk->count; ++i) {
+      iree_hal_resource_release(chunk->resources[i]);
+    }
+    // Consume the chunk and add it to the block pool release linked list.
+    iree_hal_resource_set_chunk_t* next_chunk = chunk->next_chunk;
+    iree_arena_block_t* block = NULL;
+    if (iree_hal_resource_set_chunk_is_stored_inline(chunk)) {
+      // This is the inlined first chunk that also stores the set header.
+      // If we are not freeing the set then we don't release the block back to
+      // the pool.
+      // NOTE: spill chunks are prepended to the list so the inline chunk is
+      // always last; breaking here terminates the walk.
+      if (preserve_set) {
+        // Don't release the block.
+        break;
+      } else {
+        block = (iree_arena_block_t*)((uint8_t*)set +
+                                      set->block_pool->usable_block_size);
+        next_chunk = NULL;
+      }
+    } else {
+      // A chunk acquired after the set was acquired.
+      block = (iree_arena_block_t*)((uint8_t*)chunk +
+                                    set->block_pool->usable_block_size);
+    }
+    // Prepend to the release list; the first block added becomes the tail.
+    block->next = block_head;
+    block_head = block;
+    if (!block_tail) block_tail = block;
+    chunk = next_chunk;
+  }
+
+  // Release all blocks back to the block pool in one operation.
+  // NOTE: this invalidates the |set| memory.
+  iree_arena_block_pool_t* block_pool = set->block_pool;
+  iree_arena_block_pool_release(block_pool, block_head, block_tail);
+}
+
+// Frees |set|, releasing every retained resource and returning all blocks
+// (including the one containing the set header) to the pool.
+IREE_API_EXPORT void iree_hal_resource_set_free(iree_hal_resource_set_t* set) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Release all resources and the arena block used by the set.
+  // The set pointer is invalid after this call returns.
+  iree_hal_resource_set_release_blocks(set, /*preserve_set=*/false);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Resets |set| to its empty state: releases all retained resources, returns
+// spill blocks to the pool, and clears the MRU while keeping the set block
+// alive for reuse.
+IREE_API_EXPORT void iree_hal_resource_set_reset(iree_hal_resource_set_t* set) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Release all resources and the blocks besides the base set.
+  iree_hal_resource_set_release_blocks(set, /*preserve_set=*/true);
+
+  // Reset the set state.
+  memset(set->mru, 0, sizeof(set->mru));
+  iree_hal_resource_set_setup_inline_chunk(set);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Retains |resource| and adds it to the main |set| list.
+// Retains |resource| and adds it to the main |set| list.
+static iree_status_t iree_hal_resource_set_insert_retain(
+    iree_hal_resource_set_t* set, iree_hal_resource_t* resource) {
+  iree_hal_resource_set_chunk_t* chunk = set->chunk_head;
+  if (IREE_UNLIKELY(chunk->count + 1 > chunk->capacity)) {
+    // Ran out of room in the current chunk - acquire a new one and link it into
+    // the list of chunks.
+    iree_arena_block_t* block = NULL;
+    IREE_RETURN_IF_ERROR(
+        iree_arena_block_pool_acquire(set->block_pool, &block));
+    // The chunk occupies the usable region preceding the arena bookkeeping
+    // struct (same layout as in iree_hal_resource_set_allocate).
+    chunk =
+        (iree_hal_resource_set_chunk_t*)((uint8_t*)block -
+                                         set->block_pool->usable_block_size);
+    // Storing the aligned next_chunk pointer leaves flag bit 0 clear, marking
+    // this chunk as pool-allocated (not inline) via the next_chunk/flags union.
+    chunk->next_chunk = set->chunk_head;
+    set->chunk_head = chunk;
+    chunk->capacity = (set->block_pool->total_block_size - sizeof(*chunk)) /
+                      sizeof(iree_hal_resource_t*);
+    chunk->capacity =
+        iree_min(chunk->capacity, IREE_HAL_RESOURCE_SET_CHUNK_MAX_CAPACITY);
+    chunk->count = 0;
+  }
+
+  // Retain and insert into the chunk.
+  chunk->resources[chunk->count++] = resource;
+  iree_hal_resource_retain(resource);
+  return iree_ok_status();
+}
+
+// Scans the lookaside for the resource pointer and updates the order if found.
+// If the resource was not found then it will be inserted into the main list as
+// well as the MRU.
+//
+// This performs a full scan over the MRU and if the resource is found will
+// move the resource to the front of the list before returning. Otherwise the
+// resource will be retained in the main source-of-truth list.
+//
+// Example (hit):
+// +----+----+----+----+
+// | AA | BB | CC | DD | resource: CC
+// +----+----+----+----+
+// scan mru to find CC:
+// found at mru[2]
+// shift prefix down 1:
+// +----+----+----+----+
+// | AA | AA | BB | DD |
+// +----+----+----+----+
+// insert resource at front:
+// +----+----+----+----+
+// | CC | AA | BB | DD |
+// +----+----+----+----+
+//
+// Example (miss):
+// +----+----+----+----+
+// | AA | BB | CC | DD | resource: EE
+// +----+----+----+----+
+// scan mru to find EE: not found
+// shift set down 1:
+// +----+----+----+----+
+// | AA | AA | BB | CC |
+// +----+----+----+----+
+// insert resource at front:
+// +----+----+----+----+
+// | EE | AA | BB | CC |
+// +----+----+----+----+
+// insert resource into main list
+//
+// The intent here is that we can model this behavior with SIMD ops to perform
+// both the scan and update using comparison, extraction, and permutation. The
+// best and worst case flows will load the entire MRU into registers from a
+// single cache line, do all the scanning and shifting in registers, and then
+// store back to the single cache line.
+//
+// Today, though, we leave this as an exercise to whoever comes across this :)
+// Notes:
+// As the MRU is a fixed size we can unroll it entirely and avoid any looping.
+// On a 32-bit system with uint32x4_t we only need 4 registers.
+// On a 64-bit system with uint64x2_t we also only need 4 registers - though
+// the MRU has half as many entries and we may want to go >1 cache line.
+//
+// If we wanted to process more than one resource at a time we can specialize
+// the code paths to handle 1/2/4/etc resources and process in batches with
+// an optional remainder. This would increase the ratio of work performed on
+// the loaded MRU registers before we do the shift/store.
+//
+// The tree sequence we likely want is something like:
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u32
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u32
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_u32
+// or
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u64
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u64
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_u32
+// This would yield whether the pointer was found, but instead of maxing at
+// the end we can use the produced mask to extract out a single register with
+// which positions are hits and use that to then permute the registers into
+// the proper order. At the end we could use a table instruction to remap and
+// extract out a byte/bitmap of the indices that we need to insert into the
+// main set.
+//
+// The shifting can be performed with
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64
+// This takes n low elements of LHS and rest from RHS and we can cascade them
+// to shift down the whole MRU.
+//
+// We can use SIMDE as a rosetta stone for getting neon/avx/wasm/etc:
+// https://github.com/simd-everywhere/simde/blob/master/simde/arm/neon/ceq.h#L591
+// Inserts a single |resource| into |set|, using the MRU as a lookaside to
+// elide redundant retains (see the large comment above for the algorithm).
+static iree_status_t iree_hal_resource_set_insert_1(
+    iree_hal_resource_set_t* set, iree_hal_resource_t* resource) {
+  // Scan and hope for a hit.
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(set->mru); ++i) {
+    if (set->mru[i] != resource) continue;
+    // Hit - keep the list sorted by most->least recently used.
+    // We shift the MRU down to make room at index 0 and store the
+    // resource there.
+    if (i > 0) {
+      // Shifting the first i entries down by one overwrites slot i (the hit),
+      // leaving slot 0 free for the resource.
+      memmove(&set->mru[1], &set->mru[0], sizeof(set->mru[0]) * i);
+      set->mru[0] = resource;
+    }
+    return iree_ok_status();
+  }
+
+  // Miss - insert into the main list (slow path).
+  // Note that we do this before updating the MRU in case allocation fails - we
+  // don't want to keep the pointer around unless we've really retained it.
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert_retain(set, resource));
+
+  // Shift the MRU down and insert the new item at the head.
+  // The least-recently-used entry falls off the end; it remains retained in
+  // the main chunk list so nothing is lost.
+  memmove(&set->mru[1], &set->mru[0],
+          sizeof(set->mru[0]) * (IREE_ARRAYSIZE(set->mru) - 1));
+  set->mru[0] = resource;
+
+  return iree_ok_status();
+}
+
+// Inserts |count| resources from the |resources| pointer array into |set|,
+// retaining each for at least the lifetime of the set. On failure a prefix of
+// the resources may already have been inserted (and retained).
+IREE_API_EXPORT iree_status_t
+iree_hal_resource_set_insert(iree_hal_resource_set_t* set,
+                             iree_host_size_t count, const void* resources) {
+  // For now we process one at a time. We should have a stride that lets us
+  // amortize the cost of doing the MRU update and insertion allocation by
+  // say slicing off 4/8/16/32 resources at a time etc. Today each miss that
+  // requires a full insertion goes down the whole path of checking chunk
+  // capacity and such.
+  iree_hal_resource_t* const* typed_resources =
+      (iree_hal_resource_t* const*)resources;
+  for (iree_host_size_t i = 0; i < count; ++i) {
+    IREE_RETURN_IF_ERROR(
+        iree_hal_resource_set_insert_1(set, typed_resources[i]));
+  }
+  return iree_ok_status();
+}
diff --git a/runtime/src/iree/hal/utils/resource_set.h b/runtime/src/iree/hal/utils/resource_set.h
new file mode 100644
index 0000000..6f63ced
--- /dev/null
+++ b/runtime/src/iree/hal/utils/resource_set.h
@@ -0,0 +1,139 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_UTILS_RESOURCE_SET_H_
+#define IREE_HAL_UTILS_RESOURCE_SET_H_
+
+#include "iree/base/api.h"
+#include "iree/base/internal/arena.h"
+#include "iree/hal/resource.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Bit 0 of the next_chunk pointer indicates whether we are inlined into the
+// resource set block - the chunks are always aligned and the bit is unused.
+#define IREE_HAL_RESOURCE_SET_CHUNK_FLAG_INLINE 0x1
+
+// Capacity is limited by how many bits we reserve for the count.
+#define IREE_HAL_RESOURCE_SET_CHUNK_MAX_CAPACITY 0xFFFFu
+
+// A chunk of resources within a resource set.
+// Chunks contain a fixed number of resources based on the block size of the
+// pool the set was allocated from.
+typedef struct iree_hal_resource_set_chunk_t {
+ // Next chunk in the chunk linked list.
+ // Bit 0 indicates whether this was an allocated block; 0 means that the
+ // chunk is stored within the parent resource set and should not be returned
+ // to the block pool. This works only because we know the blocks are allocated
+ // at an alignment >= 16 and we have a few bits to work with.
+ union {
+ struct iree_hal_resource_set_chunk_t* next_chunk;
+ uintptr_t flags;
+ };
+
+ // Retained resources - may be less than the capacity derived from the block
+ // pool block size. We keep the counts small here to reduce chunk overhead. We
+ // could recompute the capacity each time but at the point that we use even 1
+ // byte we've already consumed 4 (or 8) thanks to padding and should make use
+ // of the rest.
+ uint16_t capacity;
+ uint16_t count;
+ iree_hal_resource_t* resources[];
+} iree_hal_resource_set_chunk_t;
+
+// Returns true if the chunk is stored inline in the parent resource set.
+#define iree_hal_resource_set_chunk_is_stored_inline(chunk) \
+ (((chunk)->flags & IREE_HAL_RESOURCE_SET_CHUNK_FLAG_INLINE) == \
+ IREE_HAL_RESOURCE_SET_CHUNK_FLAG_INLINE)
+
+// Number of elements in the most-recently-used resource list of a set.
+// The larger the number the greater the chance of having a hit but the more
+// expensive every miss will be.
+//
+// To try to keep the MRU in cache we size this based on how many pointers will
+// fit in a single cache line. This also makes it easier to author SIMD lookups
+// as we'll (in-theory) be able to load the entries into SIMD registers.
+//
+// Values for the platforms we specify for:
+// 32-bit: 64 / 4 = 16x4b ptrs (4 x uint32x4_t)
+// 64-bit: 64 / 8 = 8x8b ptrs (4 x uint64x2_t)
+// We could scale this up if we wanted but being able to unroll is nice.
+#define IREE_HAL_RESOURCE_SET_MRU_SIZE \
+ (iree_hardware_constructive_interference_size / sizeof(uintptr_t))
+
+// "Efficient" append-only set for retaining a set of resources.
+// This is a non-deterministic data structure that tries to reduce the amount of
+// overhead involved in tracking a reasonably-sized set of resources (~dozens to
+// hundreds). Set insertion may have false negatives and retain resources more
+// than strictly required by trading off the expense of precisely detecting
+// redundant insertions with the expense of an additional atomic operation.
+//
+// This tries to elide insertions by maintaining a most-recently-used list.
+// This optimizes for temporal locality of resources used (the same executables,
+// same buffers, etc) and is implemented to have a fixed cost regardless of
+// whether the values are found and should hopefully trigger enough to avoid the
+// subsequent full insertion that can introduce allocations and ref counting.
+// The idea is that if we can keep the MRU in cache and spend a dozen cycles to
+// manage it we only need to avoid a single cache miss that would occur doing
+// the full insertion. We care here because this is on the critical path of
+// command encoding and the parasitic cost of maintaining the set scales with
+// the number of commands issued. This never needs to be free, only as fast as
+// whatever user code may need to do to maintain proper lifetime - or as small
+// in terms of code-size.
+//
+// **WARNING**: thread-unsafe insertion: it's assumed that sets are constructed
+// by a single thread, sealed, and then released at once at a future time point.
+// Multiple threads needing to insert into a set should have their own sets and
+// then join them afterward.
+typedef struct iree_hal_resource_set_t {
+ // A small MRUish list of resources for quickly deduplicating insertions.
+ // We use this to perform an O(k) comparison traded off with the cost of a
+ // miss that results in an atomic inc/dec. We shouldn't make this
+ // more expensive than the additional cost of the retain/release.
+ //
+ // This lives at the head of the struct as it's used in 100% of insertions and
+ // if we can get lucky with it staying in cache we reduce a lot of memory
+ // traffic. Once we spill the MRU and go to main memory to add the resource
+ // we're going to have a cache miss and this way we avoid two (one for the
+ // set and one for the chunk).
+ //
+ // TODO(benvanik): ensure alignment on the set - should be at
+ // iree_hardware_constructive_interference_size.
+ iree_hal_resource_t* mru[IREE_HAL_RESOURCE_SET_MRU_SIZE];
+
+ // Block pool used for allocating additional set storage slabs.
+ iree_arena_block_pool_t* block_pool;
+
+ // Linked list of storage chunks.
+ iree_hal_resource_set_chunk_t* chunk_head;
+} iree_hal_resource_set_t;
+
+// Allocates a new resource from the given |block_pool|.
+// Resources can be inserted and are retained until the set is freed.
+IREE_API_EXPORT iree_status_t iree_hal_resource_set_allocate(
+ iree_arena_block_pool_t* block_pool, iree_hal_resource_set_t** out_set);
+
+// Frees a resource set and releases all inserted resources.
+// The |set| itself will be returned back to the block pool it was allocated
+// from.
+IREE_API_EXPORT void iree_hal_resource_set_free(iree_hal_resource_set_t* set);
+
+// Resets the set to its initial empty state by releasing all owned resources.
+IREE_API_EXPORT void iree_hal_resource_set_reset(iree_hal_resource_set_t* set);
+
+// Inserts zero or more resources into the set.
+// Each resource will be retained for at least the lifetime of the set.
+IREE_API_EXPORT iree_status_t
+iree_hal_resource_set_insert(iree_hal_resource_set_t* set,
+ iree_host_size_t count, const void* resources);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_UTILS_RESOURCE_SET_H_
diff --git a/runtime/src/iree/hal/utils/resource_set_benchmark.c b/runtime/src/iree/hal/utils/resource_set_benchmark.c
new file mode 100644
index 0000000..5b22f97
--- /dev/null
+++ b/runtime/src/iree/hal/utils/resource_set_benchmark.c
@@ -0,0 +1,287 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/prng.h"
+#include "iree/hal/api.h"
+#include "iree/hal/utils/resource_set.h"
+#include "iree/testing/benchmark.h"
+
+typedef struct iree_hal_test_resource_t {
+ iree_hal_resource_t resource;
+ iree_allocator_t host_allocator;
+} iree_hal_test_resource_t;
+
+typedef struct iree_hal_test_resource_vtable_t {
+ void(IREE_API_PTR* destroy)(iree_hal_test_resource_t* resource);
+} iree_hal_test_resource_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_test_resource_vtable_t);
+
+static const iree_hal_test_resource_vtable_t iree_hal_test_resource_vtable;
+
+static iree_status_t iree_hal_test_resource_create(
+ iree_allocator_t host_allocator, iree_hal_resource_t** out_resource) {
+ iree_hal_test_resource_t* test_resource = NULL;
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+ host_allocator, sizeof(*test_resource), (void**)&test_resource));
+ iree_hal_resource_initialize(&iree_hal_test_resource_vtable,
+ &test_resource->resource);
+ test_resource->host_allocator = host_allocator;
+ *out_resource = (iree_hal_resource_t*)test_resource;
+ return iree_ok_status();
+}
+
+static void iree_hal_test_resource_destroy(iree_hal_test_resource_t* resource) {
+ iree_allocator_t host_allocator = resource->host_allocator;
+ iree_allocator_free(host_allocator, resource);
+}
+
+static const iree_hal_test_resource_vtable_t iree_hal_test_resource_vtable = {
+ /*.destroy=*/iree_hal_test_resource_destroy,
+};
+
+// Tests init/deinit performance when 0+ resources are in the set.
+// This is our worst-case with unique resources that never match the MRU.
+//
+// user_data is a count of elements to insert into each set.
+static iree_status_t iree_hal_resource_set_benchmark_lifecycle_n(
+ const iree_benchmark_def_t* benchmark_def,
+ iree_benchmark_state_t* benchmark_state) {
+ iree_allocator_t host_allocator = benchmark_state->host_allocator;
+
+ // Initialize the block pool we'll be serving from.
+ // Sized like we usually do it in the runtime for ~512-1024 elements.
+ iree_arena_block_pool_t block_pool;
+ iree_arena_block_pool_initialize(4096, host_allocator, &block_pool);
+
+ // Allocate the resources we'll be using - we keep them live so that we are
+ // measuring just the retain/release and set times instead of the timing of
+ // resource creation/deletion.
+ uint32_t count = (uint32_t)(uintptr_t)benchmark_def->user_data;
+ iree_hal_resource_t** resources = NULL;
+ if (count > 0) {
+ IREE_CHECK_OK(iree_allocator_malloc(host_allocator,
+ sizeof(iree_hal_resource_t*) * count,
+ (void**)&resources));
+ }
+ for (uint32_t i = 0; i < count; ++i) {
+ IREE_CHECK_OK(iree_hal_test_resource_create(host_allocator, &resources[i]));
+ }
+
+ // Create/insert/delete lifecycle.
+ while (iree_benchmark_keep_running(benchmark_state, /*batch_count=*/1)) {
+ iree_hal_resource_set_t* set = NULL;
+ IREE_CHECK_OK(iree_hal_resource_set_allocate(&block_pool, &set));
+ IREE_CHECK_OK(iree_hal_resource_set_insert(set, count, resources));
+ iree_hal_resource_set_free(set);
+ }
+
+ // Cleanup.
+ for (uint32_t i = 0; i < count; ++i) {
+ iree_hal_resource_release(resources[i]);
+ }
+ iree_allocator_free(host_allocator, resources);
+ iree_arena_block_pool_deinitialize(&block_pool);
+
+ return iree_ok_status();
+}
+
+// Tests insertion performance either when the MRU is used (n < MRU size) or
+// in the worst case when all resources are unique and guaranteed to miss the
+// MRU. Expect to see a cliff where we spill the MRU.
+//
+// user_data is a count of unique elements to insert.
+static iree_status_t iree_hal_resource_set_benchmark_insert_n(
+ const iree_benchmark_def_t* benchmark_def,
+ iree_benchmark_state_t* benchmark_state) {
+ iree_allocator_t host_allocator = benchmark_state->host_allocator;
+
+ // Initialize the block pool we'll be serving from.
+ // Sized like we usually do it in the runtime for ~512-1024 elements.
+ iree_arena_block_pool_t block_pool;
+ iree_arena_block_pool_initialize(4096, host_allocator, &block_pool);
+
+ // Create the empty set using the block pool for additional memory.
+ iree_hal_resource_set_t* set = NULL;
+ IREE_CHECK_OK(iree_hal_resource_set_allocate(&block_pool, &set));
+
+ // Allocate the resources we'll be using - we keep them live so that we are
+ // measuring just the retain/release and set times instead of the timing of
+ // resource creation/deletion.
+ uint32_t count = (uint32_t)(uintptr_t)benchmark_def->user_data;
+ iree_hal_resource_t** resources = NULL;
+ IREE_CHECK_OK(iree_allocator_malloc(host_allocator,
+ sizeof(iree_hal_resource_t*) * count,
+ (void**)&resources));
+ for (uint32_t i = 0; i < count; ++i) {
+ IREE_CHECK_OK(iree_hal_test_resource_create(host_allocator, &resources[i]));
+ }
+
+ // Insert the resources. After the first iteration these should all be hits.
+ while (iree_benchmark_keep_running(benchmark_state, /*batch_count=*/1)) {
+ IREE_CHECK_OK(iree_hal_resource_set_insert(set, count, resources));
+ }
+
+ // Cleanup.
+ for (uint32_t i = 0; i < count; ++i) {
+ iree_hal_resource_release(resources[i]);
+ }
+ iree_hal_resource_set_free(set);
+ iree_allocator_free(host_allocator, resources);
+ iree_arena_block_pool_deinitialize(&block_pool);
+
+ return iree_ok_status();
+}
+
+// Tests insertion into the set in a randomized order.
+// This lets us get a somewhat reasonable approximation of average performance.
+// In reality what the compiler spits out is non-random and often just
+// alternating A/B/C/B/A/C/A/B/C etc kind of sequences.
+//
+// This is the most important benchmark: if this is fast then we are :thumbsup:.
+//
+// user_data is the size of the pool of unique elements to insert from N
+// times. The higher the pool size the more likely we are to miss the MRU.
+static iree_status_t iree_hal_resource_set_benchmark_randomized_n(
+ const iree_benchmark_def_t* benchmark_def,
+ iree_benchmark_state_t* benchmark_state) {
+ iree_allocator_t host_allocator = benchmark_state->host_allocator;
+
+ // Initialize the block pool we'll be serving from.
+ // Sized like we usually do it in the runtime for ~512-1024 elements.
+ iree_arena_block_pool_t block_pool;
+ iree_arena_block_pool_initialize(4096, host_allocator, &block_pool);
+
+ // Allocate the resources we'll be using - we keep them live so that we are
+ // measuring just the retain/release and set times instead of the timing of
+ // resource creation/deletion.
+ uint32_t count = (uint32_t)(uintptr_t)benchmark_def->user_data;
+ iree_hal_resource_t** resources = NULL;
+ IREE_CHECK_OK(iree_allocator_malloc(host_allocator,
+ sizeof(iree_hal_resource_t*) * count,
+ (void**)&resources));
+ for (uint32_t i = 0; i < count; ++i) {
+ IREE_CHECK_OK(iree_hal_test_resource_create(host_allocator, &resources[i]));
+ }
+
+ // The same set is maintained; we'll eventually have all resources in the set
+ // and be testing the MRU hit %.
+ iree_hal_resource_set_t* set = NULL;
+ IREE_CHECK_OK(iree_hal_resource_set_allocate(&block_pool, &set));
+
+ // The PRNG we use to select the elements.
+ iree_prng_xoroshiro128_state_t prng = {0};
+ iree_prng_xoroshiro128_initialize(123ull, &prng);
+
+ // Insert N random resources into the set. To hide some of the overhead we do
+ // multiple insertions in each loop.
+ while (iree_benchmark_keep_running(benchmark_state, /*batch_count=*/256)) {
+ for (uint32_t i = 0; i < 256; ++i) {
+ uint32_t resource_idx =
+ iree_prng_xoroshiro128plus_next_uint32(&prng) % count;
+ iree_hal_resource_t* resource = resources[resource_idx];
+ IREE_CHECK_OK(iree_hal_resource_set_insert(set, 1, &resource));
+ }
+ }
+
+ // Cleanup.
+ iree_hal_resource_set_free(set);
+ for (uint32_t i = 0; i < count; ++i) {
+ iree_hal_resource_release(resources[i]);
+ }
+ iree_allocator_free(host_allocator, resources);
+ iree_arena_block_pool_deinitialize(&block_pool);
+
+ return iree_ok_status();
+}
+
+int main(int argc, char** argv) {
+ iree_benchmark_initialize(&argc, argv);
+
+ // iree_hal_resource_set_benchmark_lifecycle_n
+ {
+ iree_benchmark_def_t benchmark_def = {
+ .flags = IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME |
+ IREE_BENCHMARK_FLAG_USE_REAL_TIME,
+ .time_unit = IREE_BENCHMARK_UNIT_NANOSECOND,
+ .minimum_duration_ns = 0,
+ .iteration_count = 0,
+ .run = iree_hal_resource_set_benchmark_lifecycle_n,
+ };
+ benchmark_def.user_data = (void*)0u;
+ iree_benchmark_register(iree_make_cstring_view("lifecycle_0"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)1u;
+ iree_benchmark_register(iree_make_cstring_view("lifecycle_1"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)256u;
+ iree_benchmark_register(iree_make_cstring_view("lifecycle_256"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)1024u;
+ iree_benchmark_register(iree_make_cstring_view("lifecycle_1024"),
+ &benchmark_def);
+ }
+
+ // iree_hal_resource_set_benchmark_insert_n
+ {
+ iree_benchmark_def_t benchmark_def = {
+ .flags = IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME |
+ IREE_BENCHMARK_FLAG_USE_REAL_TIME,
+ .time_unit = IREE_BENCHMARK_UNIT_NANOSECOND,
+ .minimum_duration_ns = 0,
+ .iteration_count = 0,
+ .run = iree_hal_resource_set_benchmark_insert_n,
+ };
+ benchmark_def.user_data = (void*)1u;
+ iree_benchmark_register(iree_make_cstring_view("insert_1"), &benchmark_def);
+ benchmark_def.user_data = (void*)5u;
+ iree_benchmark_register(iree_make_cstring_view("insert_5"), &benchmark_def);
+ benchmark_def.user_data = (void*)32u;
+ iree_benchmark_register(iree_make_cstring_view("insert_32"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)64u;
+ iree_benchmark_register(iree_make_cstring_view("insert_64"),
+ &benchmark_def);
+ }
+
+ // iree_hal_resource_set_benchmark_randomized_n
+ {
+ iree_benchmark_def_t benchmark_def = {
+ .flags = IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME |
+ IREE_BENCHMARK_FLAG_USE_REAL_TIME,
+ .time_unit = IREE_BENCHMARK_UNIT_NANOSECOND,
+ .minimum_duration_ns = 0,
+ .iteration_count = 0,
+ .run = iree_hal_resource_set_benchmark_randomized_n,
+ };
+ benchmark_def.user_data = (void*)1u;
+ iree_benchmark_register(iree_make_cstring_view("randomized_1"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)4u;
+ iree_benchmark_register(iree_make_cstring_view("randomized_4"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)8u;
+ iree_benchmark_register(iree_make_cstring_view("randomized_8"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)32u;
+ iree_benchmark_register(iree_make_cstring_view("randomized_32"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)256u;
+ iree_benchmark_register(iree_make_cstring_view("randomized_256"),
+ &benchmark_def);
+ benchmark_def.user_data = (void*)4096u;
+ iree_benchmark_register(iree_make_cstring_view("randomized_4096"),
+ &benchmark_def);
+ }
+
+ iree_benchmark_run_specified();
+ return 0;
+}
diff --git a/runtime/src/iree/hal/utils/resource_set_test.cc b/runtime/src/iree/hal/utils/resource_set_test.cc
new file mode 100644
index 0000000..021bb1b
--- /dev/null
+++ b/runtime/src/iree/hal/utils/resource_set_test.cc
@@ -0,0 +1,257 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/utils/resource_set.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace {
+
+using ::iree::testing::status::IsOkAndHolds;
+using ::iree::testing::status::StatusIs;
+using ::testing::Eq;
+
+typedef struct iree_hal_test_resource_t {
+ iree_hal_resource_t resource;
+ iree_allocator_t host_allocator;
+ uint32_t index;
+ uint32_t* live_bitmap;
+} iree_hal_test_resource_t;
+
+typedef struct iree_hal_test_resource_vtable_t {
+ void(IREE_API_PTR* destroy)(iree_hal_test_resource_t* resource);
+} iree_hal_test_resource_vtable_t;
+IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_test_resource_vtable_t);
+
+extern const iree_hal_test_resource_vtable_t iree_hal_test_resource_vtable;
+
+static iree_status_t iree_hal_test_resource_create(
+ uint32_t index, uint32_t* live_bitmap, iree_allocator_t host_allocator,
+ iree_hal_resource_t** out_resource) {
+ iree_hal_test_resource_t* test_resource = NULL;
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+ host_allocator, sizeof(*test_resource), (void**)&test_resource));
+ iree_hal_resource_initialize(&iree_hal_test_resource_vtable,
+ &test_resource->resource);
+ test_resource->host_allocator = host_allocator;
+ test_resource->index = index;
+ test_resource->live_bitmap = live_bitmap;
+ *live_bitmap |= 1 << index;
+ *out_resource = (iree_hal_resource_t*)test_resource;
+ return iree_ok_status();
+}
+
+static void iree_hal_test_resource_destroy(iree_hal_test_resource_t* resource) {
+ iree_allocator_t host_allocator = resource->host_allocator;
+ *resource->live_bitmap &= ~(1 << resource->index);
+ iree_allocator_free(host_allocator, resource);
+}
+
+const iree_hal_test_resource_vtable_t iree_hal_test_resource_vtable = {
+ /*.destroy=*/iree_hal_test_resource_destroy,
+};
+
+struct ResourceSetTest : public ::testing::Test {
+ // We could check the allocator to ensure all memory is freed if we wanted to
+ // reduce the reliance on asan.
+ iree_allocator_t host_allocator = iree_allocator_system();
+ iree_arena_block_pool_t block_pool;
+
+ void SetUp() override {
+ memset(&block_pool, 0, sizeof(block_pool));
+ iree_arena_block_pool_initialize(128, host_allocator, &block_pool);
+ }
+
+ void TearDown() override {
+ // This may assert (or at least trigger asan) if there are blocks
+ // outstanding.
+ iree_arena_block_pool_deinitialize(&block_pool);
+ }
+};
+
+using resource_set_ptr = std::unique_ptr<iree_hal_resource_set_t,
+ decltype(&iree_hal_resource_set_free)>;
+static resource_set_ptr make_resource_set(iree_arena_block_pool_t* block_pool) {
+ iree_hal_resource_set_t* set = NULL;
+ IREE_CHECK_OK(iree_hal_resource_set_allocate(block_pool, &set));
+ return resource_set_ptr(set, iree_hal_resource_set_free);
+}
+
+// Tests a set that has no resources added to it.
+TEST_F(ResourceSetTest, Empty) {
+ iree_hal_resource_set_t* set = NULL;
+ IREE_ASSERT_OK(iree_hal_resource_set_allocate(&block_pool, &set));
+ iree_hal_resource_set_free(set);
+}
+
+// Tests insertion of a single resource.
+TEST_F(ResourceSetTest, Insert1) {
+ auto resource_set = make_resource_set(&block_pool);
+
+ // Create test resource; it'll set its bit in the live_bitmap.
+ iree_hal_resource_t* resource = NULL;
+ uint32_t live_bitmap = 0u;
+ IREE_ASSERT_OK(iree_hal_test_resource_create(0, &live_bitmap, host_allocator,
+ &resource));
+ EXPECT_EQ(live_bitmap, 1u);
+
+ // Insert the resource and drop the reference; it should still be live as the
+ // set retains it.
+ IREE_ASSERT_OK(
+ iree_hal_resource_set_insert(resource_set.get(), 1, &resource));
+ iree_hal_resource_release(resource);
+ EXPECT_EQ(live_bitmap, 1u);
+
+ // Drop the set and expect the resource to be destroyed as it loses its last
+ // reference.
+ resource_set.reset();
+ EXPECT_EQ(live_bitmap, 0u);
+}
+
+// Tests inserting multiple resources at a time.
+TEST_F(ResourceSetTest, Insert5) {
+ auto resource_set = make_resource_set(&block_pool);
+
+ // Allocate 5 resources - this lets us test for special paths that may handle
+ // 4 at a time (to fit in SIMD registers) as well as the leftovers.
+ iree_hal_resource_t* resources[5] = {NULL};
+ uint32_t live_bitmap = 0u;
+ for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(resources); ++i) {
+ IREE_ASSERT_OK(iree_hal_test_resource_create(
+ i, &live_bitmap, host_allocator, &resources[i]));
+ }
+ EXPECT_EQ(live_bitmap, 0x1Fu);
+
+ // Transfer ownership of the resources to the set.
+ IREE_ASSERT_OK(iree_hal_resource_set_insert(
+ resource_set.get(), IREE_ARRAYSIZE(resources), resources));
+ for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(resources); ++i) {
+ iree_hal_resource_release(resources[i]);
+ }
+ EXPECT_EQ(live_bitmap, 0x1Fu);
+
+ // Ensure the set releases the resources.
+ resource_set.reset();
+ EXPECT_EQ(live_bitmap, 0u);
+}
+
+// Tests inserting enough resources to force set growth. This is ensured by
+// choosing a sufficiently small block size such that even 32 elements triggers
+// a growth. Of course, real usage should have at least ~4KB for the block size.
+TEST_F(ResourceSetTest, InsertionGrowth) {
+ auto resource_set = make_resource_set(&block_pool);
+
+ // Allocate 32 resources (one for each bit in our live map).
+ iree_hal_resource_t* resources[32] = {NULL};
+ uint32_t live_bitmap = 0u;
+ for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(resources); ++i) {
+ IREE_ASSERT_OK(iree_hal_test_resource_create(
+ i, &live_bitmap, host_allocator, &resources[i]));
+ }
+ EXPECT_EQ(live_bitmap, 0xFFFFFFFFu);
+
+ // Transfer ownership of the resources to the set.
+ IREE_ASSERT_OK(iree_hal_resource_set_insert(
+ resource_set.get(), IREE_ARRAYSIZE(resources), resources));
+ for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(resources); ++i) {
+ iree_hal_resource_release(resources[i]);
+ }
+ EXPECT_EQ(live_bitmap, 0xFFFFFFFFu);
+
+ // Ensure the set releases the resources.
+ resource_set.reset();
+ EXPECT_EQ(live_bitmap, 0u);
+}
+
+// Tests insertion of resources multiple times to verify the MRU works.
+TEST_F(ResourceSetTest, RedundantInsertion) {
+ auto resource_set = make_resource_set(&block_pool);
+
+ // Allocate 32 resources (one for each bit in our live map).
+ // We want to be able to miss in the MRU.
+ iree_hal_resource_t* resources[32] = {NULL};
+ static_assert(IREE_ARRAYSIZE(resources) > IREE_HAL_RESOURCE_SET_MRU_SIZE,
+ "need to pick a value that lets us exceed the MRU capacity");
+ uint32_t live_bitmap = 0u;
+ for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(resources); ++i) {
+ IREE_ASSERT_OK(iree_hal_test_resource_create(
+ i, &live_bitmap, host_allocator, &resources[i]));
+ }
+ EXPECT_EQ(live_bitmap, 0xFFFFFFFFu);
+
+ // NOTE: the only requirement of the MRU is that it's _mostly_ MRU - we may
+ // for performance reasons make it a little fuzzy to avoid additional
+ // shuffling. Today it's always a proper MRU and we check the pointers here.
+
+  // NOTE: the MRU size can vary across architectures; we know it should always
+  // be greater than 6 though so that's what we work with here.
+ static_assert(IREE_HAL_RESOURCE_SET_MRU_SIZE > 6,
+ "need at least enough elements to test with");
+
+ // Insert in sequence, MRU should contain:
+ // 31 30 29 28 27 ...
+ IREE_ASSERT_OK(iree_hal_resource_set_insert(
+ resource_set.get(), IREE_ARRAYSIZE(resources), resources));
+ EXPECT_EQ(resource_set->mru[0], resources[31]);
+ EXPECT_EQ(resource_set->mru[1], resources[30]);
+ EXPECT_EQ(resource_set->mru[2], resources[29]);
+ EXPECT_EQ(resource_set->mru[3], resources[28]);
+ EXPECT_EQ(resource_set->mru[4], resources[27]);
+
+ // Insert 31 again, MRU should remain the same as it's at the head.
+ IREE_ASSERT_OK(
+ iree_hal_resource_set_insert(resource_set.get(), 1, &resources[31]));
+ EXPECT_EQ(resource_set->mru[0], resources[31]);
+ EXPECT_EQ(resource_set->mru[1], resources[30]);
+ EXPECT_EQ(resource_set->mru[2], resources[29]);
+ EXPECT_EQ(resource_set->mru[3], resources[28]);
+ EXPECT_EQ(resource_set->mru[4], resources[27]);
+
+ // Insert 28 again, MRU should be updated to move it to the front:
+ // 28 31 30 29 27 ...
+ IREE_ASSERT_OK(
+ iree_hal_resource_set_insert(resource_set.get(), 1, &resources[28]));
+ EXPECT_EQ(resource_set->mru[0], resources[28]);
+ EXPECT_EQ(resource_set->mru[1], resources[31]);
+ EXPECT_EQ(resource_set->mru[2], resources[30]);
+ EXPECT_EQ(resource_set->mru[3], resources[29]);
+ EXPECT_EQ(resource_set->mru[4], resources[27]);
+
+ // Insert 0 again, which should be a miss as it fell off the end of the MRU:
+ // 0 28 31 30 29 27 ...
+ IREE_ASSERT_OK(
+ iree_hal_resource_set_insert(resource_set.get(), 1, &resources[0]));
+ EXPECT_EQ(resource_set->mru[0], resources[0]);
+ EXPECT_EQ(resource_set->mru[1], resources[28]);
+ EXPECT_EQ(resource_set->mru[2], resources[31]);
+ EXPECT_EQ(resource_set->mru[3], resources[30]);
+ EXPECT_EQ(resource_set->mru[4], resources[29]);
+ EXPECT_EQ(resource_set->mru[5], resources[27]);
+
+ // Release all of the resources - they should still be owned by the set.
+ for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(resources); ++i) {
+ iree_hal_resource_release(resources[i]);
+ }
+ EXPECT_EQ(live_bitmap, 0xFFFFFFFFu);
+
+ // Ensure the set releases the resources.
+ resource_set.reset();
+ EXPECT_EQ(live_bitmap, 0u);
+}
+
+} // namespace
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vmvx/BUILD b/runtime/src/iree/hal/vmvx/BUILD
new file mode 100644
index 0000000..0820c01
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/BUILD
@@ -0,0 +1,13 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# A VMVX (VM-based Vector eXtensions) runtime HAL backend.
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
diff --git a/runtime/src/iree/hal/vmvx/CMakeLists.txt b/runtime/src/iree/hal/vmvx/CMakeLists.txt
new file mode 100644
index 0000000..b1096fc
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/CMakeLists.txt
@@ -0,0 +1,13 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/vmvx/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/vmvx/cts/CMakeLists.txt b/runtime/src/iree/hal/vmvx/cts/CMakeLists.txt
new file mode 100644
index 0000000..8a116f9
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/cts/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_hal_cts_test_suite(
+ DRIVER_NAME
+ vmvx
+ DRIVER_REGISTRATION_HDR
+ "runtime/src/iree/hal/vmvx/registration/driver_module.h"
+ DRIVER_REGISTRATION_FN
+ "iree_hal_vmvx_driver_module_register"
+ COMPILER_TARGET_BACKEND
+ "vmvx"
+ EXECUTABLE_FORMAT
+ "\"vmvx-bytecode-fb\""
+ DEPS
+ iree::hal::vmvx::registration
+)
+
+iree_hal_cts_test_suite(
+ DRIVER_NAME
+ vmvx-sync
+ DRIVER_REGISTRATION_HDR
+ "runtime/src/iree/hal/vmvx/registration/driver_module_sync.h"
+ DRIVER_REGISTRATION_FN
+ "iree_hal_vmvx_sync_driver_module_register"
+ COMPILER_TARGET_BACKEND
+ "vmvx"
+ EXECUTABLE_FORMAT
+ "\"vmvx-bytecode-fb\""
+ DEPS
+ iree::hal::vmvx::registration::sync
+ EXCLUDED_TESTS
+ # TODO(#4680): command buffer recording so that these can run on sync HAL
+ "command_buffer"
+ "event"
+ "semaphore_submission"
+)
diff --git a/runtime/src/iree/hal/vmvx/registration/BUILD b/runtime/src/iree/hal/vmvx/registration/BUILD
new file mode 100644
index 0000000..2bbe131
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/registration/BUILD
@@ -0,0 +1,71 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_cmake_extra_content(
+ content = """
+if(${IREE_HAL_DRIVER_VMVX})
+""",
+ inline = True,
+)
+
+iree_runtime_cc_library(
+ name = "registration",
+ srcs = ["driver_module.c"],
+ hdrs = ["driver_module.h"],
+ defines = [
+ "IREE_HAL_HAVE_VMVX_DRIVER_MODULE=1",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/local",
+ "//runtime/src/iree/hal/local:task_driver",
+ "//runtime/src/iree/hal/local/loaders:vmvx_module_loader",
+ "//runtime/src/iree/task:api",
+ "//runtime/src/iree/vm",
+ ],
+)
+
+iree_cmake_extra_content(
+ content = """
+endif()
+
+if(${IREE_HAL_DRIVER_VMVX_SYNC})
+""",
+ inline = True,
+)
+
+iree_runtime_cc_library(
+ name = "sync",
+ srcs = ["driver_module_sync.c"],
+ hdrs = ["driver_module_sync.h"],
+ defines = [
+ "IREE_HAL_HAVE_VMVX_SYNC_DRIVER_MODULE=1",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/local",
+ "//runtime/src/iree/hal/local:sync_driver",
+ "//runtime/src/iree/hal/local/loaders:vmvx_module_loader",
+ "//runtime/src/iree/vm",
+ ],
+)
+
+iree_cmake_extra_content(
+ content = """
+endif()
+""",
+ inline = True,
+)
diff --git a/runtime/src/iree/hal/vmvx/registration/CMakeLists.txt b/runtime/src/iree/hal/vmvx/registration/CMakeLists.txt
new file mode 100644
index 0000000..66ea6d2
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/registration/CMakeLists.txt
@@ -0,0 +1,60 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/vmvx/registration/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+if(${IREE_HAL_DRIVER_VMVX})
+
+iree_cc_library(
+ NAME
+ registration
+ HDRS
+ "driver_module.h"
+ SRCS
+ "driver_module.c"
+ DEPS
+ iree::base
+ iree::hal
+ iree::hal::local
+ iree::hal::local::loaders::vmvx_module_loader
+ iree::hal::local::task_driver
+ iree::task::api
+ iree::vm
+ DEFINES
+ "IREE_HAL_HAVE_VMVX_DRIVER_MODULE=1"
+ PUBLIC
+)
+
+endif()
+
+if(${IREE_HAL_DRIVER_VMVX_SYNC})
+
+iree_cc_library(
+ NAME
+ sync
+ HDRS
+ "driver_module_sync.h"
+ SRCS
+ "driver_module_sync.c"
+ DEPS
+ iree::base
+ iree::hal
+ iree::hal::local
+ iree::hal::local::loaders::vmvx_module_loader
+ iree::hal::local::sync_driver
+ iree::vm
+ DEFINES
+ "IREE_HAL_HAVE_VMVX_SYNC_DRIVER_MODULE=1"
+ PUBLIC
+)
+
+endif()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/vmvx/registration/driver_module.c b/runtime/src/iree/hal/vmvx/registration/driver_module.c
new file mode 100644
index 0000000..a1a0228
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/registration/driver_module.c
@@ -0,0 +1,98 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vmvx/registration/driver_module.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/hal/local/loaders/vmvx_module_loader.h"
+#include "iree/hal/local/task_device.h"
+#include "iree/hal/local/task_driver.h"
+#include "iree/task/api.h"
+#include "iree/vm/api.h"
+
+// TODO(#4298): remove this driver registration and wrapper.
+
+// TODO(benvanik): replace with C flags.
+#define IREE_HAL_VMVX_WORKER_COUNT 0
+#define IREE_HAL_MAX_VMVX_WORKER_COUNT 16
+
+#define IREE_HAL_VMVX_DRIVER_ID 0x564D5658u // VMVX
+
+static iree_status_t iree_hal_vmvx_driver_factory_enumerate(
+ void* self, const iree_hal_driver_info_t** out_driver_infos,
+ iree_host_size_t* out_driver_info_count) {
+ static const iree_hal_driver_info_t driver_infos[1] = {
+ {
+ .driver_id = IREE_HAL_VMVX_DRIVER_ID,
+ .driver_name = iree_string_view_literal("vmvx"),
+ .full_name = iree_string_view_literal("VM-based reference backend"),
+ },
+ };
+ *out_driver_info_count = IREE_ARRAYSIZE(driver_infos);
+ *out_driver_infos = driver_infos;
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_vmvx_driver_factory_try_create(
+ void* self, iree_hal_driver_id_t driver_id, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver) {
+ if (driver_id != IREE_HAL_VMVX_DRIVER_ID) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "no driver with ID %016" PRIu64
+ " is provided by this factory",
+ driver_id);
+ }
+
+ iree_vm_instance_t* instance = NULL;
+ IREE_RETURN_IF_ERROR(iree_vm_instance_create(host_allocator, &instance));
+
+ iree_hal_task_device_params_t default_params;
+ iree_hal_task_device_params_initialize(&default_params);
+
+ iree_hal_executable_loader_t* vmvx_loader = NULL;
+ iree_status_t status = iree_hal_vmvx_module_loader_create(
+ instance, host_allocator, &vmvx_loader);
+ iree_hal_executable_loader_t* loaders[1] = {vmvx_loader};
+
+ iree_task_executor_t* executor = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_task_executor_create_from_flags(host_allocator, &executor);
+ }
+
+ iree_hal_allocator_t* device_allocator = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_allocator_create_heap(iree_make_cstring_view("vmvx"),
+ host_allocator, host_allocator,
+ &device_allocator);
+ }
+
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_task_driver_create(
+ iree_make_cstring_view("vmvx"), &default_params, executor,
+ IREE_ARRAYSIZE(loaders), loaders, device_allocator, host_allocator,
+ out_driver);
+ }
+
+ iree_hal_allocator_release(device_allocator);
+ iree_task_executor_release(executor);
+ iree_hal_executable_loader_release(vmvx_loader);
+ iree_vm_instance_release(instance);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t
+iree_hal_vmvx_driver_module_register(iree_hal_driver_registry_t* registry) {
+ static const iree_hal_driver_factory_t factory = {
+ .self = NULL,
+ .enumerate = iree_hal_vmvx_driver_factory_enumerate,
+ .try_create = iree_hal_vmvx_driver_factory_try_create,
+ };
+ return iree_hal_driver_registry_register_factory(registry, &factory);
+}
diff --git a/runtime/src/iree/hal/vmvx/registration/driver_module.h b/runtime/src/iree/hal/vmvx/registration/driver_module.h
new file mode 100644
index 0000000..578d9c2
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/registration/driver_module.h
@@ -0,0 +1,24 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VMVX_REGISTRATION_DRIVER_MODULE_H_
+#define IREE_HAL_VMVX_REGISTRATION_DRIVER_MODULE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+IREE_API_EXPORT iree_status_t
+iree_hal_vmvx_driver_module_register(iree_hal_driver_registry_t* registry);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VMVX_REGISTRATION_DRIVER_MODULE_H_
diff --git a/runtime/src/iree/hal/vmvx/registration/driver_module_sync.c b/runtime/src/iree/hal/vmvx/registration/driver_module_sync.c
new file mode 100644
index 0000000..6a5fc70
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/registration/driver_module_sync.c
@@ -0,0 +1,92 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vmvx/registration/driver_module_sync.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_loader.h"
+#include "iree/hal/local/loaders/vmvx_module_loader.h"
+#include "iree/hal/local/sync_device.h"
+#include "iree/hal/local/sync_driver.h"
+#include "iree/vm/api.h"
+
+// TODO(#4298): remove this driver registration and wrapper.
+
+// TODO(benvanik): replace with C flags.
+#define IREE_HAL_VMVX_WORKER_COUNT 0
+#define IREE_HAL_MAX_VMVX_WORKER_COUNT 16
+
+#define IREE_HAL_VMVX_SYNC_DRIVER_ID 0x53564D58u // SVMX
+
+// Enumerates the single "vmvx-sync" driver this factory provides.
+static iree_status_t iree_hal_vmvx_sync_driver_factory_enumerate(
+    void* self, const iree_hal_driver_info_t** out_driver_infos,
+    iree_host_size_t* out_driver_info_count) {
+  // Static storage + string literals: the caller receives a pointer into this
+  // table, so it must outlive the call; no allocation is needed.
+  static const iree_hal_driver_info_t driver_infos[1] = {
+      {
+          .driver_id = IREE_HAL_VMVX_SYNC_DRIVER_ID,
+          .driver_name = iree_string_view_literal("vmvx-sync"),
+          .full_name = iree_string_view_literal(
+              "synchronous VM-based reference backend"),
+      },
+  };
+  *out_driver_info_count = IREE_ARRAYSIZE(driver_infos);
+  *out_driver_infos = driver_infos;
+  return iree_ok_status();
+}
+
+// Creates the synchronous VMVX driver when |driver_id| matches this factory.
+// On success ownership of |*out_driver| transfers to the caller; on failure
+// all intermediate objects created here are released before returning.
+static iree_status_t iree_hal_vmvx_sync_driver_factory_try_create(
+    void* self, iree_hal_driver_id_t driver_id, iree_allocator_t host_allocator,
+    iree_hal_driver_t** out_driver) {
+  if (driver_id != IREE_HAL_VMVX_SYNC_DRIVER_ID) {
+    return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                            "no driver with ID %016" PRIu64
+                            " is provided by this factory",
+                            driver_id);
+  }
+
+  iree_vm_instance_t* instance = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_instance_create(host_allocator, &instance));
+
+  // From here on errors thread through |status|: each step only runs if all
+  // prior steps succeeded, and a single cleanup path at the end releases
+  // whatever was created either way.
+  iree_hal_executable_loader_t* vmvx_loader = NULL;
+  iree_status_t status = iree_hal_vmvx_module_loader_create(
+      instance, host_allocator, &vmvx_loader);
+  iree_hal_executable_loader_t* loaders[1] = {vmvx_loader};
+
+  iree_hal_allocator_t* device_allocator = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_allocator_create_heap(iree_make_cstring_view("vmvx"),
+                                            host_allocator, host_allocator,
+                                            &device_allocator);
+  }
+
+  // Set parameters for the device created in the next step.
+  iree_hal_sync_device_params_t default_params;
+  iree_hal_sync_device_params_initialize(&default_params);
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_sync_driver_create(
+        iree_make_cstring_view("vmvx"), &default_params,
+        IREE_ARRAYSIZE(loaders), loaders, device_allocator, host_allocator,
+        out_driver);
+  }
+
+  // Drop our local references; on success the driver retains what it needs.
+  // NOTE: these may receive NULL when an earlier step failed — relies on the
+  // release helpers tolerating NULL.
+  iree_hal_allocator_release(device_allocator);
+  iree_hal_executable_loader_release(vmvx_loader);
+  iree_vm_instance_release(instance);
+  return status;
+}
+
+// Registers the synchronous VMVX driver factory with |registry|.
+// Mirrors iree_hal_vmvx_driver_module_register; the factory table is static
+// so registration allocates nothing.
+IREE_API_EXPORT iree_status_t iree_hal_vmvx_sync_driver_module_register(
+    iree_hal_driver_registry_t* registry) {
+  static const iree_hal_driver_factory_t factory = {
+      .self = NULL,
+      .enumerate = iree_hal_vmvx_sync_driver_factory_enumerate,
+      .try_create = iree_hal_vmvx_sync_driver_factory_try_create,
+  };
+  return iree_hal_driver_registry_register_factory(registry, &factory);
+}
diff --git a/runtime/src/iree/hal/vmvx/registration/driver_module_sync.h b/runtime/src/iree/hal/vmvx/registration/driver_module_sync.h
new file mode 100644
index 0000000..a73006d
--- /dev/null
+++ b/runtime/src/iree/hal/vmvx/registration/driver_module_sync.h
@@ -0,0 +1,26 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VMVX_REGISTRATION_DRIVER_MODULE_SYNC_H_
+#define IREE_HAL_VMVX_REGISTRATION_DRIVER_MODULE_SYNC_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// DEPRECATED: this entire driver will be removed soon.
+// TODO(#3580): remove this entire driver w/ iree_hal_executable_library_t.
+IREE_API_EXPORT iree_status_t
+iree_hal_vmvx_sync_driver_module_register(iree_hal_driver_registry_t* registry);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VMVX_REGISTRATION_DRIVER_MODULE_SYNC_H_
diff --git a/runtime/src/iree/hal/vulkan/BUILD b/runtime/src/iree/hal/vulkan/BUILD
new file mode 100644
index 0000000..743310a
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/BUILD
@@ -0,0 +1,143 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# HAL implementation using Vulkan and (likely) SPIR-V executables.
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_cmake_extra_content(
+ content = """
+if(NOT ${IREE_HAL_DRIVER_VULKAN})
+ return()
+endif()
+""",
+)
+
+iree_runtime_cc_library(
+ name = "vulkan",
+ srcs = [
+ "api.cc",
+ "builtin_executables.cc",
+ "builtin_executables.h",
+ "command_queue.h",
+ "debug_reporter.cc",
+ "debug_reporter.h",
+ "descriptor_pool_cache.cc",
+ "descriptor_pool_cache.h",
+ "descriptor_set_arena.cc",
+ "descriptor_set_arena.h",
+ "direct_command_buffer.cc",
+ "direct_command_buffer.h",
+ "direct_command_queue.cc",
+ "direct_command_queue.h",
+ "emulated_semaphore.cc",
+ "emulated_semaphore.h",
+ "extensibility_util.cc",
+ "extensibility_util.h",
+ "handle_util.h",
+ "internal_vk_mem_alloc.cc",
+ "internal_vk_mem_alloc.h",
+ "native_descriptor_set.cc",
+ "native_descriptor_set.h",
+ "native_descriptor_set_layout.cc",
+ "native_descriptor_set_layout.h",
+ "native_event.cc",
+ "native_event.h",
+ "native_executable.cc",
+ "native_executable.h",
+ "native_executable_layout.cc",
+ "native_executable_layout.h",
+ "native_semaphore.cc",
+ "native_semaphore.h",
+ "nop_executable_cache.cc",
+ "nop_executable_cache.h",
+ "serializing_command_queue.cc",
+ "serializing_command_queue.h",
+ "status_util.c",
+ "status_util.h",
+ "timepoint_util.cc",
+ "timepoint_util.h",
+ "tracing.cc",
+ "tracing.h",
+ "vma_allocator.cc",
+ "vma_allocator.h",
+ "vma_buffer.cc",
+ "vma_buffer.h",
+ "vulkan_device.cc",
+ "vulkan_driver.cc",
+ "vulkan_headers.h",
+ ],
+ hdrs = [
+ # TODO(benvanik): hide all but api.h.
+ "api.h",
+ "vulkan_device.h",
+ "vulkan_driver.h",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":dynamic_symbols",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:cc",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:logging",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:arena",
+ "//runtime/src/iree/base/internal:synchronization",
+ "//runtime/src/iree/base/internal/flatcc:parsing",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/utils:buffer_transfer",
+ "//runtime/src/iree/hal/utils:resource_set",
+ "//runtime/src/iree/hal/vulkan/builtin",
+ "//runtime/src/iree/hal/vulkan/util:arena",
+ "//runtime/src/iree/hal/vulkan/util:intrusive_list",
+ "//runtime/src/iree/hal/vulkan/util:ref_ptr",
+ "//runtime/src/iree/schemas:spirv_executable_def_c_fbs",
+ "@vulkan_headers",
+ "@vulkan_memory_allocator//:impl_header_only",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "dynamic_symbols",
+ srcs = [
+ "dynamic_symbols.cc",
+ "vulkan_headers.h",
+ ],
+ hdrs = [
+ "dynamic_symbols.h",
+ ],
+ textual_hdrs = [
+ "dynamic_symbol_tables.h",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:cc",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal:dynamic_library",
+ "//runtime/src/iree/hal/vulkan/util:ref_ptr",
+ "@vulkan_headers",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "dynamic_symbols_test",
+ srcs = ["dynamic_symbols_test.cc"],
+ tags = ["driver=vulkan"],
+ deps = [
+ ":dynamic_symbols",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
diff --git a/runtime/src/iree/hal/vulkan/CMakeLists.txt b/runtime/src/iree/hal/vulkan/CMakeLists.txt
new file mode 100644
index 0000000..17862ee
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/CMakeLists.txt
@@ -0,0 +1,134 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/vulkan/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+if(NOT ${IREE_HAL_DRIVER_VULKAN})
+ return()
+endif()
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ vulkan
+ HDRS
+ "api.h"
+ "vulkan_device.h"
+ "vulkan_driver.h"
+ SRCS
+ "api.cc"
+ "builtin_executables.cc"
+ "builtin_executables.h"
+ "command_queue.h"
+ "debug_reporter.cc"
+ "debug_reporter.h"
+ "descriptor_pool_cache.cc"
+ "descriptor_pool_cache.h"
+ "descriptor_set_arena.cc"
+ "descriptor_set_arena.h"
+ "direct_command_buffer.cc"
+ "direct_command_buffer.h"
+ "direct_command_queue.cc"
+ "direct_command_queue.h"
+ "emulated_semaphore.cc"
+ "emulated_semaphore.h"
+ "extensibility_util.cc"
+ "extensibility_util.h"
+ "handle_util.h"
+ "internal_vk_mem_alloc.cc"
+ "internal_vk_mem_alloc.h"
+ "native_descriptor_set.cc"
+ "native_descriptor_set.h"
+ "native_descriptor_set_layout.cc"
+ "native_descriptor_set_layout.h"
+ "native_event.cc"
+ "native_event.h"
+ "native_executable.cc"
+ "native_executable.h"
+ "native_executable_layout.cc"
+ "native_executable_layout.h"
+ "native_semaphore.cc"
+ "native_semaphore.h"
+ "nop_executable_cache.cc"
+ "nop_executable_cache.h"
+ "serializing_command_queue.cc"
+ "serializing_command_queue.h"
+ "status_util.c"
+ "status_util.h"
+ "timepoint_util.cc"
+ "timepoint_util.h"
+ "tracing.cc"
+ "tracing.h"
+ "vma_allocator.cc"
+ "vma_allocator.h"
+ "vma_buffer.cc"
+ "vma_buffer.h"
+ "vulkan_device.cc"
+ "vulkan_driver.cc"
+ "vulkan_headers.h"
+ DEPS
+ ::dynamic_symbols
+ Vulkan::Headers
+ iree::base
+ iree::base::cc
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::arena
+ iree::base::internal::flatcc::parsing
+ iree::base::internal::synchronization
+ iree::base::logging
+ iree::base::tracing
+ iree::hal
+ iree::hal::utils::buffer_transfer
+ iree::hal::utils::resource_set
+ iree::hal::vulkan::builtin
+ iree::hal::vulkan::util::arena
+ iree::hal::vulkan::util::intrusive_list
+ iree::hal::vulkan::util::ref_ptr
+ iree::schemas::spirv_executable_def_c_fbs
+ vulkan_memory_allocator
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ dynamic_symbols
+ HDRS
+ "dynamic_symbols.h"
+ TEXTUAL_HDRS
+ "dynamic_symbol_tables.h"
+ SRCS
+ "dynamic_symbols.cc"
+ "vulkan_headers.h"
+ DEPS
+ Vulkan::Headers
+ iree::base
+ iree::base::cc
+ iree::base::core_headers
+ iree::base::internal::dynamic_library
+ iree::base::tracing
+ iree::hal::vulkan::util::ref_ptr
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ dynamic_symbols_test
+ SRCS
+ "dynamic_symbols_test.cc"
+ DEPS
+ ::dynamic_symbols
+ iree::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+ LABELS
+ "driver=vulkan"
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/vulkan/api.cc b/runtime/src/iree/hal/vulkan/api.cc
new file mode 100644
index 0000000..f05a296
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/api.cc
@@ -0,0 +1,77 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/api.h"
+
+#include <cstring>
+#include <functional>
+#include <string>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+using namespace iree::hal::vulkan;
+
+// TODO(benvanik): move these into the appropriate files and delete this .cc.
+
+//===----------------------------------------------------------------------===//
+// iree::hal::vulkan::DynamicSymbols
+//===----------------------------------------------------------------------===//
+
+// Creates a symbol table seeded with a caller-provided vkGetInstanceProcAddr.
+// NOTE(review): |host_allocator| is accepted but unused in this body — the
+// symbols object appears to manage its own storage; confirm intended.
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_syms_create(
+    void* vkGetInstanceProcAddr_fn, iree_allocator_t host_allocator,
+    iree_hal_vulkan_syms_t** out_syms) {
+  IREE_TRACE_SCOPE0("iree_hal_vulkan_syms_create");
+  IREE_ASSERT_ARGUMENT(out_syms);
+  *out_syms = nullptr;
+
+  iree::ref_ptr<iree::hal::vulkan::DynamicSymbols> syms;
+  IREE_RETURN_IF_ERROR(DynamicSymbols::Create(
+      [&vkGetInstanceProcAddr_fn](const char* function_name) {
+        // Only resolve vkGetInstanceProcAddr, rely on syms->LoadFromInstance()
+        // and/or syms->LoadFromDevice() for further loading.
+        std::string fn = "vkGetInstanceProcAddr";
+        // NOTE(review): strncmp bounded by fn.size() is a prefix match, not an
+        // exact match; harmless today (no other symbol shares this prefix) but
+        // an exact compare would be stricter.
+        if (strncmp(function_name, fn.data(), fn.size()) == 0) {
+          return reinterpret_cast<PFN_vkVoidFunction>(vkGetInstanceProcAddr_fn);
+        }
+        return reinterpret_cast<PFN_vkVoidFunction>(NULL);
+      },
+      &syms));
+
+  // Transfer ownership of the ref-counted object to the opaque C handle; the
+  // caller must balance with iree_hal_vulkan_syms_release.
+  *out_syms = reinterpret_cast<iree_hal_vulkan_syms_t*>(syms.release());
+  return iree_ok_status();
+}
+
+// Creates a symbol table by loading symbols from the system Vulkan loader.
+// NOTE(review): |host_allocator| is accepted but unused here, same as in
+// iree_hal_vulkan_syms_create — confirm intended.
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_syms_create_from_system_loader(
+    iree_allocator_t host_allocator, iree_hal_vulkan_syms_t** out_syms) {
+  IREE_TRACE_SCOPE0("iree_hal_vulkan_syms_create_from_system_loader");
+  IREE_ASSERT_ARGUMENT(out_syms);
+  *out_syms = nullptr;
+
+  iree::ref_ptr<iree::hal::vulkan::DynamicSymbols> syms;
+  IREE_RETURN_IF_ERROR(DynamicSymbols::CreateFromSystemLoader(&syms));
+  // Ownership of the ref-counted object moves to the opaque C handle.
+  *out_syms = reinterpret_cast<iree_hal_vulkan_syms_t*>(syms.release());
+  return iree_ok_status();
+}
+
+// Adds a reference to |syms| on behalf of the caller.
+// The assert fires on NULL in debug builds; the runtime NULL check below
+// additionally makes the call a no-op in release builds.
+IREE_API_EXPORT void iree_hal_vulkan_syms_retain(iree_hal_vulkan_syms_t* syms) {
+  IREE_ASSERT_ARGUMENT(syms);
+  auto* handle = reinterpret_cast<DynamicSymbols*>(syms);
+  if (handle) {
+    handle->AddReference();
+  }
+}
+
+// Releases the caller's reference to |syms|, mirroring
+// iree_hal_vulkan_syms_retain/_create. NULL is tolerated at runtime (asserted
+// against in debug builds).
+IREE_API_EXPORT void iree_hal_vulkan_syms_release(
+    iree_hal_vulkan_syms_t* syms) {
+  IREE_ASSERT_ARGUMENT(syms);
+  auto* handle = reinterpret_cast<DynamicSymbols*>(syms);
+  if (handle) {
+    handle->ReleaseReference();
+  }
+}
diff --git a/runtime/src/iree/hal/vulkan/api.h b/runtime/src/iree/hal/vulkan/api.h
new file mode 100644
index 0000000..126b4f4
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/api.h
@@ -0,0 +1,268 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// See iree/base/api.h for documentation on the API conventions used.
+
+#ifndef IREE_HAL_VULKAN_API_H_
+#define IREE_HAL_VULKAN_API_H_
+
+#include <stdint.h>
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h"
+// clang-format on
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_hal_vulkan_device_t extensibility util
+//===----------------------------------------------------------------------===//
+
+// TODO(benvanik): replace with feature list (easier to version).
+// Bitfield that defines sets of Vulkan features.
+enum iree_hal_vulkan_feature_bits_t {
+ // Use VK_LAYER_KHRONOS_standard_validation to validate Vulkan API usage.
+ // Has a significant performance penalty and is *not* a security mechanism.
+ IREE_HAL_VULKAN_FEATURE_ENABLE_VALIDATION_LAYERS = 1u << 0,
+
+ // Use VK_EXT_debug_utils, record markers, and log errors.
+ IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS = 1u << 1,
+
+ // Enables tracing of command buffers when IREE tracing is enabled.
+ // May take advantage of additional extensions for more accurate timing or
+ // hardware-specific performance counters.
+ //
+ // NOTE: tracing has a non-trivial overhead and will skew the timing of
+ // submissions and introduce false barriers between dispatches. Use this to
+ // identify slow dispatches and refine from there; be wary of whole-program
+ // tracing with this enabled.
+ IREE_HAL_VULKAN_FEATURE_ENABLE_TRACING = 1u << 2,
+};
+typedef uint32_t iree_hal_vulkan_features_t;
+
+// Describes the type of a set of Vulkan extensions.
+typedef enum iree_hal_vulkan_extensibility_set_e {
+ // A set of required instance layer names. These must all be enabled on
+ // the VkInstance for IREE to function.
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_REQUIRED = 0,
+
+ // A set of optional instance layer names. If omitted fallbacks may be
+ // used or debugging features may not be available.
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_OPTIONAL,
+
+ // A set of required instance extension names. These must all be enabled on
+ // the VkInstance for IREE to function.
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_REQUIRED,
+
+ // A set of optional instance extension names. If omitted fallbacks may be
+ // used or debugging features may not be available.
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_OPTIONAL,
+
+ // A set of required device extension names. These must all be enabled on
+ // the VkDevice for IREE to function.
+ IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED,
+
+ // A set of optional device extension names. If omitted fallbacks may be
+ // used or debugging features may not be available.
+ IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
+
+ IREE_HAL_VULKAN_EXTENSIBILITY_SET_COUNT, // used for sizing lookup tables
+} iree_hal_vulkan_extensibility_set_t;
+
+// Queries the names of the Vulkan layers and extensions used for a given set of
+// IREE |requested_features|. All devices used by IREE must have the required
+// layers and extensions as defined by these sets. Optional layers and
+// extensions will be used when needed and otherwise have fallbacks for when
+// they are not available.
+//
+// Instance extensions should be enabled on VkInstances passed to
+// |iree_hal_vulkan_driver_create_using_instance| and device extensions should
+// be enabled on VkDevices passed to |iree_hal_vulkan_driver_wrap_device|.
+//
+// |string_capacity| defines the number of elements available in
+// |out_string_values| and |out_string_count| will be set with the actual number
+// of strings returned. If |string_capacity| is too small then
+// IREE_STATUS_OUT_OF_RANGE will be returned with the required capacity in
+// |out_string_count|. To only query the required capacity then
+// |out_string_values| may be passed as NULL.
+//
+// The returned strings originate from the _EXTENSION_NAME Vulkan macros
+// (such as 'VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME') and have a
+// lifetime matching whatever module they are defined in.
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_query_extensibility_set(
+ iree_hal_vulkan_features_t requested_features,
+ iree_hal_vulkan_extensibility_set_t set, iree_host_size_t string_capacity,
+ const char** out_string_values, iree_host_size_t* out_string_count);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_vulkan_syms_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_vulkan_syms_t iree_hal_vulkan_syms_t;
+
+// Loads Vulkan functions by invoking |vkGetInstanceProcAddr|.
+//
+// |vkGetInstanceProcAddr| can be obtained in whatever way suits the calling
+// application, such as via `dlsym` or `GetProcAddress` when dynamically
+// loading Vulkan, or `reinterpret_cast<void*>(&vkGetInstanceProcAddr)` when
+// statically linking Vulkan.
+//
+// |out_syms| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_syms_create(
+ void* vkGetInstanceProcAddr_fn, iree_allocator_t host_allocator,
+ iree_hal_vulkan_syms_t** out_syms);
+
+// Loads Vulkan functions from the Vulkan loader.
+// This will look for a Vulkan loader on the system (like libvulkan.so) and
+// dlsym the functions from that.
+//
+// |out_syms| must be released by the caller with iree_hal_vulkan_syms_release.
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_syms_create_from_system_loader(
+ iree_allocator_t host_allocator, iree_hal_vulkan_syms_t** out_syms);
+
+// Retains the given |syms| for the caller.
+IREE_API_EXPORT void iree_hal_vulkan_syms_retain(iree_hal_vulkan_syms_t* syms);
+
+// Releases the given |syms| from the caller.
+IREE_API_EXPORT void iree_hal_vulkan_syms_release(iree_hal_vulkan_syms_t* syms);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_vulkan_device_t
+//===----------------------------------------------------------------------===//
+
+// A set of queues within a specific queue family on a VkDevice.
+typedef struct iree_hal_vulkan_queue_set_t {
+ // The index of a particular queue family on a VkPhysicalDevice, as described
+ // by vkGetPhysicalDeviceQueueFamilyProperties.
+ uint32_t queue_family_index;
+
+ // Bitfield of queue indices within the queue family at |queue_family_index|.
+ uint64_t queue_indices;
+} iree_hal_vulkan_queue_set_t;
+
+// TODO(benvanik): replace with flag list (easier to version).
+enum iree_hal_vulkan_device_flag_bits_t {
+ // Uses timeline semaphore emulation even if native support exists.
+ // May be removed in future versions when timeline semaphores can be assumed
+ // present on all platforms (looking at you, Android ಠ_ಠ).
+ IREE_HAL_VULKAN_DEVICE_FORCE_TIMELINE_SEMAPHORE_EMULATION = 1u << 0,
+};
+typedef uint32_t iree_hal_vulkan_device_flags_t;
+
+typedef struct iree_hal_vulkan_device_options_t {
+ // Flags controlling device behavior.
+ iree_hal_vulkan_device_flags_t flags;
+} iree_hal_vulkan_device_options_t;
+
+IREE_API_EXPORT void iree_hal_vulkan_device_options_initialize(
+ iree_hal_vulkan_device_options_t* out_options);
+
+// Creates a Vulkan HAL device that wraps an existing VkDevice.
+//
+// HAL devices created in this way may share Vulkan resources and synchronize
+// within the same physical VkPhysicalDevice and logical VkDevice directly.
+//
+// |logical_device| is expected to have been created with all extensions
+// returned by |iree_hal_vulkan_get_extensions| and
+// IREE_HAL_VULKAN_DEVICE_REQUIRED using the features provided during driver
+// creation.
+//
+// |instance_syms| must have at least the instance-specific functions resolved
+// and device symbols will be queried from |logical_device| as needed.
+//
+// The device will schedule commands against the queues in
+// |compute_queue_set| and (if set) |transfer_queue_set|.
+//
+// Applications may choose how these queues are created and selected in order
+// to control how commands submitted by this device are prioritized and
+// scheduled. For example, a low priority queue could be provided to one IREE
+// device for background processing or a high priority queue could be provided
+// for latency-sensitive processing.
+//
+// Dedicated compute queues (no graphics capabilities) are preferred within
+// |compute_queue_set|, if they are available.
+// Similarly, dedicated transfer queues (no compute or graphics) are preferred
+// within |transfer_queue_set|.
+// The queue sets can be the same.
+//
+// |out_device| must be released by the caller (see |iree_hal_device_release|).
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_wrap_device(
+ iree_string_view_t identifier,
+ const iree_hal_vulkan_device_options_t* options,
+ const iree_hal_vulkan_syms_t* instance_syms, VkInstance instance,
+ VkPhysicalDevice physical_device, VkDevice logical_device,
+ const iree_hal_vulkan_queue_set_t* compute_queue_set,
+ const iree_hal_vulkan_queue_set_t* transfer_queue_set,
+ iree_allocator_t host_allocator, iree_hal_device_t** out_device);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_vulkan_driver_t
+//===----------------------------------------------------------------------===//
+
+// Vulkan driver creation options.
+typedef struct iree_hal_vulkan_driver_options_t {
+ // Vulkan version that will be requested, e.g. `VK_API_VERSION_1_0`.
+ // Driver creation will fail if the required version is not available.
+ uint32_t api_version;
+
+ // IREE features used to configure the VkInstance and VkDevices created using
+ // it. These are used to populate the active Vulkan layers and extensions when
+ // the instance and its devices are created.
+ iree_hal_vulkan_features_t requested_features;
+
+ // TODO(benvanik): remove this single setting - it would be nice instead to
+ // pass a list to force device enumeration/matrix expansion or omit entirely
+ // to have auto-discovered options based on capabilities. Right now this
+ // forces all devices - even if from different vendors - to have the same
+ // options.
+ // Options to use for all devices created by the driver.
+ iree_hal_vulkan_device_options_t device_options;
+
+ // TODO(benvanik): change to something more canonically vulkan (like
+ // VkPhysicalDeviceProperties::deviceID).
+ // Index of the default Vulkan device to use within the list of available
+ // devices. Devices are discovered via vkEnumeratePhysicalDevices then
+ // considered "available" if compatible with the |requested_features|.
+ int default_device_index;
+} iree_hal_vulkan_driver_options_t;
+
+IREE_API_EXPORT void iree_hal_vulkan_driver_options_initialize(
+ iree_hal_vulkan_driver_options_t* out_options);
+
+// Creates a Vulkan HAL driver that manages its own VkInstance.
+//
+// |out_driver| must be released by the caller (see |iree_hal_driver_release|).
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_driver_create(
+ iree_string_view_t identifier,
+ const iree_hal_vulkan_driver_options_t* options,
+ iree_hal_vulkan_syms_t* syms, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver);
+
+// Creates a Vulkan HAL driver that shares an existing VkInstance.
+//
+// |instance| is expected to have been created with all extensions returned by
+// the instance-specific |iree_hal_vulkan_query_extensibility_set| queries.
+//
+// |instance| must remain valid for the life of |out_driver| and |out_driver|
+// itself must be released by the caller (see |iree_hal_driver_release|).
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_driver_create_using_instance(
+ iree_string_view_t identifier,
+ const iree_hal_vulkan_driver_options_t* options,
+ iree_hal_vulkan_syms_t* instance_syms, VkInstance instance,
+ iree_allocator_t host_allocator, iree_hal_driver_t** out_driver);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_API_H_
diff --git a/runtime/src/iree/hal/vulkan/builtin/BUILD b/runtime/src/iree/hal/vulkan/builtin/BUILD
new file mode 100644
index 0000000..083e92f
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/builtin/BUILD
@@ -0,0 +1,24 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+c_embed_data(
+ name = "builtin",
+ srcs = [
+ "fill_unaligned.spv",
+ ],
+ c_file_output = "builtin_shaders_spv.c",
+ flatten = True,
+ h_file_output = "builtin_shaders_spv.h",
+ identifier = "builtin_shaders_spv",
+)
diff --git a/runtime/src/iree/hal/vulkan/builtin/CMakeLists.txt b/runtime/src/iree/hal/vulkan/builtin/CMakeLists.txt
new file mode 100644
index 0000000..b2c5b2e
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/builtin/CMakeLists.txt
@@ -0,0 +1,28 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/vulkan/builtin/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_c_embed_data(
+ NAME
+ builtin
+ SRCS
+ "fill_unaligned.spv"
+ C_FILE_OUTPUT
+ "builtin_shaders_spv.c"
+ H_FILE_OUTPUT
+ "builtin_shaders_spv.h"
+ IDENTIFIER
+ "builtin_shaders_spv"
+ FLATTEN
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/vulkan/builtin/compile_shaders.sh b/runtime/src/iree/hal/vulkan/builtin/compile_shaders.sh
new file mode 100644
index 0000000..fd5f571
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/builtin/compile_shaders.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Compiles input .glsl files into output .spv binary files. As these files are
+# updated infrequently and their binary sizes are small, we check in both files
+# and don't take a hard dependency on the shader compiler tool.
+#
+# To use, ensure `glslc` is on your PATH (such as by installing the Vulkan SDK
+# or building it from its source at https://github.com/google/shaderc) and run
+# the script.
+
+set -e
+set -x
+
+# Resolve paths relative to this script so it can be run from any directory.
+BUILTIN_DIR="$(dirname $0)"
+
+# -Os: optimize for size; -fshader-stage=compute: treat input as a compute
+# shader; -mfmt=bin: emit a raw SPIR-V binary blob (checked in next to the
+# source).
+glslc \
+    -Os -fshader-stage=compute -mfmt=bin \
+    ${BUILTIN_DIR}/fill_unaligned.glsl \
+    -o ${BUILTIN_DIR}/fill_unaligned.spv
diff --git a/runtime/src/iree/hal/vulkan/builtin/fill_unaligned.glsl b/runtime/src/iree/hal/vulkan/builtin/fill_unaligned.glsl
new file mode 100644
index 0000000..9ba434e
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/builtin/fill_unaligned.glsl
@@ -0,0 +1,64 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#version 450
+
+// Polyfill for buffer fills that are not aligned to 4 byte offsets or lengths.
+// This only implements the unaligned edges of fill operations. vkCmdFillBuffer
+// should be used for the aligned interior (if any).
+//
+// Repeats the 4 byte value |fill_pattern| into |output_elements|, between
+// |fill_offset_bytes| and |fill_offset_bytes| + |fill_length_bytes|.
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 3, binding = 0) buffer OutputBuffer { uint output_elements[]; };
+
+layout(push_constant) uniform Constants {
+ // TODO(scotttodd): low and high for 8 byte pattern
+ uint fill_pattern;
+ uint fill_pattern_width; // should be 1 or 2 (or 8 later on)
+ uint fill_offset_bytes; // must be aligned to pattern width
+ uint fill_length_bytes;
+} input_constants;
+
+// Writes the fill pattern into the single 4-byte word that contains
+// |fill_offset_bytes|, covering at most the bytes in
+// [fill_offset_bytes, fill_offset_bytes + fill_length_bytes).
+// NOTE(review): |shifted_pattern| starts at zero and only in-range bytes are
+// OR'd in, so bytes of the touched word outside the fill range are
+// overwritten with zero rather than preserved — confirm callers only use this
+// on words fully owned by the fill.
+void FillBufferUnalignedHelper(uint fill_offset_bytes, uint fill_length_bytes) {
+  // Round the byte offset down to its containing 4-byte word.
+  uint fill_aligned_offset = fill_offset_bytes % 4;
+  uint fill_aligned_start_bytes = fill_offset_bytes - fill_aligned_offset;
+  uint fill_aligned_start_index = fill_aligned_start_bytes / 4;
+
+  uint shifted_pattern = 0x00000000;
+  if (input_constants.fill_pattern_width == 1) {
+    // Shift the pattern into each segment that is within the fill range.
+    uint fill_start = fill_aligned_offset;
+    uint fill_end = min(4, fill_start + fill_length_bytes);
+    for (uint i = fill_start; i < fill_end; ++i) {
+      shifted_pattern |= input_constants.fill_pattern << (8 * i);
+    }
+  } else if (input_constants.fill_pattern_width == 2) {
+    // Shift the pattern into the only supported segment in the fill range.
+    shifted_pattern = input_constants.fill_pattern << (8 * fill_aligned_offset);
+  }
+  // Single whole-word store; see NOTE above about out-of-range bytes.
+  output_elements[fill_aligned_start_index] = shifted_pattern;
+}
+
+// Fills the unaligned leading and trailing edges of the requested range.
+// The aligned interior (if any) is expected to be handled elsewhere (per the
+// file header, via vkCmdFillBuffer).
+void main() {
+  uint start_byte = input_constants.fill_offset_bytes;
+  uint end_byte =
+      input_constants.fill_offset_bytes + input_constants.fill_length_bytes;
+
+  // Unaligned start fill, if needed. Also taken when the whole fill is
+  // shorter than one word, in which case the helper covers the entire range.
+  if (start_byte % 4 != 0 || input_constants.fill_length_bytes < 4) {
+    FillBufferUnalignedHelper(start_byte, input_constants.fill_length_bytes);
+  }
+  // Unaligned end fill, if needed. The second condition requires the fill to
+  // extend past the first word, so the end word was not already written by
+  // the start-edge call above.
+  if ((end_byte % 4 != 0) &&
+      (start_byte % 4 + input_constants.fill_length_bytes > 4)) {
+    uint end_rounded_down = (end_byte / 4) * 4;
+    uint length_end = end_byte - end_rounded_down;
+    FillBufferUnalignedHelper(end_rounded_down, length_end);
+  }
+}
diff --git a/runtime/src/iree/hal/vulkan/builtin/fill_unaligned.spv b/runtime/src/iree/hal/vulkan/builtin/fill_unaligned.spv
new file mode 100644
index 0000000..d457e5d
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/builtin/fill_unaligned.spv
Binary files differ
diff --git a/runtime/src/iree/hal/vulkan/builtin_executables.cc b/runtime/src/iree/hal/vulkan/builtin_executables.cc
new file mode 100644
index 0000000..c7695b0
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/builtin_executables.cc
@@ -0,0 +1,201 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/builtin_executables.h"
+
+#include <cstddef>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/builtin/builtin_shaders_spv.h"
+#include "iree/hal/vulkan/native_descriptor_set.h"
+#include "iree/hal/vulkan/native_descriptor_set_layout.h"
+#include "iree/hal/vulkan/native_executable_layout.h"
+#include "iree/hal/vulkan/status_util.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+namespace {
+
+// Push constant block pushed to the fill_unaligned builtin shader.
+// NOTE(review): field order must match the `input_constants` block declared in
+// builtin/fill_unaligned.glsl — confirm against the shader source.
+typedef struct iree_hal_vulkan_builtin_fill_unaligned_constants_t {
+  uint32_t fill_pattern;        // pattern value, low bytes significant
+  uint32_t fill_pattern_width;  // pattern width in bytes: 1, 2, or 4
+  uint32_t fill_offset_bytes;   // byte offset of the fill in the buffer
+  uint32_t fill_length_bytes;   // byte length of the fill
+} iree_hal_vulkan_builtin_fill_unaligned_constants_t;
+
+// Despite the name, IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANT_COUNT is expressed
+// in *bytes* here (it is divided by 4 when creating the pipeline layout).
+static_assert(sizeof(iree_hal_vulkan_builtin_fill_unaligned_constants_t) ==
+                  IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANT_COUNT,
+              "push constant count must match struct size");
+
+}  // namespace
+
+// Stores the device handle only; all Vulkan objects are created lazily in
+// InitializeExecutables().
+BuiltinExecutables::BuiltinExecutables(VkDeviceHandle* logical_device)
+    : logical_device_(logical_device) {}
+
+// Tears down everything InitializeExecutables() may have created, in reverse
+// order: pipeline, then executable layout, then the descriptor set layouts.
+// Safe to run after a partially-failed initialization (guards on null/empty).
+BuiltinExecutables::~BuiltinExecutables() {
+  if (pipeline_ != VK_NULL_HANDLE) {
+    logical_device_->syms()->vkDestroyPipeline(*logical_device_, pipeline_,
+                                               logical_device_->allocator());
+  }
+
+  if (executable_layout_) {
+    iree_hal_executable_layout_destroy(executable_layout_);
+  }
+
+  for (size_t i = 0; i < IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT; ++i) {
+    iree_hal_descriptor_set_layout_release(descriptor_set_layouts_[i]);
+  }
+}
+
+// Creates the descriptor set layouts, pipeline layout, and compute pipeline
+// for the builtin fill_unaligned shader. On failure, already-created objects
+// are left for the destructor to clean up.
+iree_status_t BuiltinExecutables::InitializeExecutables() {
+  IREE_TRACE_SCOPE();
+
+  // Create descriptor set layouts for our compute pipeline.
+  // Even though we're just using one set, we still need to create layout
+  // bindings for those preceding it.
+  for (size_t i = 0; i < IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT; ++i) {
+    iree_hal_descriptor_set_layout_t* layout = NULL;
+    iree_hal_descriptor_set_layout_binding_t layout_binding;
+    layout_binding.binding = 0;
+    layout_binding.type = IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    // Only the last set (the one we actually use) is push-only; placeholder
+    // sets before it are marked immutable.
+    IREE_RETURN_IF_ERROR(iree_hal_vulkan_native_descriptor_set_layout_create(
+        logical_device_,
+        i < IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET
+            ? IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE
+            : IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_PUSH_ONLY,
+        /*binding_count=*/1, &layout_binding, &layout));
+    descriptor_set_layouts_[i] = layout;
+  }
+
+  iree_status_t status = iree_ok_status();
+
+  // Create shader module.
+  // NOTE(review): index 0 of the embedded shader table is assumed to be
+  // fill_unaligned.spv — confirm the ordering in builtin_shaders_spv.h.
+  VkShaderModule fill_unaligned_shader = VK_NULL_HANDLE;
+  if (iree_status_is_ok(status)) {
+    VkShaderModuleCreateInfo shader_create_info;
+    shader_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    shader_create_info.pNext = NULL;
+    shader_create_info.flags = 0;
+    shader_create_info.codeSize = builtin_shaders_spv_create()[0].size;
+    shader_create_info.pCode =
+        (const uint32_t*)builtin_shaders_spv_create()[0].data;
+    status = VK_RESULT_TO_STATUS(logical_device_->syms()->vkCreateShaderModule(
+        *logical_device_, &shader_create_info, logical_device_->allocator(),
+        &fill_unaligned_shader));
+  }
+
+  // Create pipeline layout.
+  // PUSH_CONSTANT_COUNT is in bytes; /4 converts to 32-bit constant count.
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vulkan_native_executable_layout_create(
+        logical_device_, IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANT_COUNT / 4,
+        IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT, descriptor_set_layouts_,
+        &executable_layout_);
+  }
+
+  // Create pipeline.
+  if (iree_status_is_ok(status)) {
+    VkComputePipelineCreateInfo pipeline_create_info;
+    pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+    pipeline_create_info.pNext = NULL;
+    pipeline_create_info.flags = VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT;
+    pipeline_create_info.layout =
+        iree_hal_vulkan_native_executable_layout_handle(executable_layout_);
+    pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
+    pipeline_create_info.basePipelineIndex = 0;
+    VkPipelineShaderStageCreateInfo* stage_create_info =
+        &pipeline_create_info.stage;
+    stage_create_info->sType =
+        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    stage_create_info->pNext = NULL;
+    stage_create_info->flags = 0;
+    stage_create_info->stage = VK_SHADER_STAGE_COMPUTE_BIT;
+    stage_create_info->module = fill_unaligned_shader;
+    stage_create_info->pName = "main";
+    stage_create_info->pSpecializationInfo = NULL;
+    status =
+        VK_RESULT_TO_STATUS(logical_device_->syms()->vkCreateComputePipelines(
+            *logical_device_, /*pipeline_cache=*/VK_NULL_HANDLE,
+            /*pipeline_count=*/1, &pipeline_create_info,
+            logical_device_->allocator(), &pipeline_));
+  }
+
+  // Destroy shader module now that the pipeline is created.
+  // (Safe even when pipeline creation failed; the module is no longer needed.)
+  if (fill_unaligned_shader != VK_NULL_HANDLE) {
+    logical_device_->syms()->vkDestroyShaderModule(
+        *logical_device_, fill_unaligned_shader, logical_device_->allocator());
+  }
+
+  return status;
+}
+
+// Records commands that fill |target_buffer| at an unaligned offset/length by
+// dispatching the builtin fill_unaligned compute shader. Binds the target at
+// the reserved descriptor set, pushes the fill parameters as push constants,
+// dispatches a single workgroup, and then restores the caller's push
+// constants so recording can continue transparently.
+iree_status_t BuiltinExecutables::FillBufferUnaligned(
+    VkCommandBuffer command_buffer, DescriptorSetArena* descriptor_set_arena,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, const void* pattern,
+    iree_host_size_t pattern_length, const void* push_constants_to_restore) {
+  IREE_TRACE_SCOPE();
+
+  // Widen the pattern to a uint32 push constant; only 1/2/4-byte patterns
+  // are supported (matching the shader's fill_pattern_width handling).
+  iree_hal_vulkan_builtin_fill_unaligned_constants_t constants;
+  switch (pattern_length) {
+    case 1:
+      constants.fill_pattern = *static_cast<const uint8_t*>(pattern);
+      break;
+    case 2:
+      constants.fill_pattern = *static_cast<const uint16_t*>(pattern);
+      break;
+    case 4:
+      constants.fill_pattern = *static_cast<const uint32_t*>(pattern);
+      break;
+    default:
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "pattern length (%" PRIhsz
+                              ") is not a power of two or is too large",
+                              pattern_length);
+  }
+
+  // Bind the whole target buffer; the shader addresses it via
+  // fill_offset_bytes rather than a binding offset.
+  iree_hal_descriptor_set_binding_t binding;
+  binding.binding = 0;
+  binding.buffer = target_buffer;
+  binding.offset = 0;
+  binding.length = IREE_WHOLE_BUFFER;
+  IREE_RETURN_IF_ERROR(descriptor_set_arena->BindDescriptorSet(
+      command_buffer, executable_layout_,
+      IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET, /*binding_count=*/1, &binding));
+
+  logical_device_->syms()->vkCmdBindPipeline(
+      command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_);
+
+  // NOTE: narrowing device-size offsets/lengths into uint32 push constants;
+  // presumably callers only route small unaligned edges here — verify.
+  constants.fill_pattern_width = pattern_length;
+  constants.fill_offset_bytes = target_offset;
+  constants.fill_length_bytes = length;
+  logical_device_->syms()->vkCmdPushConstants(
+      command_buffer,
+      iree_hal_vulkan_native_executable_layout_handle(executable_layout_),
+      VK_SHADER_STAGE_COMPUTE_BIT, /*offset=*/0,
+      sizeof(iree_hal_vulkan_builtin_fill_unaligned_constants_t), &constants);
+
+  // TODO(scotttodd): insert memory barrier if we need to do dispatch<->dispatch
+  // synchronization. The barriers inserted normally by callers would be for
+  // transfer<->dispatch.
+
+  logical_device_->syms()->vkCmdDispatch(command_buffer, 1, 1, 1);
+
+  // Restore push constants.
+  logical_device_->syms()->vkCmdPushConstants(
+      command_buffer,
+      iree_hal_vulkan_native_executable_layout_handle(executable_layout_),
+      VK_SHADER_STAGE_COMPUTE_BIT, /*offset=*/0,
+      sizeof(iree_hal_vulkan_builtin_fill_unaligned_constants_t),
+      push_constants_to_restore);
+
+  return iree_ok_status();
+}
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/builtin_executables.h b/runtime/src/iree/hal/vulkan/builtin_executables.h
new file mode 100644
index 0000000..ea25102
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/builtin_executables.h
@@ -0,0 +1,69 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_BUILTIN_EXECUTABLES_H_
+#define IREE_HAL_VULKAN_BUILTIN_EXECUTABLES_H_
+
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/descriptor_set_arena.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+// The `maxBoundDescriptorSets` limit is 4 on many devices we support and we
+// want to avoid conflicts with what the compiler uses, so we'll expect the
+// compiler to have reserved the index 3 for our exclusive use.
+#define IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT 4
+#define IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET 3
+
+#define IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANT_COUNT 16
+
+// Owns the Vulkan objects (descriptor set layouts, pipeline layout, compute
+// pipeline) for shaders built into the runtime. Currently the only builtin is
+// the unaligned buffer fill. Not internally synchronized; the destructor
+// releases all created objects.
+class BuiltinExecutables {
+ public:
+  BuiltinExecutables(VkDeviceHandle* logical_device);
+  ~BuiltinExecutables();
+
+  const ref_ptr<DynamicSymbols>& syms() const {
+    return logical_device_->syms();
+  }
+
+  // Creates all pipelines/layouts. Must be called (and succeed) before
+  // FillBufferUnaligned is used.
+  iree_status_t InitializeExecutables();
+
+  // Fills a buffer without 4 byte offset or length requirements.
+  //
+  // This only implements the unaligned edges of fills, vkCmdFillBuffer should
+  // be used for the aligned interior (if any).
+  //
+  // |push_constants_to_restore| will be pushed using vkCmdPushConstants over
+  // the bytes used by this call.
+  iree_status_t FillBufferUnaligned(
+      VkCommandBuffer command_buffer, DescriptorSetArena* descriptor_set_arena,
+      iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+      iree_device_size_t length, const void* pattern,
+      iree_host_size_t pattern_length, const void* push_constants_to_restore);
+
+ private:
+  VkDeviceHandle* logical_device_ = NULL;
+
+  // One layout per set index up to the reserved builtin set; all are created
+  // by InitializeExecutables and released in the destructor.
+  iree_hal_descriptor_set_layout_t*
+      descriptor_set_layouts_[IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT] = {
+          NULL};
+  iree_hal_executable_layout_t* executable_layout_ = NULL;
+  VkPipeline pipeline_ = VK_NULL_HANDLE;
+};
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_BUILTIN_EXECUTABLES_H_
diff --git a/runtime/src/iree/hal/vulkan/command_queue.h b/runtime/src/iree/hal/vulkan/command_queue.h
new file mode 100644
index 0000000..b8d73aa
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/command_queue.h
@@ -0,0 +1,78 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_COMMAND_QUEUE_H_
+#define IREE_HAL_VULKAN_COMMAND_QUEUE_H_
+
+#include <string>
+
+#include "iree/base/internal/synchronization.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/tracing.h"
+#include "iree/hal/vulkan/util/arena.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+// Abstract base for a Vulkan queue wrapper. Owns a mutex guarding the VkQueue
+// (which Vulkan requires to be externally synchronized) and drains the queue
+// on destruction. Subclasses implement submission and idle-wait.
+class CommandQueue {
+ public:
+  virtual ~CommandQueue() {
+    IREE_TRACE_SCOPE0("CommandQueue::dtor");
+    // Block until all submitted work completes before tearing down the mutex.
+    iree_slim_mutex_lock(&queue_mutex_);
+    syms()->vkQueueWaitIdle(queue_);
+    iree_slim_mutex_unlock(&queue_mutex_);
+    iree_slim_mutex_deinitialize(&queue_mutex_);
+  }
+
+  const ref_ptr<DynamicSymbols>& syms() const {
+    return logical_device_->syms();
+  }
+
+  VkQueue handle() const { return queue_; }
+
+  iree_hal_vulkan_tracing_context_t* tracing_context() {
+    return tracing_context_;
+  }
+  void set_tracing_context(iree_hal_vulkan_tracing_context_t* tracing_context) {
+    tracing_context_ = tracing_context;
+  }
+
+  // True if this queue supports dispatch (compute) work in addition to
+  // whatever other categories it was created with.
+  bool can_dispatch() const {
+    return iree_all_bits_set(supported_categories_,
+                             IREE_HAL_COMMAND_CATEGORY_DISPATCH);
+  }
+  // Submits |batch_count| batches of command buffers to the queue.
+  virtual iree_status_t Submit(iree_host_size_t batch_count,
+                               const iree_hal_submission_batch_t* batches) = 0;
+
+  // Blocks until the queue is idle or |timeout| elapses.
+  virtual iree_status_t WaitIdle(iree_timeout_t timeout) = 0;
+
+ protected:
+  CommandQueue(VkDeviceHandle* logical_device,
+               iree_hal_command_category_t supported_categories, VkQueue queue)
+      : logical_device_(logical_device),
+        supported_categories_(supported_categories),
+        queue_(queue) {
+    iree_slim_mutex_initialize(&queue_mutex_);
+  }
+
+  VkDeviceHandle* logical_device_;
+  const iree_hal_command_category_t supported_categories_;
+
+  iree_hal_vulkan_tracing_context_t* tracing_context_ = nullptr;
+
+  // VkQueue needs to be externally synchronized.
+  iree_slim_mutex_t queue_mutex_;
+  VkQueue queue_ IREE_GUARDED_BY(queue_mutex_);
+};
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_COMMAND_QUEUE_H_
diff --git a/runtime/src/iree/hal/vulkan/cts/CMakeLists.txt b/runtime/src/iree/hal/vulkan/cts/CMakeLists.txt
new file mode 100644
index 0000000..17faa3b
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/cts/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Instantiates the HAL Conformance Test Suite against the Vulkan driver,
+# compiling test executables with the vulkan-spirv backend.
+iree_hal_cts_test_suite(
+  DRIVER_NAME
+    vulkan
+  DRIVER_REGISTRATION_HDR
+    "runtime/src/iree/hal/vulkan/registration/driver_module.h"
+  DRIVER_REGISTRATION_FN
+    "iree_hal_vulkan_driver_module_register"
+  COMPILER_TARGET_BACKEND
+    "vulkan-spirv"
+  EXECUTABLE_FORMAT
+    "\"SPVE\""
+  DEPS
+    iree::hal::vulkan::registration
+  EXCLUDED_TESTS
+    # Non-push descriptor sets are not implemented in the Vulkan backend yet.
+    "descriptor_set"
+)
diff --git a/runtime/src/iree/hal/vulkan/debug_reporter.cc b/runtime/src/iree/hal/vulkan/debug_reporter.cc
new file mode 100644
index 0000000..6777596
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/debug_reporter.cc
@@ -0,0 +1,127 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/debug_reporter.h"
+
+#include <cstddef>
+#include <ostream>
+
+#include "iree/base/logging.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/status_util.h"
+
+// State for one VK_EXT_debug_utils messenger attached to a VkInstance.
+// Allocated from |host_allocator| and freed by
+// iree_hal_vulkan_debug_reporter_free.
+struct iree_hal_vulkan_debug_reporter_t {
+  iree_allocator_t host_allocator;  // allocator that owns this struct
+  VkInstance instance;              // instance the messenger is attached to
+  iree::hal::vulkan::DynamicSymbols* syms;  // unowned; must outlive reporter
+  const VkAllocationCallbacks* allocation_callbacks;  // may be NULL
+  VkDebugUtilsMessengerEXT messenger;  // destroyed on free if non-null
+};
+
+// NOTE: |user_data| may be nullptr if we are being called during instance
+// creation. Otherwise it is a pointer to the DebugReporter instance.
+//
+// NOTE: this callback must be thread safe and must be careful not to reach too
+// far outside of the call - it is called in-context from arbitrary threads with
+// some amount of Vulkan state on the stack. Assume that creating or deleting
+// Vulkan objects, issuing most Vulkan commands, etc are off-limits.
+static VKAPI_ATTR VkBool32 VKAPI_CALL
+iree_hal_vulkan_debug_utils_message_callback(
+    VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
+    VkDebugUtilsMessageTypeFlagsEXT message_type,
+    const VkDebugUtilsMessengerCallbackDataEXT* callback_data,
+    void* user_data) {
+  // Route errors to the error log; everything else to verbose logging.
+  if (message_severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
+    IREE_LOG(ERROR) << callback_data->pMessage;
+  } else {
+    IREE_VLOG(1) << callback_data->pMessage;
+  }
+  return VK_FALSE;  // VK_TRUE is reserved for future use.
+}
+
+// Populates |create_info| with an instance-agnostic callback.
+// This can be used during instance creation by chaining the |create_info| to
+// VkInstanceCreateInfo::pNext.
+//
+// Only use if VK_EXT_debug_utils is present.
+static void iree_hal_vulkan_debug_reporter_populate_create_info(
+    VkDebugUtilsMessengerCreateInfoEXT* out_create_info) {
+  out_create_info->sType =
+      VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
+  out_create_info->pNext = nullptr;
+  out_create_info->flags = 0;
+
+  // TODO(benvanik): only enable the severities that logging has enabled.
+  out_create_info->messageSeverity =
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
+
+  // TODO(benvanik): allow filtering by category as a flag.
+  out_create_info->messageType =
+      VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
+
+  // pUserData stays null here; the allocate path overwrites it with the
+  // reporter pointer after the struct is allocated.
+  out_create_info->pfnUserCallback =
+      iree_hal_vulkan_debug_utils_message_callback;
+  out_create_info->pUserData = nullptr;
+}
+
+// Allocates a reporter and registers a VK_EXT_debug_utils messenger on
+// |instance|. On failure the partially-initialized reporter is freed and no
+// messenger is left registered.
+iree_status_t iree_hal_vulkan_debug_reporter_allocate(
+    VkInstance instance, iree::hal::vulkan::DynamicSymbols* syms,
+    const VkAllocationCallbacks* allocation_callbacks,
+    iree_allocator_t host_allocator,
+    iree_hal_vulkan_debug_reporter_t** out_reporter) {
+  IREE_ASSERT_ARGUMENT(instance);
+  IREE_ASSERT_ARGUMENT(syms);
+  IREE_ASSERT_ARGUMENT(out_reporter);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Allocate our struct first as we need to pass the pointer to the userdata
+  // of the messager instance when we create it.
+  iree_hal_vulkan_debug_reporter_t* reporter = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(host_allocator, sizeof(*reporter),
+                                (void**)&reporter));
+  reporter->host_allocator = host_allocator;
+  reporter->instance = instance;
+  reporter->syms = syms;
+  reporter->allocation_callbacks = allocation_callbacks;
+
+  VkDebugUtilsMessengerCreateInfoEXT create_info;
+  iree_hal_vulkan_debug_reporter_populate_create_info(&create_info);
+  create_info.pUserData = reporter;
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      syms->vkCreateDebugUtilsMessengerEXT(
+          instance, &create_info, allocation_callbacks, &reporter->messenger),
+      "vkCreateDebugUtilsMessengerEXT");
+
+  if (iree_status_is_ok(status)) {
+    *out_reporter = reporter;
+  } else {
+    iree_hal_vulkan_debug_reporter_free(reporter);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Unregisters the messenger (if created) and releases the reporter.
+// Accepts NULL; also safe on a reporter whose messenger creation failed.
+void iree_hal_vulkan_debug_reporter_free(
+    iree_hal_vulkan_debug_reporter_t* reporter) {
+  if (!reporter) return;
+  // Capture the allocator before freeing the struct that holds it.
+  iree_allocator_t host_allocator = reporter->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  if (reporter->messenger != VK_NULL_HANDLE) {
+    reporter->syms->vkDestroyDebugUtilsMessengerEXT(
+        reporter->instance, reporter->messenger,
+        reporter->allocation_callbacks);
+  }
+  iree_allocator_free(host_allocator, reporter);
+
+  IREE_TRACE_ZONE_END(z0);
+}
diff --git a/runtime/src/iree/hal/vulkan/debug_reporter.h b/runtime/src/iree/hal/vulkan/debug_reporter.h
new file mode 100644
index 0000000..5ddf85c
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/debug_reporter.h
@@ -0,0 +1,36 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_DEBUG_REPORTER_H_
+#define IREE_HAL_VULKAN_DEBUG_REPORTER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+
+// A debug reporter that works with the VK_EXT_debug_utils extension.
+// One reporter should be created per VkInstance to receive callbacks from the
+// API and route them to our logging systems.
+//
+// Since creating a reporter requires a VkInstance it's not possible to report
+// on messages during instance creation. To work around this it's possible to
+// pass a *CreateInfo struct to vkCreateInstance as part of the
+// VkInstanceCreateInfo::pNext chain. The callback will only be used this way
+// during the creation call after which users can create the real
+// instance-specific reporter.
+typedef struct iree_hal_vulkan_debug_reporter_t
+    iree_hal_vulkan_debug_reporter_t;
+
+// Allocates a reporter and registers a debug-utils messenger on |instance|.
+// |syms| and |allocation_callbacks| must remain valid for the reporter's
+// lifetime. On success *|out_reporter| must be released with
+// iree_hal_vulkan_debug_reporter_free.
+iree_status_t iree_hal_vulkan_debug_reporter_allocate(
+    VkInstance instance, iree::hal::vulkan::DynamicSymbols* syms,
+    const VkAllocationCallbacks* allocation_callbacks,
+    iree_allocator_t host_allocator,
+    iree_hal_vulkan_debug_reporter_t** out_reporter);
+
+// Unregisters the messenger and frees |reporter|; NULL is a no-op.
+void iree_hal_vulkan_debug_reporter_free(
+    iree_hal_vulkan_debug_reporter_t* reporter);
+
+#endif // IREE_HAL_VULKAN_DEBUG_REPORTER_H_
diff --git a/runtime/src/iree/hal/vulkan/descriptor_pool_cache.cc b/runtime/src/iree/hal/vulkan/descriptor_pool_cache.cc
new file mode 100644
index 0000000..a62d4d1
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/descriptor_pool_cache.cc
@@ -0,0 +1,102 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/descriptor_pool_cache.h"
+
+#include <array>
+#include <cstdint>
+#include <ostream>
+
+#include "iree/base/logging.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/status_util.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+namespace {
+
+// TODO(benvanik): be more conservative with descriptor set count or allow
+// chaining in the command buffer when pools run out.
+// Maximum descriptor sets allocatable from each pool created by the cache.
+static constexpr int kMaxDescriptorSets = 4096;
+
+}  // namespace
+
+// The destructor deliberately requires a prior Reset(): releasing pools may
+// fail, and a destructor has no way to surface that status.
+DescriptorSetGroup::~DescriptorSetGroup() {
+  IREE_CHECK(descriptor_pools_.empty())
+      << "DescriptorSetGroup must be reset explicitly";
+}
+
+// Returns the group's pools to the cache (if any) and empties the group.
+// Must be called before destruction; safe on a default-constructed group.
+iree_status_t DescriptorSetGroup::Reset() {
+  IREE_TRACE_SCOPE0("DescriptorSetGroup::Reset");
+
+  if (descriptor_pool_cache_ != nullptr) {
+    IREE_RETURN_IF_ERROR(
+        descriptor_pool_cache_->ReleaseDescriptorPools(descriptor_pools_));
+  }
+  descriptor_pools_.clear();
+
+  return iree_ok_status();
+}
+
+DescriptorPoolCache::DescriptorPoolCache(VkDeviceHandle* logical_device)
+    : logical_device_(logical_device) {}
+
+// Creates a fresh descriptor pool sized for kMaxDescriptorSets sets of a
+// single |descriptor_type|. Despite the class name, no caching happens yet
+// (see TODOs); every acquire creates and every release destroys.
+iree_status_t DescriptorPoolCache::AcquireDescriptorPool(
+    VkDescriptorType descriptor_type, int max_descriptor_count,
+    DescriptorPool* out_descriptor_pool) {
+  IREE_TRACE_SCOPE0("DescriptorPoolCache::AcquireDescriptorPool");
+
+  // TODO(benvanik): lookup in cache.
+
+  VkDescriptorPoolCreateInfo create_info;
+  create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+  create_info.pNext = nullptr;
+  create_info.flags = 0;
+  create_info.maxSets = kMaxDescriptorSets;
+  // Budget |max_descriptor_count| descriptors of the one type per set.
+  std::array<VkDescriptorPoolSize, 1> pool_sizes;
+  pool_sizes[0].type = descriptor_type;
+  pool_sizes[0].descriptorCount = max_descriptor_count * create_info.maxSets;
+  create_info.poolSizeCount = static_cast<uint32_t>(pool_sizes.size());
+  create_info.pPoolSizes = pool_sizes.data();
+
+  DescriptorPool descriptor_pool;
+  descriptor_pool.descriptor_type = descriptor_type;
+  descriptor_pool.handle = VK_NULL_HANDLE;
+
+  VK_RETURN_IF_ERROR(syms().vkCreateDescriptorPool(
+                         *logical_device_, &create_info,
+                         logical_device_->allocator(), &descriptor_pool.handle),
+                     "vkCreateDescriptorPool");
+
+  *out_descriptor_pool = descriptor_pool;
+  return iree_ok_status();
+}
+
+// Resets and destroys each pool. Callers must guarantee no in-flight command
+// buffer still references sets allocated from these pools.
+iree_status_t DescriptorPoolCache::ReleaseDescriptorPools(
+    const std::vector<DescriptorPool>& descriptor_pools) {
+  IREE_TRACE_SCOPE0("DescriptorPoolCache::ReleaseDescriptorPools");
+
+  for (const auto& descriptor_pool : descriptor_pools) {
+    // Always reset immediately. We could do this on allocation instead however
+    // this leads to better errors when using the validation layers as we'll
+    // throw if there are in-flight command buffers using the sets in the pool.
+    VK_RETURN_IF_ERROR(syms().vkResetDescriptorPool(*logical_device_,
+                                                    descriptor_pool.handle, 0),
+                       "vkResetDescriptorPool");
+
+    // TODO(benvanik): release to cache.
+    syms().vkDestroyDescriptorPool(*logical_device_, descriptor_pool.handle,
+                                   logical_device_->allocator());
+  }
+
+  return iree_ok_status();
+}
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/descriptor_pool_cache.h b/runtime/src/iree/hal/vulkan/descriptor_pool_cache.h
new file mode 100644
index 0000000..9e4259e
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/descriptor_pool_cache.h
@@ -0,0 +1,97 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_DESCRIPTOR_POOL_CACHE_H_
+#define IREE_HAL_VULKAN_DESCRIPTOR_POOL_CACHE_H_
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+class DescriptorPoolCache;
+
+// A descriptor pool with a single descriptor type of some number.
+// We only support a single descriptor type for now as we only generate SPIR-V
+// that uses a single type.
+// Plain value type; ownership of |handle| is managed by DescriptorPoolCache.
+struct DescriptorPool {
+  // Type of the descriptor in the set.
+  VkDescriptorType descriptor_type = VK_DESCRIPTOR_TYPE_MAX_ENUM;
+  // Pool handle.
+  VkDescriptorPool handle = VK_NULL_HANDLE;
+};
+
+// A group of descriptor sets allocated and released together.
+// The group must be explicitly reset with Reset() prior to disposing.
+// Move-only: copying is deleted so pools have a single releasing owner.
+class DescriptorSetGroup final {
+ public:
+  DescriptorSetGroup() = default;
+  DescriptorSetGroup(DescriptorPoolCache* descriptor_pool_cache,
+                     std::vector<DescriptorPool> descriptor_pools)
+      : descriptor_pool_cache_(descriptor_pool_cache),
+        descriptor_pools_(std::move(descriptor_pools)) {}
+  DescriptorSetGroup(const DescriptorSetGroup&) = delete;
+  DescriptorSetGroup& operator=(const DescriptorSetGroup&) = delete;
+  DescriptorSetGroup(DescriptorSetGroup&& other) noexcept
+      : descriptor_pool_cache_(std::move(other.descriptor_pool_cache_)),
+        descriptor_pools_(std::move(other.descriptor_pools_)) {}
+  // Move-assignment swaps contents; the moved-from group takes over the old
+  // pools and must itself be Reset() before destruction.
+  DescriptorSetGroup& operator=(DescriptorSetGroup&& other) {
+    std::swap(descriptor_pool_cache_, other.descriptor_pool_cache_);
+    std::swap(descriptor_pools_, other.descriptor_pools_);
+    return *this;
+  }
+  ~DescriptorSetGroup();
+
+  // Returns the pools to the cache; required before destruction.
+  iree_status_t Reset();
+
+ private:
+  DescriptorPoolCache* descriptor_pool_cache_;
+  std::vector<DescriptorPool> descriptor_pools_;
+};
+
+// A "cache" (or really, pool) of descriptor pools. These pools are allocated
+// as needed to satisfy different descriptor size requirements and are given
+// to command buffers during recording to write descriptor updates and bind
+// resources. After the descriptors in the pool are no longer used (all
+// command buffers using descriptor sets allocated from the pool have retired)
+// the pool is returned here to be reused in the future.
+// NOTE(review): the current implementation creates/destroys pools on every
+// acquire/release (see TODOs in the .cc); reuse is aspirational.
+class DescriptorPoolCache final {
+ public:
+  explicit DescriptorPoolCache(VkDeviceHandle* logical_device);
+
+  VkDeviceHandle* logical_device() const { return logical_device_; }
+  const DynamicSymbols& syms() const { return *logical_device_->syms(); }
+
+  // Acquires a new descriptor pool for use by the caller.
+  // The pool will have been reset and have all descriptor sets available.
+  // When all sets allocated from the pool are no longer in use it must be
+  // returned to the cache with ReleaseDescriptorPool.
+  iree_status_t AcquireDescriptorPool(VkDescriptorType descriptor_type,
+                                      int max_descriptor_count,
+                                      DescriptorPool* out_descriptor_pool);
+
+  // Releases descriptor pools back to the cache. The pools will be reset
+  // immediately and must no longer be in use by any in-flight command.
+  iree_status_t ReleaseDescriptorPools(
+      const std::vector<DescriptorPool>& descriptor_pools);
+
+ private:
+  VkDeviceHandle* logical_device_;
+};
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_DESCRIPTOR_POOL_CACHE_H_
diff --git a/runtime/src/iree/hal/vulkan/descriptor_set_arena.cc b/runtime/src/iree/hal/vulkan/descriptor_set_arena.cc
new file mode 100644
index 0000000..cefa6bc
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/descriptor_set_arena.cc
@@ -0,0 +1,259 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/descriptor_set_arena.h"
+
+#include <cstddef>
+#include <type_traits>
+#include <utility>
+
+#include "iree/base/internal/math.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/extensibility_util.h"
+#include "iree/hal/vulkan/native_descriptor_set_layout.h"
+#include "iree/hal/vulkan/native_executable_layout.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/vma_buffer.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+namespace {
+
+// Fills one VkDescriptorBufferInfo + VkWriteDescriptorSet pair per binding
+// for |dst_set| (may be VK_NULL_HANDLE when used with push descriptors).
+// Both arrays are allocated from |arena| (which is Reset() first); the
+// |*out_infos| pointer is therefore only valid until the next arena reset.
+// All writes use VK_DESCRIPTOR_TYPE_STORAGE_BUFFER.
+static void PopulateDescriptorSetWriteInfos(
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings, VkDescriptorSet dst_set,
+    Arena* arena, iree_host_size_t* out_info_count,
+    VkWriteDescriptorSet** out_infos) {
+  arena->Reset();
+  auto buffer_infos =
+      arena->AllocateSpan<VkDescriptorBufferInfo>(binding_count);
+  auto write_infos = arena->AllocateSpan<VkWriteDescriptorSet>(binding_count);
+
+  // NOTE(review): signed int index vs iree_host_size_t |binding_count| is a
+  // sign-compare candidate; harmless for realistic binding counts.
+  for (int i = 0; i < binding_count; ++i) {
+    const auto& binding = bindings[i];
+
+    auto& buffer_info = buffer_infos[i];
+    buffer_info.buffer = iree_hal_vulkan_vma_buffer_handle(
+        iree_hal_buffer_allocated_buffer(binding.buffer));
+    // Offset is relative to the underlying allocation: subspan offset plus
+    // the binding's own offset.
+    buffer_info.offset =
+        iree_hal_buffer_byte_offset(binding.buffer) + binding.offset;
+    if (binding.length == IREE_WHOLE_BUFFER) {
+      buffer_info.range = VK_WHOLE_SIZE;
+    } else {
+      // Round up to a multiple of 32-bit. 32-bit is the most native bitwidth on
+      // GPUs; it has the best support compared to other bitwidths. We use VMA
+      // to manage GPU memory for us and VMA should have already handled proper
+      // alignment when performing allocations; here we just need to provide the
+      // proper "view" to Vulkan drivers over the allocated memory.
+      //
+      // Note this is needed because we can see unusual buffers like
+      // tensor<3xi8>. Depending on GPU capabilities, this might not always be
+      // directly supported by the hardware. Under such circumstances, we need
+      // to emulate i8 support with i32. Shader CodeGen takes care of that: the
+      // shader will read the buffer as tensor<i32> and perform bit shifts to
+      // extract each byte and conduct computations. The extra additional byte
+      // is read but not really used by the shader. Here in application we need
+      // to match the ABI and provide the buffer as 32-bit aligned, otherwise
+      // the whole read by the shader is considered as out of bounds per the
+      // Vulkan spec. See
+      // https://github.com/google/iree/issues/2022#issuecomment-640617234 for
+      // more details.
+      buffer_info.range = iree_device_align(
+          std::min(binding.length, iree_hal_buffer_byte_length(binding.buffer) -
+                                       binding.offset),
+          4);
+    }
+
+    // Every field is initialized explicitly since the struct comes from
+    // uninitialized arena storage.
+    auto& write_info = write_infos[i];
+    write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    write_info.pNext = nullptr;
+    write_info.dstSet = dst_set;
+    write_info.dstBinding = binding.binding;
+    write_info.dstArrayElement = 0;
+    write_info.descriptorCount = 1;
+    write_info.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    write_info.pImageInfo = nullptr;
+    write_info.pBufferInfo = &buffer_info;
+    write_info.pTexelBufferView = nullptr;
+  }
+
+  *out_info_count = write_infos.size();
+  *out_infos = write_infos.data();
+}
+
+// Builds a VkDescriptorSetAllocateInfo for allocating a single set of
+// |set_layout| from |descriptor_pool|.
+//
+// BUG(review): the returned struct's pSetLayouts points at the local
+// |set_layout_handle|, which dies when this function returns, so any caller
+// receives a dangling pointer. No caller is visible in this file
+// (BindDescriptorSet builds its allocate info inline instead); this helper
+// should be fixed to take caller-owned storage or be removed.
+static VkDescriptorSetAllocateInfo PopulateDescriptorSetsAllocateInfo(
+    const DescriptorPool& descriptor_pool,
+    iree_hal_descriptor_set_layout_t* set_layout) {
+  VkDescriptorSetAllocateInfo allocate_info;
+  allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+  allocate_info.pNext = nullptr;
+  allocate_info.descriptorPool = descriptor_pool.handle;
+
+  VkDescriptorSetLayout set_layout_handle =
+      iree_hal_vulkan_native_descriptor_set_layout_handle(set_layout);
+  allocate_info.descriptorSetCount = 1;
+  allocate_info.pSetLayouts = &set_layout_handle;
+
+  return allocate_info;
+}
+
+} // namespace
+
+// Caches the logical device from the (borrowed) pool cache; both pointers
+// must outlive the arena.
+DescriptorSetArena::DescriptorSetArena(
+    DescriptorPoolCache* descriptor_pool_cache)
+    : logical_device_(descriptor_pool_cache->logical_device()),
+      descriptor_pool_cache_(descriptor_pool_cache) {}
+
+// Returns any pools still held (i.e. not transferred out via Flush()) back
+// to the cache; release failures are deliberately ignored during teardown.
+DescriptorSetArena::~DescriptorSetArena() {
+  if (!used_descriptor_pools_.empty()) {
+    iree_status_ignore(
+        descriptor_pool_cache_->ReleaseDescriptorPools(used_descriptor_pools_));
+    used_descriptor_pools_.clear();
+  }
+}
+
+// Allocates a descriptor set for |bindings|, writes the buffer bindings into
+// it, and binds it to |command_buffer| at |set|. Uses push descriptors when
+// the extension is available; otherwise allocates from a bucketed pool cache.
+iree_status_t DescriptorSetArena::BindDescriptorSet(
+    VkCommandBuffer command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings) {
+  // Always prefer using push descriptors when available as we can avoid the
+  // additional API overhead of updating/resetting pools.
+  if (logical_device_->enabled_extensions().push_descriptors) {
+    PushDescriptorSet(command_buffer, executable_layout, set, binding_count,
+                      bindings);
+    return iree_ok_status();
+  }
+
+  IREE_TRACE_SCOPE0("DescriptorSetArena::BindDescriptorSet");
+
+  auto* set_layout =
+      iree_hal_vulkan_native_executable_layout_set(executable_layout, set);
+
+  // Pick a bucket based on the number of descriptors required.
+  // NOTE: right now we are 1:1 with bindings.
+  uint32_t required_descriptor_count = static_cast<int>(binding_count * 1);
+  // Buckets hold pools sized 8/16/32/64 descriptors: round the requirement up
+  // to a power of two (min 8) and index by log2(size/8).
+  uint32_t max_descriptor_count =
+      std::max(8u, iree_math_round_up_to_pow2_u32(required_descriptor_count));
+  uint32_t bucket =
+      iree_math_count_trailing_zeros_u32(max_descriptor_count >> 3);
+  if (bucket >= descriptor_pool_buckets_.size()) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "too many descriptors required: %u (max=%u)",
+                            required_descriptor_count,
+                            (1 << (descriptor_pool_buckets_.size() + 3)));
+  }
+  if (descriptor_pool_buckets_[bucket].handle == VK_NULL_HANDLE) {
+    // Acquire a pool for this max_descriptor_count bucket.
+    IREE_RETURN_IF_ERROR(descriptor_pool_cache_->AcquireDescriptorPool(
+        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, max_descriptor_count,
+        &descriptor_pool_buckets_[bucket]));
+    used_descriptor_pools_.push_back(descriptor_pool_buckets_[bucket]);
+  }
+  auto& descriptor_pool = descriptor_pool_buckets_[bucket];
+
+  VkDescriptorSetAllocateInfo allocate_info;
+  allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+  allocate_info.pNext = nullptr;
+  allocate_info.descriptorPool = descriptor_pool.handle;
+  VkDescriptorSetLayout set_layout_handle =
+      iree_hal_vulkan_native_descriptor_set_layout_handle(set_layout);
+  allocate_info.descriptorSetCount = 1;
+  allocate_info.pSetLayouts = &set_layout_handle;
+
+  VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
+  VkResult result = syms().vkAllocateDescriptorSets(
+      *logical_device_, &allocate_info, &descriptor_set);
+
+  // NOTE(review): |result| is only checked against
+  // VK_ERROR_OUT_OF_POOL_MEMORY. Any other failure (e.g. device loss) falls
+  // through with descriptor_set == VK_NULL_HANDLE and is then used below —
+  // an error check for the non-OOM failure path appears to be missing.
+  if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
+    // Allocation failed because the pool is either out of descriptors or too
+    // fragmented. We'll just allocate another pool.
+    IREE_RETURN_IF_ERROR(descriptor_pool_cache_->AcquireDescriptorPool(
+        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, max_descriptor_count,
+        &descriptor_pool_buckets_[bucket]));
+    used_descriptor_pools_.push_back(descriptor_pool_buckets_[bucket]);
+
+    // Allocate descriptor sets.
+    VkDescriptorSetAllocateInfo allocate_info;
+    allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+    allocate_info.pNext = nullptr;
+    allocate_info.descriptorPool = descriptor_pool_buckets_[bucket].handle;
+    allocate_info.descriptorSetCount = 1;
+    allocate_info.pSetLayouts = &set_layout_handle;
+    descriptor_set = VK_NULL_HANDLE;
+    VK_RETURN_IF_ERROR(syms().vkAllocateDescriptorSets(
+                           *logical_device_, &allocate_info, &descriptor_set),
+                       "vkAllocateDescriptorSets");
+  }
+
+  // Get a list of VkWriteDescriptorSet structs with all bound buffers.
+  iree_host_size_t write_info_count = 0;
+  VkWriteDescriptorSet* write_infos = NULL;
+  PopulateDescriptorSetWriteInfos(binding_count, bindings, descriptor_set,
+                                  &scratch_arena_, &write_info_count,
+                                  &write_infos);
+
+  // This is the reason why push descriptor sets are good.
+  // We can't batch these effectively as we don't know prior to recording what
+  // descriptor sets we will need and what buffers they will point to (without
+  // doing just as much work as actually recording the buffer to try to find
+  // out).
+  syms().vkUpdateDescriptorSets(*logical_device_,
+                                static_cast<uint32_t>(write_info_count),
+                                write_infos, 0, nullptr);
+
+  // Bind the descriptor set.
+  syms().vkCmdBindDescriptorSets(
+      command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+      iree_hal_vulkan_native_executable_layout_handle(executable_layout), set,
+      1, &descriptor_set, 0, nullptr);
+
+  return iree_ok_status();
+}
+
+// Records the bindings with vkCmdPushDescriptorSetKHR, avoiding descriptor
+// pool management entirely. Assumes the push descriptor extension is enabled
+// (BindDescriptorSet checks enabled_extensions().push_descriptors before
+// calling this).
+void DescriptorSetArena::PushDescriptorSet(
+    VkCommandBuffer command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings) {
+  IREE_TRACE_SCOPE0("DescriptorSetArena::PushDescriptorSet");
+  VkPipelineLayout device_executable_layout =
+      iree_hal_vulkan_native_executable_layout_handle(executable_layout);
+
+  // Get a list of VkWriteDescriptorSet structs with all bound buffers.
+  // dst_set is VK_NULL_HANDLE as push descriptors have no backing set object.
+  iree_host_size_t write_info_count = 0;
+  VkWriteDescriptorSet* write_infos = NULL;
+  PopulateDescriptorSetWriteInfos(binding_count, bindings, VK_NULL_HANDLE,
+                                  &scratch_arena_, &write_info_count,
+                                  &write_infos);
+
+  // Fast path using push descriptors. These are pooled internally by the
+  // command buffer and prevent the need for our own pooling mechanisms.
+  syms().vkCmdPushDescriptorSetKHR(
+      command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device_executable_layout,
+      set, static_cast<uint32_t>(write_info_count), write_infos);
+}
+
+// Transfers ownership of all pools used during recording into the returned
+// DescriptorSetGroup and clears the per-bucket slots so subsequent recordings
+// acquire fresh pools. Returns an empty group when nothing was allocated.
+DescriptorSetGroup DescriptorSetArena::Flush() {
+  IREE_TRACE_SCOPE0("DescriptorSetArena::Flush");
+
+  if (used_descriptor_pools_.empty()) {
+    // No resources to free.
+    return DescriptorSetGroup{};
+  }
+
+  // Drop bucket references; the pools themselves now live (solely) in
+  // used_descriptor_pools_, which is moved into the group below.
+  for (auto& bucket : descriptor_pool_buckets_) {
+    bucket = {};
+  }
+  return DescriptorSetGroup(descriptor_pool_cache_,
+                            std::move(used_descriptor_pools_));
+}
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/descriptor_set_arena.h b/runtime/src/iree/hal/vulkan/descriptor_set_arena.h
new file mode 100644
index 0000000..4805a2c
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/descriptor_set_arena.h
@@ -0,0 +1,76 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_DESCRIPTOR_SET_ARENA_H_
+#define IREE_HAL_VULKAN_DESCRIPTOR_SET_ARENA_H_
+
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/base/status_cc.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/descriptor_pool_cache.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/native_executable.h"
+#include "iree/hal/vulkan/util/arena.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+// A reusable arena for allocating descriptor sets and batching updates.
+// Owned by a single command buffer; not visibly synchronized, so it should
+// only be used from the recording thread.
+class DescriptorSetArena final {
+ public:
+  explicit DescriptorSetArena(DescriptorPoolCache* descriptor_pool_cache);
+  ~DescriptorSetArena();
+
+  // Allocates and binds a descriptor set from the arena.
+  // The command buffer will have the descriptor set containing |bindings| bound
+  // to it.
+  iree_status_t BindDescriptorSet(
+      VkCommandBuffer command_buffer,
+      iree_hal_executable_layout_t* executable_layout, uint32_t set,
+      iree_host_size_t binding_count,
+      const iree_hal_descriptor_set_binding_t* bindings);
+
+  // Flushes all pending writes to descriptor sets allocated from the arena and
+  // returns a group that - when dropped - will release the descriptor sets
+  // back to the pools they were allocated from.
+  DescriptorSetGroup Flush();
+
+ private:
+  const DynamicSymbols& syms() const { return *logical_device_->syms(); }
+
+  // Pushes the descriptor set to the command buffer, if supported.
+  void PushDescriptorSet(VkCommandBuffer command_buffer,
+                         iree_hal_executable_layout_t* executable_layout,
+                         uint32_t set, iree_host_size_t binding_count,
+                         const iree_hal_descriptor_set_binding_t* bindings);
+
+  VkDeviceHandle* logical_device_;
+  DescriptorPoolCache* descriptor_pool_cache_;
+
+  // Arena used for temporary binding information used during allocation.
+  // Reset on each BindDescriptorSet/PushDescriptorSet call.
+  Arena scratch_arena_;
+
+  // A list of pools acquired on demand as different descriptor counts are
+  // needed. Allocation granularity is max_descriptor_count=[8, 16, 32, 64].
+  std::array<DescriptorPool, 4> descriptor_pool_buckets_;
+
+  // All pools that have been used during allocation.
+  // Ownership transfers to the DescriptorSetGroup returned from Flush().
+  std::vector<DescriptorPool> used_descriptor_pools_;
+};
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_DESCRIPTOR_SET_ARENA_H_
diff --git a/runtime/src/iree/hal/vulkan/direct_command_buffer.cc b/runtime/src/iree/hal/vulkan/direct_command_buffer.cc
new file mode 100644
index 0000000..515bbc0
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/direct_command_buffer.cc
@@ -0,0 +1,856 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/direct_command_buffer.h"
+
+#include <cstddef>
+#include <cstdint>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/inline_array.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/logging.h"
+#include "iree/base/status_cc.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/utils/resource_set.h"
+#include "iree/hal/vulkan/descriptor_set_arena.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/native_descriptor_set.h"
+#include "iree/hal/vulkan/native_event.h"
+#include "iree/hal/vulkan/native_executable.h"
+#include "iree/hal/vulkan/native_executable_layout.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+#include "iree/hal/vulkan/vma_buffer.h"
+
+using namespace iree::hal::vulkan;
+
+// Command buffer implementation that directly maps to VkCommandBuffer.
+// This records the commands on the calling thread without additional threading
+// indirection.
+typedef struct iree_hal_vulkan_direct_command_buffer_t {
+  iree_hal_command_buffer_t base;
+  // Borrowed; provides the host allocator and dynamic symbols.
+  VkDeviceHandle* logical_device;
+  iree_hal_vulkan_tracing_context_t* tracing_context;
+  iree_arena_block_pool_t* block_pool;
+
+  // Pool the VkCommandBuffer |handle| was allocated from (and is freed to).
+  VkCommandPoolHandle* command_pool;
+  VkCommandBuffer handle;
+
+  DynamicSymbols* syms;
+
+  // Maintains a reference to all resources used within the command buffer.
+  // Reset on each begin.
+  iree_hal_resource_set_t* resource_set;
+
+  // TODO(benvanik): may grow large - should try to reclaim or reuse.
+  // Constructed with placement new in _allocate; destroyed in _destroy.
+  DescriptorSetArena descriptor_set_arena;
+
+  // The current descriptor set group in use by the command buffer, if any.
+  // This must remain valid until all in-flight submissions of the command
+  // buffer complete.
+  DescriptorSetGroup descriptor_set_group;
+
+  BuiltinExecutables* builtin_executables;
+
+  // Shadow copy of push constants used during normal operation, for restoring
+  // after builtin_executables uses vkCmdPushConstants. Size must be greater
+  // than or equal to the push constant memory used by builtin_executables.
+  // TODO(scotttodd): use [maxPushConstantsSize - 16, maxPushConstantsSize]
+  // instead of [0, 16] to reduce frequency of updates
+  uint8_t push_constants_storage[IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANT_COUNT];
+} iree_hal_vulkan_direct_command_buffer_t;
+
+namespace {
+extern const iree_hal_command_buffer_vtable_t
+ iree_hal_vulkan_direct_command_buffer_vtable;
+} // namespace
+
+// Downcasts |base_value| to the Vulkan direct command buffer type, asserting
+// (debug-only) that the vtable matches.
+static iree_hal_vulkan_direct_command_buffer_t*
+iree_hal_vulkan_direct_command_buffer_cast(
+    iree_hal_command_buffer_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value,
+                       &iree_hal_vulkan_direct_command_buffer_vtable);
+  return (iree_hal_vulkan_direct_command_buffer_t*)base_value;
+}
+
+// Allocates a primary VkCommandBuffer from |command_pool| and wraps it in an
+// iree_hal_command_buffer_t. All pointer arguments are borrowed and must
+// outlive the returned command buffer.
+iree_status_t iree_hal_vulkan_direct_command_buffer_allocate(
+    iree_hal_device_t* device,
+    iree::hal::vulkan::VkDeviceHandle* logical_device,
+    iree::hal::vulkan::VkCommandPoolHandle* command_pool,
+    iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity,
+    iree_hal_vulkan_tracing_context_t* tracing_context,
+    iree::hal::vulkan::DescriptorPoolCache* descriptor_pool_cache,
+    iree::hal::vulkan::BuiltinExecutables* builtin_executables,
+    iree_arena_block_pool_t* block_pool,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(command_pool);
+  IREE_ASSERT_ARGUMENT(descriptor_pool_cache);
+  IREE_ASSERT_ARGUMENT(block_pool);
+  IREE_ASSERT_ARGUMENT(out_command_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  VkCommandBufferAllocateInfo allocate_info;
+  allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+  allocate_info.pNext = NULL;
+  allocate_info.commandPool = *command_pool;
+  allocate_info.commandBufferCount = 1;
+  allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+
+  VkCommandBuffer handle = VK_NULL_HANDLE;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, command_pool->Allocate(&allocate_info, &handle));
+
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(logical_device->host_allocator(),
+                            sizeof(*command_buffer), (void**)&command_buffer);
+  if (iree_status_is_ok(status)) {
+    iree_hal_command_buffer_initialize(
+        device, mode, command_categories, queue_affinity,
+        &iree_hal_vulkan_direct_command_buffer_vtable, &command_buffer->base);
+    command_buffer->logical_device = logical_device;
+    command_buffer->tracing_context = tracing_context;
+    command_buffer->block_pool = block_pool;
+    command_buffer->command_pool = command_pool;
+    command_buffer->handle = handle;
+    command_buffer->syms = logical_device->syms().get();
+
+    // The C++ members inside the C struct need placement new here and a
+    // matching explicit destructor call in _destroy.
+    new (&command_buffer->descriptor_set_arena)
+        DescriptorSetArena(descriptor_pool_cache);
+    new (&command_buffer->descriptor_set_group) DescriptorSetGroup();
+
+    command_buffer->builtin_executables = builtin_executables;
+    status = iree_hal_resource_set_allocate(block_pool,
+                                            &command_buffer->resource_set);
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_command_buffer = &command_buffer->base;
+  } else {
+    // NOTE(review): only the VkCommandBuffer is reclaimed here. If the
+    // resource set allocation above failed, |command_buffer| storage (and its
+    // placement-new'd members) is leaked — confirm this path is intended.
+    command_pool->Free(handle);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Releases per-recording state: returns retained descriptor pools and drops
+// all resource references accumulated during the previous recording.
+static void iree_hal_vulkan_direct_command_buffer_reset(
+    iree_hal_vulkan_direct_command_buffer_t* command_buffer) {
+  // NOTE: we require that command buffers not be recorded while they are
+  // in-flight so this is safe.
+  IREE_IGNORE_ERROR(command_buffer->descriptor_set_group.Reset());
+  iree_hal_resource_set_reset(command_buffer->resource_set);
+}
+
+// Returns true if |command_buffer| is a Vulkan direct command buffer
+// (i.e. its dyn_cast against this vtable succeeds).
+bool iree_hal_vulkan_direct_command_buffer_isa(
+    iree_hal_command_buffer_t* command_buffer) {
+  return iree_hal_command_buffer_dyn_cast(
+      command_buffer, &iree_hal_vulkan_direct_command_buffer_vtable);
+}
+
+// Vtable dyn_cast hook: returns |command_buffer| if |vtable| identifies this
+// implementation, NULL otherwise.
+static void* iree_hal_vulkan_direct_command_buffer_dyn_cast(
+    iree_hal_command_buffer_t* command_buffer, const void* vtable) {
+  if (vtable == &iree_hal_vulkan_direct_command_buffer_vtable) {
+    IREE_HAL_ASSERT_TYPE(command_buffer, vtable);
+    return command_buffer;
+  }
+  return NULL;
+}
+
+// Tears down the command buffer: releases descriptor pools and resource refs,
+// returns the VkCommandBuffer to its pool, explicitly destructs the
+// placement-new'd C++ members, then frees the struct itself.
+static void iree_hal_vulkan_direct_command_buffer_destroy(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  // Grab the allocator before freeing the struct that references it.
+  iree_allocator_t host_allocator =
+      command_buffer->logical_device->host_allocator();
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_vulkan_direct_command_buffer_reset(command_buffer);
+  command_buffer->command_pool->Free(command_buffer->handle);
+
+  command_buffer->descriptor_set_group.~DescriptorSetGroup();
+  command_buffer->descriptor_set_arena.~DescriptorSetArena();
+
+  iree_hal_resource_set_free(command_buffer->resource_set);
+  iree_allocator_free(host_allocator, command_buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the native VkCommandBuffer backing |base_command_buffer|.
+// NOTE(review): the dyn_cast result is dereferenced without a NULL check, so
+// passing a command buffer of a different implementation would crash; callers
+// presumably gate on _isa first — confirm.
+VkCommandBuffer iree_hal_vulkan_direct_command_buffer_handle(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      (iree_hal_vulkan_direct_command_buffer_t*)
+          iree_hal_command_buffer_dyn_cast(
+              base_command_buffer,
+              &iree_hal_vulkan_direct_command_buffer_vtable);
+  return command_buffer->handle;
+}
+
+// Begins recording: resets any state from a prior recording, calls
+// vkBeginCommandBuffer (ONE_TIME_SUBMIT when the HAL mode requests one-shot),
+// and opens the tracing zone that _end closes.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_begin(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+
+  iree_hal_vulkan_direct_command_buffer_reset(command_buffer);
+
+  VkCommandBufferBeginInfo begin_info;
+  begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+  begin_info.pNext = NULL;
+  begin_info.flags = iree_all_bits_set(command_buffer->base.mode,
+                                       IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT)
+                         ? VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
+                         : 0;
+  begin_info.pInheritanceInfo = NULL;
+  VK_RETURN_IF_ERROR(command_buffer->syms->vkBeginCommandBuffer(
+                         command_buffer->handle, &begin_info),
+                     "vkBeginCommandBuffer");
+
+  IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL(
+      command_buffer->tracing_context, command_buffer->handle,
+      /*file_name=*/NULL, 0,
+      /*line=*/0, /*func_name=*/NULL, 0,
+      "iree_hal_vulkan_direct_command_buffer",
+      strlen("iree_hal_vulkan_direct_command_buffer"));
+
+  return iree_ok_status();
+}
+
+// Ends recording: closes the tracing zone opened in _begin, finalizes the
+// VkCommandBuffer, and takes ownership of the descriptor pools used during
+// recording so they stay live until the buffer's submissions retire.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_end(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+
+  IREE_VULKAN_TRACE_ZONE_END(command_buffer->tracing_context,
+                             command_buffer->handle);
+
+  VK_RETURN_IF_ERROR(
+      command_buffer->syms->vkEndCommandBuffer(command_buffer->handle),
+      "vkEndCommandBuffer");
+
+  // Flush all pending descriptor set writes (if any).
+  command_buffer->descriptor_set_group =
+      command_buffer->descriptor_set_arena.Flush();
+
+  return iree_ok_status();
+}
+
+// Opens a tracing zone and, when VK_EXT_debug_utils is loaded, a debug label
+// region visible in tools like RenderDoc. Labels longer than 127 bytes are
+// truncated by the snprintf into the fixed stack buffer.
+static void iree_hal_vulkan_direct_command_buffer_begin_debug_group(
+    iree_hal_command_buffer_t* base_command_buffer, iree_string_view_t label,
+    iree_hal_label_color_t label_color,
+    const iree_hal_label_location_t* location) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL(
+      command_buffer->tracing_context, command_buffer->handle,
+      location ? location->file.data : NULL, location ? location->file.size : 0,
+      location ? location->line : 0, /*func_name=*/NULL, 0, label.data,
+      label.size);
+  if (command_buffer->syms->vkCmdBeginDebugUtilsLabelEXT) {
+    // The label view may not be NUL-terminated; copy to a terminated buffer.
+    char label_buffer[128];
+    snprintf(label_buffer, sizeof(label_buffer), "%.*s", (int)label.size,
+             label.data);
+    VkDebugUtilsLabelEXT label_info = {
+        /*.sType=*/VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        /*.pNext=*/NULL,
+        /*.pLabelName=*/label_buffer,
+        /*.color=*/
+        {
+            /*r=*/label_color.r / 255.0f,
+            /*g=*/label_color.g / 255.0f,
+            /*b=*/label_color.b / 255.0f,
+            /*a=*/label_color.a / 255.0f,
+        },
+    };
+    command_buffer->syms->vkCmdBeginDebugUtilsLabelEXT(command_buffer->handle,
+                                                       &label_info);
+  }
+}
+
+// Closes the debug label region (if the extension is loaded) and the matching
+// tracing zone opened by _begin_debug_group.
+static void iree_hal_vulkan_direct_command_buffer_end_debug_group(
+    iree_hal_command_buffer_t* base_command_buffer) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  if (command_buffer->syms->vkCmdEndDebugUtilsLabelEXT) {
+    command_buffer->syms->vkCmdEndDebugUtilsLabelEXT(command_buffer->handle);
+  }
+  IREE_VULKAN_TRACE_ZONE_END(command_buffer->tracing_context,
+                             command_buffer->handle);
+}
+
+// Maps HAL execution stage bits to their VkPipelineStageFlags equivalents;
+// multiple set bits accumulate into the returned mask.
+static VkPipelineStageFlags iree_hal_vulkan_convert_pipeline_stage_flags(
+    iree_hal_execution_stage_t stage_mask) {
+  VkPipelineStageFlags flags = 0;
+  flags |= iree_any_bit_set(stage_mask, IREE_HAL_EXECUTION_STAGE_COMMAND_ISSUE)
+               ? VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
+               : 0;
+  flags |=
+      iree_any_bit_set(stage_mask, IREE_HAL_EXECUTION_STAGE_COMMAND_PROCESS)
+          ? VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
+          : 0;
+  flags |= iree_any_bit_set(stage_mask, IREE_HAL_EXECUTION_STAGE_DISPATCH)
+               ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
+               : 0;
+  flags |= iree_any_bit_set(stage_mask, IREE_HAL_EXECUTION_STAGE_TRANSFER)
+               ? VK_PIPELINE_STAGE_TRANSFER_BIT
+               : 0;
+  flags |= iree_any_bit_set(stage_mask, IREE_HAL_EXECUTION_STAGE_COMMAND_RETIRE)
+               ? VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
+               : 0;
+  flags |= iree_any_bit_set(stage_mask, IREE_HAL_EXECUTION_STAGE_HOST)
+               ? VK_PIPELINE_STAGE_HOST_BIT
+               : 0;
+  return flags;
+}
+
+// Maps HAL access scope bits to their VkAccessFlags equivalents; multiple set
+// bits accumulate into the returned mask.
+static VkAccessFlags iree_hal_vulkan_convert_access_mask(
+    iree_hal_access_scope_t access_mask) {
+  VkAccessFlags flags = 0;
+  flags |=
+      iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_INDIRECT_COMMAND_READ)
+          ? VK_ACCESS_INDIRECT_COMMAND_READ_BIT
+          : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_CONSTANT_READ)
+               ? VK_ACCESS_UNIFORM_READ_BIT
+               : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_DISPATCH_READ)
+               ? VK_ACCESS_SHADER_READ_BIT
+               : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_DISPATCH_WRITE)
+               ? VK_ACCESS_SHADER_WRITE_BIT
+               : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_TRANSFER_READ)
+               ? VK_ACCESS_TRANSFER_READ_BIT
+               : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_TRANSFER_WRITE)
+               ? VK_ACCESS_TRANSFER_WRITE_BIT
+               : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_HOST_READ)
+               ? VK_ACCESS_HOST_READ_BIT
+               : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_HOST_WRITE)
+               ? VK_ACCESS_HOST_WRITE_BIT
+               : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_MEMORY_READ)
+               ? VK_ACCESS_MEMORY_READ_BIT
+               : 0;
+  flags |= iree_any_bit_set(access_mask, IREE_HAL_ACCESS_SCOPE_MEMORY_WRITE)
+               ? VK_ACCESS_MEMORY_WRITE_BIT
+               : 0;
+  return flags;
+}
+
+// Records a vkCmdPipelineBarrier translating HAL memory/buffer barriers into
+// their Vulkan equivalents. Barrier arrays are staged in heap-backed inline
+// arrays and released before returning.
+// NOTE(review): buffers referenced by |buffer_barriers| are not inserted into
+// the resource set here — presumably the dispatch/transfer commands that use
+// them retain them; verify against callers.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_execution_barrier(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_hal_execution_barrier_flags_t flags,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  iree_allocator_t host_allocator =
+      command_buffer->logical_device->host_allocator();
+
+  iree_inline_array(VkMemoryBarrier, memory_barrier_infos, memory_barrier_count,
+                    host_allocator);
+  for (int i = 0; i < memory_barrier_count; ++i) {
+    const auto& memory_barrier = memory_barriers[i];
+    VkMemoryBarrier* info = iree_inline_array_at(memory_barrier_infos, i);
+    info->sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+    info->pNext = NULL;
+    info->srcAccessMask =
+        iree_hal_vulkan_convert_access_mask(memory_barrier.source_scope);
+    info->dstAccessMask =
+        iree_hal_vulkan_convert_access_mask(memory_barrier.target_scope);
+  }
+
+  iree_inline_array(VkBufferMemoryBarrier, buffer_barrier_infos,
+                    buffer_barrier_count, host_allocator);
+  for (int i = 0; i < buffer_barrier_count; ++i) {
+    const auto& buffer_barrier = buffer_barriers[i];
+    VkBufferMemoryBarrier* info = iree_inline_array_at(buffer_barrier_infos, i);
+    info->sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    info->pNext = NULL;
+    info->srcAccessMask =
+        iree_hal_vulkan_convert_access_mask(buffer_barrier.source_scope);
+    info->dstAccessMask =
+        iree_hal_vulkan_convert_access_mask(buffer_barrier.target_scope);
+    // No queue ownership transfer is performed.
+    info->srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    info->dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    info->buffer = iree_hal_vulkan_vma_buffer_handle(
+        iree_hal_buffer_allocated_buffer(buffer_barrier.buffer));
+    info->offset = buffer_barrier.offset;
+    info->size = buffer_barrier.length;
+  }
+
+  command_buffer->syms->vkCmdPipelineBarrier(
+      command_buffer->handle,
+      iree_hal_vulkan_convert_pipeline_stage_flags(source_stage_mask),
+      iree_hal_vulkan_convert_pipeline_stage_flags(target_stage_mask),
+      /*dependencyFlags=*/0, (uint32_t)memory_barrier_count,
+      iree_inline_array_data(memory_barrier_infos),
+      (uint32_t)buffer_barrier_count,
+      iree_inline_array_data(buffer_barrier_infos), 0, NULL);
+
+  iree_inline_array_deinitialize(memory_barrier_infos);
+  iree_inline_array_deinitialize(buffer_barrier_infos);
+
+  return iree_ok_status();
+}
+
+// Records vkCmdSetEvent for |event|, retaining it in the resource set so it
+// stays live for the lifetime of the recording.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_signal_event(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 1, &event));
+
+  command_buffer->syms->vkCmdSetEvent(
+      command_buffer->handle, iree_hal_vulkan_native_event_handle(event),
+      iree_hal_vulkan_convert_pipeline_stage_flags(source_stage_mask));
+
+  return iree_ok_status();
+}
+
+// Records vkCmdResetEvent for |event|, retaining it in the resource set so it
+// stays live for the lifetime of the recording.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_reset_event(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event,
+    iree_hal_execution_stage_t source_stage_mask) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 1, &event));
+
+  command_buffer->syms->vkCmdResetEvent(
+      command_buffer->handle, iree_hal_vulkan_native_event_handle(event),
+      iree_hal_vulkan_convert_pipeline_stage_flags(source_stage_mask));
+
+  return iree_ok_status();
+}
+
+// Records vkCmdWaitEvents on |events| with the translated stage and barrier
+// information. Events are retained in the resource set; the temporary arrays
+// are heap-backed inline arrays released before returning.
+// NOTE(review): as in _execution_barrier, barrier buffers are not inserted
+// into the resource set — verify their lifetime is covered elsewhere.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_wait_events(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_host_size_t event_count, const iree_hal_event_t** events,
+    iree_hal_execution_stage_t source_stage_mask,
+    iree_hal_execution_stage_t target_stage_mask,
+    iree_host_size_t memory_barrier_count,
+    const iree_hal_memory_barrier_t* memory_barriers,
+    iree_host_size_t buffer_barrier_count,
+    const iree_hal_buffer_barrier_t* buffer_barriers) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  iree_allocator_t host_allocator =
+      command_buffer->logical_device->host_allocator();
+
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, event_count, events));
+
+  iree_inline_array(VkEvent, event_handles, event_count, host_allocator);
+  for (int i = 0; i < event_count; ++i) {
+    *iree_inline_array_at(event_handles, i) =
+        iree_hal_vulkan_native_event_handle(events[i]);
+  }
+
+  iree_inline_array(VkMemoryBarrier, memory_barrier_infos, memory_barrier_count,
+                    host_allocator);
+  for (int i = 0; i < memory_barrier_count; ++i) {
+    const auto& memory_barrier = memory_barriers[i];
+    VkMemoryBarrier* info = iree_inline_array_at(memory_barrier_infos, i);
+    info->sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+    info->pNext = NULL;
+    info->srcAccessMask =
+        iree_hal_vulkan_convert_access_mask(memory_barrier.source_scope);
+    info->dstAccessMask =
+        iree_hal_vulkan_convert_access_mask(memory_barrier.target_scope);
+  }
+
+  iree_inline_array(VkBufferMemoryBarrier, buffer_barrier_infos,
+                    buffer_barrier_count, host_allocator);
+  for (int i = 0; i < buffer_barrier_count; ++i) {
+    const auto& buffer_barrier = buffer_barriers[i];
+    VkBufferMemoryBarrier* info = iree_inline_array_at(buffer_barrier_infos, i);
+    info->sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    info->pNext = NULL;
+    info->srcAccessMask =
+        iree_hal_vulkan_convert_access_mask(buffer_barrier.source_scope);
+    info->dstAccessMask =
+        iree_hal_vulkan_convert_access_mask(buffer_barrier.target_scope);
+    // No queue ownership transfer is performed.
+    info->srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    info->dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    info->buffer = iree_hal_vulkan_vma_buffer_handle(
+        iree_hal_buffer_allocated_buffer(buffer_barrier.buffer));
+    info->offset = buffer_barrier.offset;
+    info->size = buffer_barrier.length;
+  }
+
+  command_buffer->syms->vkCmdWaitEvents(
+      command_buffer->handle, (uint32_t)event_count,
+      iree_inline_array_data(event_handles),
+      iree_hal_vulkan_convert_pipeline_stage_flags(source_stage_mask),
+      iree_hal_vulkan_convert_pipeline_stage_flags(target_stage_mask),
+      (uint32_t)memory_barrier_count,
+      iree_inline_array_data(memory_barrier_infos),
+      (uint32_t)buffer_barrier_count,
+      iree_inline_array_data(buffer_barrier_infos), 0, NULL);
+
+  iree_inline_array_deinitialize(event_handles);
+  iree_inline_array_deinitialize(memory_barrier_infos);
+  iree_inline_array_deinitialize(buffer_barrier_infos);
+
+  return iree_ok_status();
+}
+
+// Hint that |buffer| contents will not be used again; intentionally a no-op
+// on Vulkan today.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_discard_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_t* buffer) {
+  // NOTE: we could use this to prevent queue family transitions.
+  return iree_ok_status();
+}
+
+// Splats a pattern value of 1, 2, or 4 bytes out to a 4 byte value.
+// vkCmdFillBuffer only accepts a 4-byte dword, so smaller patterns are
+// replicated across the dword (byte-order per the shifts below).
+static uint32_t iree_hal_vulkan_splat_pattern(const void* pattern,
+                                              size_t pattern_length) {
+  switch (pattern_length) {
+    case 1: {
+      // Replicate the byte into all 4 lanes.
+      uint32_t pattern_value = *static_cast<const uint8_t*>(pattern);
+      return (pattern_value << 24) | (pattern_value << 16) |
+             (pattern_value << 8) | pattern_value;
+    }
+    case 2: {
+      // Replicate the 16-bit value into both halves.
+      uint32_t pattern_value = *static_cast<const uint16_t*>(pattern);
+      return (pattern_value << 16) | pattern_value;
+    }
+    case 4: {
+      uint32_t pattern_value = *static_cast<const uint32_t*>(pattern);
+      return pattern_value;
+    }
+    default:
+      return 0;  // Already verified that this should not be possible.
+  }
+}
+
+// Records a fill of |length| bytes of |target_buffer| starting at
+// |target_offset| with the given 1/2/4-byte |pattern|. Unaligned head/tail
+// bytes are handled by a builtin-executable polyfill; the aligned interior
+// uses vkCmdFillBuffer.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_fill_buffer(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, const void* pattern,
+    iree_host_size_t pattern_length) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  VkBuffer target_device_buffer = iree_hal_vulkan_vma_buffer_handle(
+      iree_hal_buffer_allocated_buffer(target_buffer));
+
+  // Retain the target buffer until the command buffer has retired.
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &target_buffer));
+
+  // vkCmdFillBuffer requires a 4 byte alignment for the offset, pattern, and
+  // length. We use a polyfill here that fills the unaligned start and end of
+  // fill operations, if needed.
+
+  if (target_offset % 4 != 0 || length % 4 != 0) {
+    // TODO(scotttodd): only restore push constants that have been modified?
+    //                  (this can pass uninitialized memory right now, which
+    //                   *should* be safe but is wasteful)
+    IREE_RETURN_IF_ERROR(
+        command_buffer->builtin_executables->FillBufferUnaligned(
+            command_buffer->handle, &(command_buffer->descriptor_set_arena),
+            target_buffer, target_offset, length, pattern, pattern_length,
+            command_buffer->push_constants_storage));
+
+    // Continue using vkCmdFillBuffer below, but only for the inner aligned
+    // portion of the fill operation.
+    // For example:
+    //   original offset 2, length 8
+    //   aligned  offset 4, length 4
+    // [0x00,0x00,0xAB,0xAB | 0xAB,0xAB,0xAB,0xAB | 0xAB,0xAB,0x00,0x00]
+    //   <-------> <---------------------> <------->
+    //  unaligned       vkCmdFillBuffer     unaligned
+    iree_device_size_t aligned_target_offset =
+        iree_device_align(target_offset, 4);
+    iree_device_size_t target_end = target_offset + length;
+    iree_device_size_t rounded_down_target_end = (target_end / 4) * 4;
+    // Shrink the range to the aligned interior (may reach 0 for tiny fills).
+    length -= (aligned_target_offset - target_offset) +
+              (target_end - rounded_down_target_end);
+    target_offset = aligned_target_offset;
+  }
+
+  if (length > 0) {
+    // Note that vkCmdFillBuffer only accepts 4-byte aligned values so we need
+    // to splat out our variable-length pattern.
+    target_offset += iree_hal_buffer_byte_offset(target_buffer);
+    uint32_t dword_pattern =
+        iree_hal_vulkan_splat_pattern(pattern, pattern_length);
+    command_buffer->syms->vkCmdFillBuffer(command_buffer->handle,
+                                          target_device_buffer, target_offset,
+                                          length, dword_pattern);
+  }
+
+  return iree_ok_status();
+}
+
+// Records an inline upload of host memory into |target_buffer|, chunked to
+// respect Vulkan's 65536-byte limit on vkCmdUpdateBuffer.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_update_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
+    iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
+    iree_device_size_t target_offset, iree_device_size_t length) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  VkBuffer target_device_buffer = iree_hal_vulkan_vma_buffer_handle(
+      iree_hal_buffer_allocated_buffer(target_buffer));
+
+  // Retain the target buffer until the command buffer has retired.
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &target_buffer));
+
+  // Vulkan only allows updates of <= 65536 because you really, really, really
+  // shouldn't do large updates like this (as it wastes command buffer space and
+  // may be slower than just using write-through mapped memory). The
+  // recommendation in the spec for larger updates is to split the single update
+  // into multiple updates over the entire desired range.
+  const auto* source_buffer_ptr =
+      static_cast<const uint8_t*>(source_buffer) + source_offset;
+  target_offset += iree_hal_buffer_byte_offset(target_buffer);
+  while (length > 0) {
+    iree_device_size_t chunk_length =
+        iree_min((iree_device_size_t)65536u, length);
+    command_buffer->syms->vkCmdUpdateBuffer(command_buffer->handle,
+                                            target_device_buffer, target_offset,
+                                            chunk_length, source_buffer_ptr);
+    source_buffer_ptr += chunk_length;
+    target_offset += chunk_length;
+    length -= chunk_length;
+  }
+
+  return iree_ok_status();
+}
+
+// Records a buffer->buffer copy of |length| bytes from |source_buffer| at
+// |source_offset| to |target_buffer| at |target_offset|. Both buffers are
+// retained in the command buffer resource set until the commands retire.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_copy_buffer(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  VkBuffer source_device_buffer = iree_hal_vulkan_vma_buffer_handle(
+      iree_hal_buffer_allocated_buffer(source_buffer));
+  VkBuffer target_device_buffer = iree_hal_vulkan_vma_buffer_handle(
+      iree_hal_buffer_allocated_buffer(target_buffer));
+
+  const iree_hal_buffer_t* buffers[2] = {source_buffer, target_buffer};
+  IREE_RETURN_IF_ERROR(
+      iree_hal_resource_set_insert(command_buffer->resource_set, 2, buffers));
+
+  // Offsets are relative to the logical buffers; fold in the backing
+  // allocation byte offsets to get device-visible offsets.
+  VkBufferCopy region;
+  region.srcOffset = iree_hal_buffer_byte_offset(source_buffer) + source_offset;
+  region.dstOffset = iree_hal_buffer_byte_offset(target_buffer) + target_offset;
+  region.size = length;
+  // NOTE: '&region' here was mojibake ('®ion') in the previous revision and
+  // would not compile; restored the address-of expression.
+  command_buffer->syms->vkCmdCopyBuffer(command_buffer->handle,
+                                        source_device_buffer,
+                                        target_device_buffer, 1, &region);
+
+  return iree_ok_status();
+}
+
+// Records a push constant update and mirrors the values into
+// |push_constants_storage| so they can be restored after the unaligned-fill
+// polyfill clobbers push constants (see fill_buffer above).
+static iree_status_t iree_hal_vulkan_direct_command_buffer_push_constants(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset,
+    const void* values, iree_host_size_t values_length) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+
+  // Shadow the updated bytes, clamped to the remaining storage capacity.
+  // NOTE: fixed clamp: the previous std::min(values_length, storage_size) -
+  // offset silently dropped the trailing |offset| bytes whenever
+  // values_length was the smaller operand.
+  iree_host_size_t storage_size =
+      IREE_ARRAYSIZE(command_buffer->push_constants_storage);
+  if (offset < storage_size) {
+    memcpy(command_buffer->push_constants_storage + offset, values,
+           std::min(values_length, storage_size - offset));
+  }
+
+  command_buffer->syms->vkCmdPushConstants(
+      command_buffer->handle,
+      iree_hal_vulkan_native_executable_layout_handle(executable_layout),
+      VK_SHADER_STAGE_COMPUTE_BIT, (uint32_t)offset, (uint32_t)values_length,
+      values);
+
+  return iree_ok_status();
+}
+
+// Binds |bindings| for descriptor |set| via the descriptor set arena, which
+// either allocates+updates a set or uses push descriptors when supported.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_push_descriptor_set(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+
+  // TODO(benvanik): batch insert by getting the resources in their own list.
+  for (iree_host_size_t i = 0; i < binding_count; ++i) {
+    IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+        command_buffer->resource_set, 1, &bindings[i].buffer));
+  }
+
+  // Either allocate, update, and bind a descriptor set or use push descriptor
+  // sets to use the command buffer pool when supported.
+  return command_buffer->descriptor_set_arena.BindDescriptorSet(
+      command_buffer->handle, executable_layout, set, binding_count, bindings);
+}
+
+// Binds an already-allocated |descriptor_set| at |set| with the given dynamic
+// offsets. Offsets are widened/narrowed to uint32_t as Vulkan requires.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_bind_descriptor_set(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_layout_t* executable_layout, uint32_t set,
+    iree_hal_descriptor_set_t* descriptor_set,
+    iree_host_size_t dynamic_offset_count,
+    const iree_device_size_t* dynamic_offsets) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+  iree_allocator_t host_allocator =
+      command_buffer->logical_device->host_allocator();
+
+  // Retain the descriptor set until the command buffer has retired.
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &descriptor_set));
+
+  // Vulkan takes uint32_t as the size here, unlike everywhere else.
+  iree_inline_array(uint32_t, dynamic_offsets_i32, dynamic_offset_count,
+                    host_allocator);
+  for (int i = 0; i < dynamic_offset_count; ++i) {
+    *iree_inline_array_at(dynamic_offsets_i32, i) =
+        (uint32_t)dynamic_offsets[i];
+  }
+
+  VkDescriptorSet descriptor_sets[1] = {
+      iree_hal_vulkan_native_descriptor_set_handle(descriptor_set),
+  };
+  command_buffer->syms->vkCmdBindDescriptorSets(
+      command_buffer->handle, VK_PIPELINE_BIND_POINT_COMPUTE,
+      iree_hal_vulkan_native_executable_layout_handle(executable_layout), set,
+      (uint32_t)IREE_ARRAYSIZE(descriptor_sets), descriptor_sets,
+      (uint32_t)dynamic_offset_count,
+      iree_inline_array_data(dynamic_offsets_i32));
+
+  iree_inline_array_deinitialize(dynamic_offsets_i32);
+
+  return iree_ok_status();
+}
+
+// Records a compute dispatch of |executable|'s |entry_point| with the given
+// workgroup counts, wrapped in a tracing zone when tracing is enabled.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_dispatch(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+
+  // Zone naming uses the entry point source location when available.
+  IREE_TRACE({
+    iree_hal_vulkan_source_location_t source_location;
+    iree_hal_vulkan_native_executable_entry_point_source_location(
+        executable, entry_point, &source_location);
+    IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL(
+        command_buffer->tracing_context, command_buffer->handle,
+        source_location.file_name.data, source_location.file_name.size,
+        source_location.line, /*func_name=*/NULL, 0,
+        source_location.func_name.data, source_location.func_name.size);
+  });
+
+  // Retain the executable until the command buffer has retired.
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, 1, &executable));
+
+  // Get the compiled and linked pipeline for the specified entry point and
+  // bind it to the command buffer.
+  VkPipeline pipeline_handle = VK_NULL_HANDLE;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_vulkan_native_executable_pipeline_for_entry_point(
+          executable, entry_point, &pipeline_handle));
+  command_buffer->syms->vkCmdBindPipeline(
+      command_buffer->handle, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
+
+  command_buffer->syms->vkCmdDispatch(command_buffer->handle, workgroup_x,
+                                      workgroup_y, workgroup_z);
+
+  IREE_VULKAN_TRACE_ZONE_END(command_buffer->tracing_context,
+                             command_buffer->handle);
+
+  return iree_ok_status();
+}
+
+// Records an indirect compute dispatch reading workgroup counts from
+// |workgroups_buffer| at |workgroups_offset|.
+static iree_status_t iree_hal_vulkan_direct_command_buffer_dispatch_indirect(
+    iree_hal_command_buffer_t* base_command_buffer,
+    iree_hal_executable_t* executable, int32_t entry_point,
+    iree_hal_buffer_t* workgroups_buffer,
+    iree_device_size_t workgroups_offset) {
+  iree_hal_vulkan_direct_command_buffer_t* command_buffer =
+      iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
+
+  // Retain executable and workgroup count buffer until commands retire.
+  const void* resources[2] = {executable, workgroups_buffer};
+  IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
+      command_buffer->resource_set, IREE_ARRAYSIZE(resources), resources));
+
+  // NOTE(review): unlike dispatch above this zone-begin is not wrapped in
+  // IREE_TRACE, and an error return below would leave the zone open — confirm
+  // whether that is intentional.
+  iree_hal_vulkan_source_location_t source_location;
+  iree_hal_vulkan_native_executable_entry_point_source_location(
+      executable, entry_point, &source_location);
+  IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL(
+      command_buffer->tracing_context, command_buffer->handle,
+      source_location.file_name.data, source_location.file_name.size,
+      source_location.line, /*func_name=*/NULL, 0,
+      source_location.func_name.data, source_location.func_name.size);
+
+  // Get the compiled and linked pipeline for the specified entry point and
+  // bind it to the command buffer.
+  VkPipeline pipeline_handle = VK_NULL_HANDLE;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_vulkan_native_executable_pipeline_for_entry_point(
+          executable, entry_point, &pipeline_handle));
+  command_buffer->syms->vkCmdBindPipeline(
+      command_buffer->handle, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
+
+  VkBuffer workgroups_device_buffer = iree_hal_vulkan_vma_buffer_handle(
+      iree_hal_buffer_allocated_buffer(workgroups_buffer));
+  workgroups_offset += iree_hal_buffer_byte_offset(workgroups_buffer);
+  command_buffer->syms->vkCmdDispatchIndirect(
+      command_buffer->handle, workgroups_device_buffer, workgroups_offset);
+
+  IREE_VULKAN_TRACE_ZONE_END(command_buffer->tracing_context,
+                             command_buffer->handle);
+
+  return iree_ok_status();
+}
+
+namespace {
+// Function table wiring the iree_hal_command_buffer_t interface to the static
+// implementations above; entry order must match the vtable declaration.
+const iree_hal_command_buffer_vtable_t
+    iree_hal_vulkan_direct_command_buffer_vtable = {
+        /*.destroy=*/iree_hal_vulkan_direct_command_buffer_destroy,
+        /*.dyn_cast=*/iree_hal_vulkan_direct_command_buffer_dyn_cast,
+        /*.begin=*/iree_hal_vulkan_direct_command_buffer_begin,
+        /*.end=*/iree_hal_vulkan_direct_command_buffer_end,
+        /*.begin_debug_group=*/
+        iree_hal_vulkan_direct_command_buffer_begin_debug_group,
+        /*.end_debug_group=*/
+        iree_hal_vulkan_direct_command_buffer_end_debug_group,
+        /*.execution_barrier=*/
+        iree_hal_vulkan_direct_command_buffer_execution_barrier,
+        /*.signal_event=*/
+        iree_hal_vulkan_direct_command_buffer_signal_event,
+        /*.reset_event=*/iree_hal_vulkan_direct_command_buffer_reset_event,
+        /*.wait_events=*/iree_hal_vulkan_direct_command_buffer_wait_events,
+        /*.discard_buffer=*/
+        iree_hal_vulkan_direct_command_buffer_discard_buffer,
+        /*.fill_buffer=*/iree_hal_vulkan_direct_command_buffer_fill_buffer,
+        /*.update_buffer=*/
+        iree_hal_vulkan_direct_command_buffer_update_buffer,
+        /*.copy_buffer=*/iree_hal_vulkan_direct_command_buffer_copy_buffer,
+        /*.push_constants=*/
+        iree_hal_vulkan_direct_command_buffer_push_constants,
+        /*.push_descriptor_set=*/
+        iree_hal_vulkan_direct_command_buffer_push_descriptor_set,
+        /*.bind_descriptor_set=*/
+        iree_hal_vulkan_direct_command_buffer_bind_descriptor_set,
+        /*.dispatch=*/iree_hal_vulkan_direct_command_buffer_dispatch,
+        /*.dispatch_indirect=*/
+        iree_hal_vulkan_direct_command_buffer_dispatch_indirect,
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/vulkan/direct_command_buffer.h b/runtime/src/iree/hal/vulkan/direct_command_buffer.h
new file mode 100644
index 0000000..57c15ad
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/direct_command_buffer.h
@@ -0,0 +1,52 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_DIRECT_COMMAND_BUFFER_H_
+#define IREE_HAL_VULKAN_DIRECT_COMMAND_BUFFER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/builtin_executables.h"
+#include "iree/hal/vulkan/descriptor_pool_cache.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/tracing.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Forward declaration to avoid pulling in the arena header here.
+typedef struct iree_arena_block_pool_t iree_arena_block_pool_t;
+
+// Creates a command buffer that directly records into a VkCommandBuffer.
+//
+// NOTE: the |block_pool| must remain live for the lifetime of the command
+// buffers that use it.
+iree_status_t iree_hal_vulkan_direct_command_buffer_allocate(
+    iree_hal_device_t* device,
+    iree::hal::vulkan::VkDeviceHandle* logical_device,
+    iree::hal::vulkan::VkCommandPoolHandle* command_pool,
+    iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity,
+    iree_hal_vulkan_tracing_context_t* tracing_context,
+    iree::hal::vulkan::DescriptorPoolCache* descriptor_pool_cache,
+    iree::hal::vulkan::BuiltinExecutables* builtin_executables,
+    iree_arena_block_pool_t* block_pool,
+    iree_hal_command_buffer_t** out_command_buffer);
+
+// Returns the native Vulkan VkCommandBuffer handle.
+VkCommandBuffer iree_hal_vulkan_direct_command_buffer_handle(
+    iree_hal_command_buffer_t* command_buffer);
+
+// Returns true if |command_buffer| is a Vulkan command buffer.
+bool iree_hal_vulkan_direct_command_buffer_isa(
+    iree_hal_command_buffer_t* command_buffer);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_HAL_VULKAN_DIRECT_COMMAND_BUFFER_H_
diff --git a/runtime/src/iree/hal/vulkan/direct_command_queue.cc b/runtime/src/iree/hal/vulkan/direct_command_queue.cc
new file mode 100644
index 0000000..1a132a8
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/direct_command_queue.cc
@@ -0,0 +1,197 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/direct_command_queue.h"
+
+#include <cstdint>
+
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/direct_command_buffer.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/native_semaphore.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/tracing.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+// Thin wrapper over a VkQueue; all state lives in the CommandQueue base.
+DirectCommandQueue::DirectCommandQueue(
+    VkDeviceHandle* logical_device,
+    iree_hal_command_category_t supported_categories, VkQueue queue)
+    : CommandQueue(logical_device, supported_categories, queue) {}
+
+DirectCommandQueue::~DirectCommandQueue() = default;
+
+// Translates one iree_hal_submission_batch_t into a VkSubmitInfo (plus the
+// chained VkTimelineSemaphoreSubmitInfo). All arrays are allocated from
+// |arena| and must outlive the vkQueueSubmit that consumes them.
+iree_status_t DirectCommandQueue::TranslateBatchInfo(
+    const iree_hal_submission_batch_t* batch, VkSubmitInfo* submit_info,
+    VkTimelineSemaphoreSubmitInfo* timeline_submit_info, Arena* arena) {
+  // TODO(benvanik): see if we can go to finer-grained stages.
+  // For example, if this was just queue ownership transfers then we can use
+  // the pseudo-stage of VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT.
+  VkPipelineStageFlags dst_stage_mask =
+      VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+
+  auto wait_semaphore_handles =
+      arena->AllocateSpan<VkSemaphore>(batch->wait_semaphores.count);
+  auto wait_semaphore_values =
+      arena->AllocateSpan<uint64_t>(batch->wait_semaphores.count);
+  auto wait_dst_stage_masks =
+      arena->AllocateSpan<VkPipelineStageFlags>(batch->wait_semaphores.count);
+  for (iree_host_size_t i = 0; i < batch->wait_semaphores.count; ++i) {
+    wait_semaphore_handles[i] = iree_hal_vulkan_native_semaphore_handle(
+        batch->wait_semaphores.semaphores[i]);
+    wait_semaphore_values[i] = batch->wait_semaphores.payload_values[i];
+    wait_dst_stage_masks[i] = dst_stage_mask;
+  }
+
+  auto signal_semaphore_handles =
+      arena->AllocateSpan<VkSemaphore>(batch->signal_semaphores.count);
+  auto signal_semaphore_values =
+      arena->AllocateSpan<uint64_t>(batch->signal_semaphores.count);
+  for (iree_host_size_t i = 0; i < batch->signal_semaphores.count; ++i) {
+    signal_semaphore_handles[i] = iree_hal_vulkan_native_semaphore_handle(
+        batch->signal_semaphores.semaphores[i]);
+    signal_semaphore_values[i] = batch->signal_semaphores.payload_values[i];
+  }
+
+  auto command_buffer_handles =
+      arena->AllocateSpan<VkCommandBuffer>(batch->command_buffer_count);
+  for (iree_host_size_t i = 0; i < batch->command_buffer_count; ++i) {
+    command_buffer_handles[i] =
+        iree_hal_vulkan_direct_command_buffer_handle(batch->command_buffers[i]);
+  }
+
+  submit_info->sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+  submit_info->pNext = timeline_submit_info;
+  submit_info->waitSemaphoreCount =
+      static_cast<uint32_t>(wait_semaphore_handles.size());
+  submit_info->pWaitSemaphores = wait_semaphore_handles.data();
+  submit_info->pWaitDstStageMask = wait_dst_stage_masks.data();
+  submit_info->commandBufferCount =
+      static_cast<uint32_t>(command_buffer_handles.size());
+  submit_info->pCommandBuffers = command_buffer_handles.data();
+  submit_info->signalSemaphoreCount =
+      static_cast<uint32_t>(signal_semaphore_handles.size());
+  submit_info->pSignalSemaphores = signal_semaphore_handles.data();
+
+  // Timeline payload values ride along in the pNext chain per
+  // VK_KHR_timeline_semaphore.
+  timeline_submit_info->sType =
+      VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
+  timeline_submit_info->pNext = nullptr;
+  timeline_submit_info->waitSemaphoreValueCount =
+      static_cast<uint32_t>(wait_semaphore_values.size());
+  timeline_submit_info->pWaitSemaphoreValues = wait_semaphore_values.data();
+  timeline_submit_info->signalSemaphoreValueCount =
+      static_cast<uint32_t>(signal_semaphore_values.size());
+  timeline_submit_info->pSignalSemaphoreValues = signal_semaphore_values.data();
+
+  return iree_ok_status();
+}
+
+// Submits |batch_count| batches to the queue in a single vkQueueSubmit call.
+// The queue mutex serializes submissions; the arena keeps all translated
+// arrays alive until vkQueueSubmit returns.
+iree_status_t DirectCommandQueue::Submit(
+    iree_host_size_t batch_count, const iree_hal_submission_batch_t* batches) {
+  IREE_TRACE_SCOPE0("DirectCommandQueue::Submit");
+
+  // Map the submission batches to VkSubmitInfos.
+  // Note that we must keep all arrays referenced alive until submission
+  // completes and since there are a bunch of them we use an arena.
+  Arena arena(4 * 1024);
+  auto submit_infos = arena.AllocateSpan<VkSubmitInfo>(batch_count);
+  auto timeline_submit_infos =
+      arena.AllocateSpan<VkTimelineSemaphoreSubmitInfo>(batch_count);
+  for (int i = 0; i < batch_count; ++i) {
+    IREE_RETURN_IF_ERROR(TranslateBatchInfo(&batches[i], &submit_infos[i],
+                                            &timeline_submit_infos[i], &arena));
+  }
+
+  iree_slim_mutex_lock(&queue_mutex_);
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      syms()->vkQueueSubmit(queue_, static_cast<uint32_t>(submit_infos.size()),
+                            submit_infos.data(), VK_NULL_HANDLE),
+      "vkQueueSubmit");
+  iree_slim_mutex_unlock(&queue_mutex_);
+  IREE_RETURN_IF_ERROR(status);
+
+  return iree_ok_status();
+}
+
+// Blocks until all work submitted to the queue completes or |timeout| is
+// reached. Infinite waits use vkQueueWaitIdle; bounded waits submit an empty
+// batch with a fresh fence and wait on it with a relative timeout.
+iree_status_t DirectCommandQueue::WaitIdle(iree_timeout_t timeout) {
+  iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+  if (deadline_ns == IREE_TIME_INFINITE_FUTURE) {
+    // Fast path for using vkQueueWaitIdle, which is usually cheaper (as it
+    // requires fewer calls into the driver).
+    IREE_TRACE_SCOPE0("DirectCommandQueue::WaitIdle#vkQueueWaitIdle");
+    iree_slim_mutex_lock(&queue_mutex_);
+    iree_status_t status =
+        VK_RESULT_TO_STATUS(syms()->vkQueueWaitIdle(queue_), "vkQueueWaitIdle");
+    iree_slim_mutex_unlock(&queue_mutex_);
+    iree_hal_vulkan_tracing_context_collect(tracing_context(), VK_NULL_HANDLE);
+    return status;
+  }
+
+  IREE_TRACE_SCOPE0("DirectCommandQueue::WaitIdle#Fence");
+
+  // Convert the deadline to a relative timeout *before* creating the fence so
+  // the early DEADLINE_EXCEEDED return below cannot leak a VkFence (the
+  // previous ordering created the fence first and leaked it on that path).
+  // NOTE: the infinite-future case was already handled by the fast path above.
+  uint64_t timeout_ns;
+  if (deadline_ns == IREE_TIME_INFINITE_PAST) {
+    // Do not wait.
+    timeout_ns = 0;
+  } else {
+    // Convert to relative time in nanoseconds.
+    // The implementation may not wait with this granularity (like by 10000x).
+    iree_time_t now_ns = iree_time_now();
+    if (deadline_ns < now_ns) {
+      return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+    }
+    timeout_ns = (uint64_t)(deadline_ns - now_ns);
+  }
+
+  // Create a new fence just for this wait. This keeps us thread-safe as the
+  // behavior of wait+reset is racey.
+  VkFenceCreateInfo create_info;
+  create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+  create_info.pNext = nullptr;
+  create_info.flags = 0;
+  VkFence fence = VK_NULL_HANDLE;
+  VK_RETURN_IF_ERROR(
+      syms()->vkCreateFence(*logical_device_, &create_info,
+                            logical_device_->allocator(), &fence),
+      "vkCreateFence");
+
+  // Submit an empty batch that signals the fence once all prior work retires.
+  iree_slim_mutex_lock(&queue_mutex_);
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      syms()->vkQueueSubmit(queue_, 0, nullptr, fence), "vkQueueSubmit");
+  iree_slim_mutex_unlock(&queue_mutex_);
+
+  if (iree_status_is_ok(status)) {
+    VkResult result = syms()->vkWaitForFences(*logical_device_, 1, &fence,
+                                              VK_TRUE, timeout_ns);
+    switch (result) {
+      case VK_SUCCESS:
+        status = iree_ok_status();
+        break;
+      case VK_TIMEOUT:
+        status = iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+        break;
+      default:
+        status = VK_RESULT_TO_STATUS(result, "vkWaitForFences");
+        break;
+    }
+  }
+
+  // The fence is always destroyed, regardless of wait outcome.
+  syms()->vkDestroyFence(*logical_device_, fence, logical_device_->allocator());
+
+  iree_hal_vulkan_tracing_context_collect(tracing_context(), VK_NULL_HANDLE);
+
+  return status;
+}
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/direct_command_queue.h b/runtime/src/iree/hal/vulkan/direct_command_queue.h
new file mode 100644
index 0000000..5ff9a68
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/direct_command_queue.h
@@ -0,0 +1,43 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_DIRECT_COMMAND_QUEUE_H_
+#define IREE_HAL_VULKAN_DIRECT_COMMAND_QUEUE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/command_queue.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/util/arena.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+// Command queue implementation directly maps to VkQueue.
+class DirectCommandQueue final : public CommandQueue {
+ public:
+  DirectCommandQueue(VkDeviceHandle* logical_device,
+                     iree_hal_command_category_t supported_categories,
+                     VkQueue queue);
+  ~DirectCommandQueue() override;
+
+  // Submits |batch_count| submission batches in one vkQueueSubmit call.
+  iree_status_t Submit(iree_host_size_t batch_count,
+                       const iree_hal_submission_batch_t* batches) override;
+
+  // Blocks until the queue is idle or |timeout| elapses.
+  iree_status_t WaitIdle(iree_timeout_t timeout) override;
+
+ private:
+  // Translates one batch into a VkSubmitInfo; arrays are arena-allocated and
+  // must outlive the submission.
+  iree_status_t TranslateBatchInfo(
+      const iree_hal_submission_batch_t* batch, VkSubmitInfo* submit_info,
+      VkTimelineSemaphoreSubmitInfo* timeline_submit_info, Arena* arena);
+};
+
+}  // namespace vulkan
+}  // namespace hal
+}  // namespace iree
+
+#endif  // IREE_HAL_VULKAN_DIRECT_COMMAND_QUEUE_H_
diff --git a/runtime/src/iree/hal/vulkan/dynamic_symbol_tables.h b/runtime/src/iree/hal/vulkan/dynamic_symbol_tables.h
new file mode 100644
index 0000000..e819a7e
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/dynamic_symbol_tables.h
@@ -0,0 +1,501 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Use these tables whenever enumerating all functions in the Vulkan API is
+// required. In most cases IREE_VULKAN_DYNAMIC_SYMBOL_TABLES is the right
+// choice (includes both common and enabled platform-specific functions).
+//
+// Table macros are designed to take two macros: one for each instance-specific
+// function and one for each device-specific function. These macros are also
+// passed a requirement flag that enables compile-time exclusion of methods that
+// are not used in the binary. If you find yourself getting compilation errors
+// on missing methods you probably need to change it in the tables below from
+// EXCLUDED to REQUIRED or OPTIONAL.
+//
+// Define to get instance-specific functions:
+// #define INS_PFN(requirement, function_name)
+//
+// Define to get device-specific functions:
+// #define DEV_PFN(requirement, function_name)
+//
+// requirement is one of REQUIRED, OPTIONAL, or EXCLUDED.
+
+#ifndef IREE_HAL_VULKAN_DYNAMIC_SYMBOL_TABLES_H_
+#define IREE_HAL_VULKAN_DYNAMIC_SYMBOL_TABLES_H_
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+// Defines the list of symbols that can be queried from vkGetInstanceProcAddr
+// before Vulkan instance creation.
+#define IREE_VULKAN_DYNAMIC_SYMBOL_INSTANCELESS_TABLE(INS_PFN) \
+ INS_PFN(REQUIRED, vkCreateInstance) \
+ INS_PFN(REQUIRED, vkEnumerateInstanceExtensionProperties) \
+ INS_PFN(REQUIRED, vkEnumerateInstanceLayerProperties) \
+ INS_PFN(OPTIONAL, vkEnumerateInstanceVersion)
+
+// Defines the list of instance/device symbols that are queried from
+// vkGetInstanceProcAddr/vkGetDeviceProcAddr after Vulkan instance/device
+// creation.
+#define IREE_VULKAN_DYNAMIC_SYMBOL_COMMON_TABLE(INS_PFN, DEV_PFN) \
+ DEV_PFN(REQUIRED, vkBeginCommandBuffer) \
+ DEV_PFN(EXCLUDED, vkCmdBeginConditionalRenderingEXT) \
+ DEV_PFN(OPTIONAL, vkCmdBeginDebugUtilsLabelEXT) \
+ DEV_PFN(EXCLUDED, vkCmdBeginQuery) \
+ DEV_PFN(EXCLUDED, vkCmdBeginQueryIndexedEXT) \
+ DEV_PFN(EXCLUDED, vkCmdBeginRenderPass) \
+ DEV_PFN(EXCLUDED, vkCmdBeginRenderPass2KHR) \
+ DEV_PFN(EXCLUDED, vkCmdBeginTransformFeedbackEXT) \
+ DEV_PFN(REQUIRED, vkCmdBindDescriptorSets) \
+ DEV_PFN(EXCLUDED, vkCmdBindIndexBuffer) \
+ DEV_PFN(REQUIRED, vkCmdBindPipeline) \
+ DEV_PFN(EXCLUDED, vkCmdBindShadingRateImageNV) \
+ DEV_PFN(EXCLUDED, vkCmdBindTransformFeedbackBuffersEXT) \
+ DEV_PFN(EXCLUDED, vkCmdBindVertexBuffers) \
+ DEV_PFN(EXCLUDED, vkCmdBlitImage) \
+ DEV_PFN(EXCLUDED, vkCmdBuildAccelerationStructureNV) \
+ DEV_PFN(EXCLUDED, vkCmdClearAttachments) \
+ DEV_PFN(EXCLUDED, vkCmdClearColorImage) \
+ DEV_PFN(EXCLUDED, vkCmdClearDepthStencilImage) \
+ DEV_PFN(EXCLUDED, vkCmdCopyAccelerationStructureNV) \
+ DEV_PFN(REQUIRED, vkCmdCopyBuffer) \
+ DEV_PFN(EXCLUDED, vkCmdCopyBufferToImage) \
+ DEV_PFN(EXCLUDED, vkCmdCopyImage) \
+ DEV_PFN(EXCLUDED, vkCmdCopyImageToBuffer) \
+ DEV_PFN(EXCLUDED, vkCmdCopyQueryPoolResults) \
+ DEV_PFN(EXCLUDED, vkCmdDebugMarkerBeginEXT) \
+ DEV_PFN(EXCLUDED, vkCmdDebugMarkerEndEXT) \
+ DEV_PFN(EXCLUDED, vkCmdDebugMarkerInsertEXT) \
+ DEV_PFN(REQUIRED, vkCmdDispatch) \
+ DEV_PFN(EXCLUDED, vkCmdDispatchBase) \
+ DEV_PFN(EXCLUDED, vkCmdDispatchBaseKHR) \
+ DEV_PFN(REQUIRED, vkCmdDispatchIndirect) \
+ DEV_PFN(EXCLUDED, vkCmdDraw) \
+ DEV_PFN(EXCLUDED, vkCmdDrawIndexed) \
+ DEV_PFN(EXCLUDED, vkCmdDrawIndexedIndirect) \
+ DEV_PFN(EXCLUDED, vkCmdDrawIndexedIndirectCountAMD) \
+ DEV_PFN(EXCLUDED, vkCmdDrawIndexedIndirectCountKHR) \
+ DEV_PFN(EXCLUDED, vkCmdDrawIndirect) \
+ DEV_PFN(EXCLUDED, vkCmdDrawIndirectByteCountEXT) \
+ DEV_PFN(EXCLUDED, vkCmdDrawIndirectCountAMD) \
+ DEV_PFN(EXCLUDED, vkCmdDrawIndirectCountKHR) \
+ DEV_PFN(EXCLUDED, vkCmdDrawMeshTasksIndirectCountNV) \
+ DEV_PFN(EXCLUDED, vkCmdDrawMeshTasksIndirectNV) \
+ DEV_PFN(EXCLUDED, vkCmdDrawMeshTasksNV) \
+ DEV_PFN(EXCLUDED, vkCmdEndConditionalRenderingEXT) \
+ DEV_PFN(OPTIONAL, vkCmdEndDebugUtilsLabelEXT) \
+ DEV_PFN(EXCLUDED, vkCmdEndQuery) \
+ DEV_PFN(EXCLUDED, vkCmdEndQueryIndexedEXT) \
+ DEV_PFN(EXCLUDED, vkCmdEndRenderPass) \
+ DEV_PFN(EXCLUDED, vkCmdEndRenderPass2KHR) \
+ DEV_PFN(EXCLUDED, vkCmdEndTransformFeedbackEXT) \
+ DEV_PFN(REQUIRED, vkCmdExecuteCommands) \
+ DEV_PFN(REQUIRED, vkCmdFillBuffer) \
+ DEV_PFN(OPTIONAL, vkCmdInsertDebugUtilsLabelEXT) \
+ DEV_PFN(EXCLUDED, vkCmdNextSubpass) \
+ DEV_PFN(EXCLUDED, vkCmdNextSubpass2KHR) \
+ DEV_PFN(REQUIRED, vkCmdPipelineBarrier) \
+ DEV_PFN(EXCLUDED, vkCmdProcessCommandsNVX) \
+ DEV_PFN(REQUIRED, vkCmdPushConstants) \
+ DEV_PFN(OPTIONAL, vkCmdPushDescriptorSetKHR) \
+ DEV_PFN(EXCLUDED, vkCmdPushDescriptorSetWithTemplateKHR) \
+ DEV_PFN(EXCLUDED, vkCmdReserveSpaceForCommandsNVX) \
+ DEV_PFN(REQUIRED, vkCmdResetEvent) \
+ DEV_PFN(REQUIRED, vkCmdResetQueryPool) \
+ DEV_PFN(EXCLUDED, vkCmdResolveImage) \
+ DEV_PFN(EXCLUDED, vkCmdSetBlendConstants) \
+ DEV_PFN(EXCLUDED, vkCmdSetCheckpointNV) \
+ DEV_PFN(EXCLUDED, vkCmdSetCoarseSampleOrderNV) \
+ DEV_PFN(EXCLUDED, vkCmdSetDepthBias) \
+ DEV_PFN(EXCLUDED, vkCmdSetDepthBounds) \
+ DEV_PFN(EXCLUDED, vkCmdSetDeviceMask) \
+ DEV_PFN(EXCLUDED, vkCmdSetDeviceMaskKHR) \
+ DEV_PFN(EXCLUDED, vkCmdSetDiscardRectangleEXT) \
+ DEV_PFN(REQUIRED, vkCmdSetEvent) \
+ DEV_PFN(EXCLUDED, vkCmdSetExclusiveScissorNV) \
+ DEV_PFN(EXCLUDED, vkCmdSetLineWidth) \
+ DEV_PFN(EXCLUDED, vkCmdSetSampleLocationsEXT) \
+ DEV_PFN(EXCLUDED, vkCmdSetScissor) \
+ DEV_PFN(EXCLUDED, vkCmdSetStencilCompareMask) \
+ DEV_PFN(EXCLUDED, vkCmdSetStencilReference) \
+ DEV_PFN(EXCLUDED, vkCmdSetStencilWriteMask) \
+ DEV_PFN(EXCLUDED, vkCmdSetViewport) \
+ DEV_PFN(EXCLUDED, vkCmdSetViewportShadingRatePaletteNV) \
+ DEV_PFN(EXCLUDED, vkCmdSetViewportWScalingNV) \
+ DEV_PFN(EXCLUDED, vkCmdTraceRaysNV) \
+ DEV_PFN(REQUIRED, vkCmdUpdateBuffer) \
+ DEV_PFN(REQUIRED, vkCmdWaitEvents) \
+ DEV_PFN(EXCLUDED, vkCmdWriteAccelerationStructuresPropertiesNV) \
+ DEV_PFN(EXCLUDED, vkCmdWriteBufferMarkerAMD) \
+ DEV_PFN(REQUIRED, vkCmdWriteTimestamp) \
+ DEV_PFN(REQUIRED, vkEndCommandBuffer) \
+ DEV_PFN(EXCLUDED, vkResetCommandBuffer) \
+ DEV_PFN(EXCLUDED, vkAcquireNextImage2KHR) \
+ DEV_PFN(EXCLUDED, vkAcquireNextImageKHR) \
+ DEV_PFN(REQUIRED, vkAllocateCommandBuffers) \
+ DEV_PFN(REQUIRED, vkAllocateDescriptorSets) \
+ DEV_PFN(REQUIRED, vkAllocateMemory) \
+ DEV_PFN(EXCLUDED, vkBindAccelerationStructureMemoryNV) \
+ DEV_PFN(REQUIRED, vkBindBufferMemory) \
+ DEV_PFN(EXCLUDED, vkBindBufferMemory2) \
+ DEV_PFN(EXCLUDED, vkBindBufferMemory2KHR) \
+ DEV_PFN(REQUIRED, vkBindImageMemory) \
+ DEV_PFN(EXCLUDED, vkBindImageMemory2) \
+ DEV_PFN(EXCLUDED, vkBindImageMemory2KHR) \
+ DEV_PFN(EXCLUDED, vkCompileDeferredNV) \
+ DEV_PFN(EXCLUDED, vkCreateAccelerationStructureNV) \
+ DEV_PFN(REQUIRED, vkCreateBuffer) \
+ DEV_PFN(REQUIRED, vkCreateBufferView) \
+ DEV_PFN(REQUIRED, vkCreateCommandPool) \
+ DEV_PFN(REQUIRED, vkCreateComputePipelines) \
+ DEV_PFN(REQUIRED, vkCreateDescriptorPool) \
+ DEV_PFN(REQUIRED, vkCreateDescriptorSetLayout) \
+ DEV_PFN(EXCLUDED, vkCreateDescriptorUpdateTemplate) \
+ DEV_PFN(EXCLUDED, vkCreateDescriptorUpdateTemplateKHR) \
+ DEV_PFN(REQUIRED, vkCreateEvent) \
+ DEV_PFN(REQUIRED, vkCreateFence) \
+ DEV_PFN(EXCLUDED, vkCreateFramebuffer) \
+ DEV_PFN(EXCLUDED, vkCreateGraphicsPipelines) \
+ DEV_PFN(REQUIRED, vkCreateImage) \
+ DEV_PFN(EXCLUDED, vkCreateImageView) \
+ DEV_PFN(EXCLUDED, vkCreateIndirectCommandsLayoutNVX) \
+ DEV_PFN(EXCLUDED, vkCreateObjectTableNVX) \
+ DEV_PFN(REQUIRED, vkCreatePipelineCache) \
+ DEV_PFN(REQUIRED, vkCreatePipelineLayout) \
+ DEV_PFN(REQUIRED, vkCreateQueryPool) \
+ DEV_PFN(EXCLUDED, vkCreateRayTracingPipelinesNV) \
+ DEV_PFN(EXCLUDED, vkCreateRenderPass) \
+ DEV_PFN(EXCLUDED, vkCreateRenderPass2KHR) \
+ DEV_PFN(EXCLUDED, vkCreateSampler) \
+ DEV_PFN(EXCLUDED, vkCreateSamplerYcbcrConversion) \
+ DEV_PFN(EXCLUDED, vkCreateSamplerYcbcrConversionKHR) \
+ DEV_PFN(REQUIRED, vkCreateSemaphore) \
+ DEV_PFN(REQUIRED, vkCreateShaderModule) \
+ DEV_PFN(EXCLUDED, vkCreateSharedSwapchainsKHR) \
+ DEV_PFN(EXCLUDED, vkCreateSwapchainKHR) \
+ DEV_PFN(EXCLUDED, vkCreateValidationCacheEXT) \
+ DEV_PFN(EXCLUDED, vkDebugMarkerSetObjectNameEXT) \
+ DEV_PFN(EXCLUDED, vkDebugMarkerSetObjectTagEXT) \
+ DEV_PFN(EXCLUDED, vkDestroyAccelerationStructureNV) \
+ DEV_PFN(REQUIRED, vkDestroyBuffer) \
+ DEV_PFN(REQUIRED, vkDestroyBufferView) \
+ DEV_PFN(REQUIRED, vkDestroyCommandPool) \
+ DEV_PFN(REQUIRED, vkDestroyDescriptorPool) \
+ DEV_PFN(REQUIRED, vkDestroyDescriptorSetLayout) \
+ DEV_PFN(EXCLUDED, vkDestroyDescriptorUpdateTemplate) \
+ DEV_PFN(EXCLUDED, vkDestroyDescriptorUpdateTemplateKHR) \
+ DEV_PFN(REQUIRED, vkDestroyDevice) \
+ DEV_PFN(REQUIRED, vkDestroyEvent) \
+ DEV_PFN(REQUIRED, vkDestroyFence) \
+ DEV_PFN(EXCLUDED, vkDestroyFramebuffer) \
+ DEV_PFN(REQUIRED, vkDestroyImage) \
+ DEV_PFN(EXCLUDED, vkDestroyImageView) \
+ DEV_PFN(EXCLUDED, vkDestroyIndirectCommandsLayoutNVX) \
+ DEV_PFN(EXCLUDED, vkDestroyObjectTableNVX) \
+ DEV_PFN(REQUIRED, vkDestroyPipeline) \
+ DEV_PFN(REQUIRED, vkDestroyPipelineCache) \
+ DEV_PFN(REQUIRED, vkDestroyPipelineLayout) \
+ DEV_PFN(REQUIRED, vkDestroyQueryPool) \
+ DEV_PFN(EXCLUDED, vkDestroyRenderPass) \
+ DEV_PFN(EXCLUDED, vkDestroySampler) \
+ DEV_PFN(EXCLUDED, vkDestroySamplerYcbcrConversion) \
+ DEV_PFN(EXCLUDED, vkDestroySamplerYcbcrConversionKHR) \
+ DEV_PFN(REQUIRED, vkDestroySemaphore) \
+ DEV_PFN(REQUIRED, vkDestroyShaderModule) \
+ DEV_PFN(EXCLUDED, vkDestroySwapchainKHR) \
+ DEV_PFN(EXCLUDED, vkDestroyValidationCacheEXT) \
+ DEV_PFN(REQUIRED, vkDeviceWaitIdle) \
+ DEV_PFN(EXCLUDED, vkDisplayPowerControlEXT) \
+ DEV_PFN(REQUIRED, vkFlushMappedMemoryRanges) \
+ DEV_PFN(REQUIRED, vkFreeCommandBuffers) \
+ DEV_PFN(REQUIRED, vkFreeDescriptorSets) \
+ DEV_PFN(REQUIRED, vkFreeMemory) \
+ DEV_PFN(EXCLUDED, vkGetAccelerationStructureHandleNV) \
+ DEV_PFN(EXCLUDED, vkGetAccelerationStructureMemoryRequirementsNV) \
+ DEV_PFN(EXCLUDED, vkGetBufferDeviceAddressEXT) \
+ DEV_PFN(REQUIRED, vkGetBufferMemoryRequirements) \
+ DEV_PFN(EXCLUDED, vkGetBufferMemoryRequirements2) \
+ DEV_PFN(EXCLUDED, vkGetBufferMemoryRequirements2KHR) \
+ DEV_PFN(OPTIONAL, vkGetCalibratedTimestampsEXT) \
+ DEV_PFN(EXCLUDED, vkGetDescriptorSetLayoutSupport) \
+ DEV_PFN(EXCLUDED, vkGetDescriptorSetLayoutSupportKHR) \
+ DEV_PFN(EXCLUDED, vkGetDeviceGroupPeerMemoryFeatures) \
+ DEV_PFN(EXCLUDED, vkGetDeviceGroupPeerMemoryFeaturesKHR) \
+ DEV_PFN(EXCLUDED, vkGetDeviceGroupPresentCapabilitiesKHR) \
+ DEV_PFN(EXCLUDED, vkGetDeviceGroupSurfacePresentModesKHR) \
+ DEV_PFN(EXCLUDED, vkGetDeviceMemoryCommitment) \
+ DEV_PFN(REQUIRED, vkGetDeviceQueue) \
+ DEV_PFN(EXCLUDED, vkGetDeviceQueue2) \
+ DEV_PFN(REQUIRED, vkGetEventStatus) \
+ DEV_PFN(OPTIONAL, vkGetFenceFdKHR) \
+ DEV_PFN(REQUIRED, vkGetFenceStatus) \
+ DEV_PFN(EXCLUDED, vkGetImageDrmFormatModifierPropertiesEXT) \
+ DEV_PFN(REQUIRED, vkGetImageMemoryRequirements) \
+ DEV_PFN(EXCLUDED, vkGetImageMemoryRequirements2) \
+ DEV_PFN(EXCLUDED, vkGetImageMemoryRequirements2KHR) \
+ DEV_PFN(EXCLUDED, vkGetImageSparseMemoryRequirements) \
+ DEV_PFN(EXCLUDED, vkGetImageSparseMemoryRequirements2) \
+ DEV_PFN(EXCLUDED, vkGetImageSparseMemoryRequirements2KHR) \
+ DEV_PFN(EXCLUDED, vkGetImageSubresourceLayout) \
+ DEV_PFN(EXCLUDED, vkGetImageViewHandleNVX) \
+ DEV_PFN(EXCLUDED, vkGetMemoryFdKHR) \
+ DEV_PFN(EXCLUDED, vkGetMemoryFdPropertiesKHR) \
+ DEV_PFN(EXCLUDED, vkGetMemoryHostPointerPropertiesEXT) \
+ DEV_PFN(EXCLUDED, vkGetPastPresentationTimingGOOGLE) \
+ DEV_PFN(REQUIRED, vkGetPipelineCacheData) \
+ DEV_PFN(REQUIRED, vkGetQueryPoolResults) \
+ DEV_PFN(EXCLUDED, vkGetRayTracingShaderGroupHandlesNV) \
+ DEV_PFN(EXCLUDED, vkGetRefreshCycleDurationGOOGLE) \
+ DEV_PFN(EXCLUDED, vkGetRenderAreaGranularity) \
+ DEV_PFN(OPTIONAL, vkGetSemaphoreFdKHR) \
+ DEV_PFN(EXCLUDED, vkGetShaderInfoAMD) \
+ DEV_PFN(EXCLUDED, vkGetSwapchainCounterEXT) \
+ DEV_PFN(EXCLUDED, vkGetSwapchainImagesKHR) \
+ DEV_PFN(EXCLUDED, vkGetSwapchainStatusKHR) \
+ DEV_PFN(EXCLUDED, vkGetValidationCacheDataEXT) \
+ DEV_PFN(OPTIONAL, vkImportFenceFdKHR) \
+ DEV_PFN(OPTIONAL, vkImportSemaphoreFdKHR) \
+ DEV_PFN(REQUIRED, vkInvalidateMappedMemoryRanges) \
+ DEV_PFN(REQUIRED, vkMapMemory) \
+ DEV_PFN(REQUIRED, vkMergePipelineCaches) \
+ DEV_PFN(EXCLUDED, vkMergeValidationCachesEXT) \
+ DEV_PFN(EXCLUDED, vkRegisterDeviceEventEXT) \
+ DEV_PFN(EXCLUDED, vkRegisterDisplayEventEXT) \
+ DEV_PFN(EXCLUDED, vkRegisterObjectsNVX) \
+ DEV_PFN(EXCLUDED, vkResetCommandPool) \
+ DEV_PFN(REQUIRED, vkResetDescriptorPool) \
+ DEV_PFN(REQUIRED, vkResetEvent) \
+ DEV_PFN(REQUIRED, vkResetFences) \
+ DEV_PFN(OPTIONAL, vkResetQueryPool) \
+ DEV_PFN(OPTIONAL, vkResetQueryPoolEXT) \
+ DEV_PFN(OPTIONAL, vkSetDebugUtilsObjectNameEXT) \
+ DEV_PFN(OPTIONAL, vkSetDebugUtilsObjectTagEXT) \
+ DEV_PFN(REQUIRED, vkSetEvent) \
+ DEV_PFN(EXCLUDED, vkSetHdrMetadataEXT) \
+ DEV_PFN(EXCLUDED, vkSetLocalDimmingAMD) \
+ DEV_PFN(EXCLUDED, vkTrimCommandPool) \
+ DEV_PFN(EXCLUDED, vkTrimCommandPoolKHR) \
+ DEV_PFN(REQUIRED, vkUnmapMemory) \
+ DEV_PFN(EXCLUDED, vkUnregisterObjectsNVX) \
+ DEV_PFN(EXCLUDED, vkUpdateDescriptorSetWithTemplate) \
+ DEV_PFN(EXCLUDED, vkUpdateDescriptorSetWithTemplateKHR) \
+ DEV_PFN(REQUIRED, vkUpdateDescriptorSets) \
+ DEV_PFN(REQUIRED, vkWaitForFences) \
+ \
+ DEV_PFN(OPTIONAL, vkGetSemaphoreCounterValue) \
+ DEV_PFN(OPTIONAL, vkGetSemaphoreCounterValueKHR) \
+ DEV_PFN(OPTIONAL, vkWaitSemaphores) \
+ DEV_PFN(OPTIONAL, vkWaitSemaphoresKHR) \
+ DEV_PFN(OPTIONAL, vkSignalSemaphore) \
+ DEV_PFN(OPTIONAL, vkSignalSemaphoreKHR) \
+ \
+ INS_PFN(EXCLUDED, vkCreateDebugReportCallbackEXT) \
+ INS_PFN(OPTIONAL, vkCreateDebugUtilsMessengerEXT) \
+ INS_PFN(EXCLUDED, vkCreateDisplayPlaneSurfaceKHR) \
+ INS_PFN(EXCLUDED, vkCreateHeadlessSurfaceEXT) \
+ INS_PFN(EXCLUDED, vkDebugReportMessageEXT) \
+ INS_PFN(EXCLUDED, vkDestroyDebugReportCallbackEXT) \
+ INS_PFN(OPTIONAL, vkDestroyDebugUtilsMessengerEXT) \
+ INS_PFN(REQUIRED, vkDestroyInstance) \
+ INS_PFN(EXCLUDED, vkDestroySurfaceKHR) \
+ INS_PFN(EXCLUDED, vkEnumeratePhysicalDeviceGroups) \
+ INS_PFN(EXCLUDED, vkEnumeratePhysicalDeviceGroupsKHR) \
+ INS_PFN(REQUIRED, vkEnumeratePhysicalDevices) \
+ INS_PFN(EXCLUDED, vkSubmitDebugUtilsMessageEXT) \
+ INS_PFN(REQUIRED, vkCreateDevice) \
+ INS_PFN(EXCLUDED, vkCreateDisplayModeKHR) \
+ INS_PFN(REQUIRED, vkEnumerateDeviceExtensionProperties) \
+ INS_PFN(REQUIRED, vkEnumerateDeviceLayerProperties) \
+ INS_PFN(EXCLUDED, vkGetDisplayModeProperties2KHR) \
+ INS_PFN(EXCLUDED, vkGetDisplayModePropertiesKHR) \
+ INS_PFN(EXCLUDED, vkGetDisplayPlaneCapabilities2KHR) \
+ INS_PFN(EXCLUDED, vkGetDisplayPlaneCapabilitiesKHR) \
+ INS_PFN(EXCLUDED, vkGetDisplayPlaneSupportedDisplaysKHR) \
+ INS_PFN(OPTIONAL, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceCooperativeMatrixPropertiesNV) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceDisplayPlaneProperties2KHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceDisplayPlanePropertiesKHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceDisplayProperties2KHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceDisplayPropertiesKHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalBufferProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalBufferPropertiesKHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalFenceProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalFencePropertiesKHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalImageFormatPropertiesNV) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalSemaphoreProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalSemaphorePropertiesKHR) \
+ INS_PFN(REQUIRED, vkGetPhysicalDeviceFeatures) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceFeatures2) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceFeatures2KHR) \
+ INS_PFN(REQUIRED, vkGetPhysicalDeviceFormatProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceFormatProperties2) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceFormatProperties2KHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceImageFormatProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceImageFormatProperties2) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceImageFormatProperties2KHR) \
+ INS_PFN(REQUIRED, vkGetPhysicalDeviceMemoryProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceMemoryProperties2) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceMemoryProperties2KHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceMultisamplePropertiesEXT) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDevicePresentRectanglesKHR) \
+ INS_PFN(REQUIRED, vkGetPhysicalDeviceProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceProperties2) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceProperties2KHR) \
+ INS_PFN(REQUIRED, vkGetPhysicalDeviceQueueFamilyProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceQueueFamilyProperties2) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceQueueFamilyProperties2KHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSparseImageFormatProperties) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSparseImageFormatProperties2) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSparseImageFormatProperties2KHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSurfaceCapabilities2EXT) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSurfaceCapabilities2KHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSurfaceCapabilitiesKHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSurfaceFormats2KHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSurfaceFormatsKHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSurfacePresentModesKHR) \
+ INS_PFN(EXCLUDED, vkGetPhysicalDeviceSurfaceSupportKHR) \
+ INS_PFN(EXCLUDED, vkReleaseDisplayEXT) \
+ DEV_PFN(EXCLUDED, vkGetQueueCheckpointDataNV) \
+ DEV_PFN(OPTIONAL, vkQueueBeginDebugUtilsLabelEXT) \
+ DEV_PFN(EXCLUDED, vkQueueBindSparse) \
+ DEV_PFN(OPTIONAL, vkQueueEndDebugUtilsLabelEXT) \
+ DEV_PFN(OPTIONAL, vkQueueInsertDebugUtilsLabelEXT) \
+ DEV_PFN(EXCLUDED, vkQueuePresentKHR) \
+ DEV_PFN(REQUIRED, vkQueueSubmit) \
+ DEV_PFN(REQUIRED, vkQueueWaitIdle)
+
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_ANDROID_KHR(INS_PFN, DEV_PFN) \
+ DEV_PFN(OPTIONAL, vkGetAndroidHardwareBufferPropertiesANDROID) \
+ DEV_PFN(OPTIONAL, vkGetMemoryAndroidHardwareBufferANDROID) \
+ INS_PFN(EXCLUDED, vkCreateAndroidSurfaceKHR)
+#else
+#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_ANDROID_KHR(INS_PFN, DEV_PFN)
+#endif // VK_USE_PLATFORM_ANDROID_KHR
+
+#ifdef VK_USE_PLATFORM_GGP
+#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_GGP(INS_PFN, DEV_PFN) \
+ INS_PFN(EXCLUDED, vkCreateStreamDescriptorSurfaceGGP)
+#else
+#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_GGP(INS_PFN, DEV_PFN)
+#endif // VK_USE_PLATFORM_GGP
+
+#ifdef VK_USE_PLATFORM_IOS_MVK
+#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_IOS_MVK(INS_PFN, DEV_PFN) \
+ INS_PFN(EXCLUDED, vkCreateIOSSurfaceMVK)
+#else
+#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_IOS_MVK(INS_PFN, DEV_PFN)
+#endif // VK_USE_PLATFORM_IOS_MVK
+
#ifdef VK_USE_PLATFORM_FUCHSIA
// NOTE: renamed from the misspelled "FUSCHIA" to match the
// VK_USE_PLATFORM_FUCHSIA platform define and the FUCHSIA symbol suffix.
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_FUCHSIA(INS_PFN, DEV_PFN) \
  INS_PFN(EXCLUDED, vkCreateImagePipeSurfaceFUCHSIA)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_FUCHSIA(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_FUCHSIA

#ifdef VK_USE_PLATFORM_MACOS_MVK
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_MACOS_MVK(INS_PFN, DEV_PFN) \
  INS_PFN(EXCLUDED, vkCreateMacOSSurfaceMVK)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_MACOS_MVK(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_MACOS_MVK

#ifdef VK_USE_PLATFORM_METAL_EXT
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_METAL_EXT(INS_PFN, DEV_PFN) \
  INS_PFN(EXCLUDED, vkCreateMetalSurfaceEXT)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_METAL_EXT(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_METAL_EXT

#ifdef VK_USE_PLATFORM_VI_NN
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_VI_NN(INS_PFN, DEV_PFN) \
  INS_PFN(EXCLUDED, vkCreateViSurfaceNN)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_VI_NN(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_VI_NN

#ifdef VK_USE_PLATFORM_WAYLAND_KHR
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_WAYLAND_KHR(INS_PFN, DEV_PFN) \
  INS_PFN(EXCLUDED, vkCreateWaylandSurfaceKHR)                         \
  INS_PFN(EXCLUDED, vkGetPhysicalDeviceWaylandPresentationSupportKHR)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_WAYLAND_KHR(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_WAYLAND_KHR

#ifdef VK_USE_PLATFORM_WIN32_KHR
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_WIN32_KHR(INS_PFN, DEV_PFN) \
  DEV_PFN(EXCLUDED, vkAcquireFullScreenExclusiveModeEXT)             \
  DEV_PFN(EXCLUDED, vkGetDeviceGroupSurfacePresentModes2EXT)         \
  DEV_PFN(REQUIRED, vkGetFenceWin32HandleKHR)                        \
  DEV_PFN(EXCLUDED, vkGetMemoryWin32HandleKHR)                       \
  DEV_PFN(EXCLUDED, vkGetMemoryWin32HandleNV)                        \
  DEV_PFN(EXCLUDED, vkGetMemoryWin32HandlePropertiesKHR)             \
  DEV_PFN(REQUIRED, vkGetSemaphoreWin32HandleKHR)                    \
  DEV_PFN(REQUIRED, vkImportFenceWin32HandleKHR)                     \
  DEV_PFN(REQUIRED, vkImportSemaphoreWin32HandleKHR)                 \
  DEV_PFN(EXCLUDED, vkReleaseFullScreenExclusiveModeEXT)             \
  INS_PFN(EXCLUDED, vkCreateWin32SurfaceKHR)                         \
  INS_PFN(EXCLUDED, vkGetPhysicalDeviceSurfacePresentModes2EXT)      \
  INS_PFN(EXCLUDED, vkGetPhysicalDeviceWin32PresentationSupportKHR)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_WIN32_KHR(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_WIN32_KHR

#ifdef VK_USE_PLATFORM_XCB_KHR
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XCB_KHR(INS_PFN, DEV_PFN) \
  INS_PFN(EXCLUDED, vkCreateXcbSurfaceKHR)                         \
  INS_PFN(EXCLUDED, vkGetPhysicalDeviceXcbPresentationSupportKHR)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XCB_KHR(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_XCB_KHR

#ifdef VK_USE_PLATFORM_XLIB_KHR
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XLIB_KHR(INS_PFN, DEV_PFN) \
  INS_PFN(EXCLUDED, vkCreateXlibSurfaceKHR)                         \
  INS_PFN(EXCLUDED, vkGetPhysicalDeviceXlibPresentationSupportKHR)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XLIB_KHR(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_XLIB_KHR

#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XLIB_XRANDR_EXT(INS_PFN, DEV_PFN) \
  INS_PFN(EXCLUDED, vkAcquireXlibDisplayEXT)                               \
  INS_PFN(EXCLUDED, vkGetRandROutputDisplayEXT)
#else
#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XLIB_XRANDR_EXT(INS_PFN, DEV_PFN)
#endif  // VK_USE_PLATFORM_XLIB_XRANDR_EXT

// Aggregates all platform-specific tables. Tables whose platform define is
// not set at compile time expand to nothing.
#define IREE_VULKAN_DYNAMIC_SYMBOL_PLATFORM_TABLES(INS_PFN, DEV_PFN) \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_ANDROID_KHR(INS_PFN, DEV_PFN)     \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_GGP(INS_PFN, DEV_PFN)             \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_IOS_MVK(INS_PFN, DEV_PFN)         \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_FUCHSIA(INS_PFN, DEV_PFN)         \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_MACOS_MVK(INS_PFN, DEV_PFN)       \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_METAL_EXT(INS_PFN, DEV_PFN)       \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_VI_NN(INS_PFN, DEV_PFN)           \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_WAYLAND_KHR(INS_PFN, DEV_PFN)     \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_WIN32_KHR(INS_PFN, DEV_PFN)       \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XCB_KHR(INS_PFN, DEV_PFN)         \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XLIB_KHR(INS_PFN, DEV_PFN)        \
  IREE_VULKAN_DYNAMIC_SYMBOL_TABLE_XLIB_XRANDR_EXT(INS_PFN, DEV_PFN)
+
+#define IREE_VULKAN_DYNAMIC_SYMBOL_INSTANCE_DEVICE_TABLES(INS_PFN, DEV_PFN) \
+ IREE_VULKAN_DYNAMIC_SYMBOL_COMMON_TABLE(INS_PFN, DEV_PFN) \
+ IREE_VULKAN_DYNAMIC_SYMBOL_PLATFORM_TABLES(INS_PFN, DEV_PFN)
+
+#define IREE_VULKAN_DYNAMIC_SYMBOL_TABLES(INS_PFN, DEV_PFN) \
+ IREE_VULKAN_DYNAMIC_SYMBOL_INSTANCELESS_TABLE(INS_PFN) \
+ IREE_VULKAN_DYNAMIC_SYMBOL_COMMON_TABLE(INS_PFN, DEV_PFN) \
+ IREE_VULKAN_DYNAMIC_SYMBOL_PLATFORM_TABLES(INS_PFN, DEV_PFN)
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_DYNAMIC_SYMBOL_TABLES_H_
diff --git a/runtime/src/iree/hal/vulkan/dynamic_symbols.cc b/runtime/src/iree/hal/vulkan/dynamic_symbols.cc
new file mode 100644
index 0000000..1b2bc9d
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/dynamic_symbols.cc
@@ -0,0 +1,265 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/dynamic_symbols.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbol_tables.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
// Read-only table of function pointer information designed to be in .rdata.
// To reduce binary size this structure is packed (knowing that we won't have
// gigabytes of function pointers :).
struct FunctionPtrInfo {
  // Name of the function (like 'vkSomeFunction').
  const char* function_name;
  // 1 if the function pointer can be resolved via vkGetDeviceProcAddr.
  uint32_t is_device : 1;
  // 1 if the function is required and the loader should bail if not found.
  uint32_t is_required : 1;
  // TODO(benvanik): remove from table by manually walking sizeof(uintptr_t).
  // An offset in bytes from the base of &syms to where the PFN_vkSomeFunction
  // member is located. The 30-bit field caps the struct size at ~1GB, which
  // is far beyond what the symbol table will ever need.
  uint32_t member_offset : 30;
} IREE_ATTRIBUTE_PACKED;
+
+namespace {
+
// Stamps out one FunctionPtrInfo initializer per REQUIRED/OPTIONAL symbol;
// EXCLUDED symbols expand to nothing and take no table space.
#define REQUIRED_PFN_FUNCTION_PTR(function_name, is_device) \
  {#function_name, is_device, 1, offsetof(DynamicSymbols, function_name)},
#define OPTIONAL_PFN_FUNCTION_PTR(function_name, is_device) \
  {#function_name, is_device, 0, offsetof(DynamicSymbols, function_name)},
#define EXCLUDED_PFN_FUNCTION_PTR(function_name, is_device)
// INS_/DEV_ tag the entry as instance-resolved (is_device=0) or
// device-resolved (is_device=1) and dispatch on the requirement token.
#define INS_PFN_FUNCTION_PTR(requirement, function_name) \
  requirement##_PFN_FUNCTION_PTR(function_name, 0)
#define DEV_PFN_FUNCTION_PTR(requirement, function_name) \
  requirement##_PFN_FUNCTION_PTR(function_name, 1)

// Defines the table of mandatory FunctionPtrInfos resolved prior to instance
// creation. These are safe to call with no instance parameter and should be
// exported by all loaders/ICDs.
static constexpr const FunctionPtrInfo kInstancelessFunctionPtrInfos[] = {
    IREE_VULKAN_DYNAMIC_SYMBOL_INSTANCELESS_TABLE(INS_PFN_FUNCTION_PTR)};

// Defines the table of FunctionPtrInfos for dynamic loading that must wait
// until an instance has been created to be resolved.
static constexpr const FunctionPtrInfo kDynamicFunctionPtrInfos[] = {
    IREE_VULKAN_DYNAMIC_SYMBOL_INSTANCE_DEVICE_TABLES(INS_PFN_FUNCTION_PTR,
                                                      DEV_PFN_FUNCTION_PTR)};

// Candidate file names for the system Vulkan loader, tried in order by
// CreateFromSystemLoader.
static const char* kVulkanLoaderSearchNames[] = {
#if defined(IREE_PLATFORM_ANDROID)
    "libvulkan.so",
#elif defined(IREE_PLATFORM_IOS) || defined(IREE_PLATFORM_MACOS)
    "libvulkan.dylib",
#elif defined(IREE_PLATFORM_WINDOWS)
    "vulkan-1.dll",
#else
    "libvulkan.so.1",
#endif  // IREE_PLATFORM_ANDROID
};
+
// Resolves vkGetInstanceProcAddr and all instance-independent function
// pointers in |syms| by looking up symbols via |get_proc_addr|.
// Fails with UNAVAILABLE if vkGetInstanceProcAddr or any REQUIRED
// instanceless symbol cannot be found.
iree_status_t ResolveFunctions(
    DynamicSymbols* syms, const DynamicSymbols::GetProcAddrFn& get_proc_addr) {
  // Resolve the method the shared object uses to resolve other functions.
  // Some libraries will export all symbols while others will only export this
  // single function.
  syms->vkGetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>(
      get_proc_addr("vkGetInstanceProcAddr"));

#if defined(IREE_PLATFORM_ANDROID)
  // Since Android 8 Oreo, Android re-architected the OS framework with project
  // Treble. Framework libraries and vendor libraries have a more strict and
  // clear separation. Their dependencies are carefully scrutinized and only
  // selected cases are allowed. This is enforced with linker namespaces.
  //
  // /data/local/tmp is the preferred directory for automating native binary
  // tests built using NDK toolchain. They should be allowed to access libraries
  // like libvulkan.so for their functionality. However, there was an issue
  // with fully treblized Android 10 where /data/local/tmp did not have access
  // to the linker namespaces needed by libvulkan.so. This is fixed via
  // https://android.googlesource.com/platform/system/linkerconfig/+/296da5b1eb88a3527ee76352c2d987f82f3252eb
  //
  // But as typically in the Android system, it takes a long time to see the
  // fix getting propagated, if ever. A known workaround is to symlink the
  // vendor Vulkan implementation under /vendor/lib[64]/hw/vulkan.*.so as
  // libvulkan.so under /data/local/tmp and use LD_LIBRARY_PATH=/data/local/tmp
  // when invoking the test binaries. This effectively bypasses the Android
  // Vulkan loader. This is fine for ARM Mali GPUs, whose driver exposes
  // the symbol `vkGetInstanceProcAddr`. But for Qualcomm Adreno GPUs,
  // the Vulkan implementation library does not directly expose the symbol.
  // Instead it's hidden as `qglinternal::vkGetInstanceProcAddr`. So try to
  // see whether we can get this symbol. This is a reasonable workaround
  // as otherwise it means we need to wrap. every. single. binary. test.
  // as. a. full-blown. Android. app.
  if (!syms->vkGetInstanceProcAddr) {
    syms->vkGetInstanceProcAddr =
        reinterpret_cast<PFN_vkGetInstanceProcAddr>(get_proc_addr(
            // C++ mangled name for "qglinternal::vkGetInstanceProcAddr"
            "_ZN11qglinternal21vkGetInstanceProcAddrEP12VkInstance_TPKc"));
  }
#endif  // IREE_PLATFORM_ANDROID

  if (!syms->vkGetInstanceProcAddr) {
    return iree_make_status(
        IREE_STATUS_UNAVAILABLE,
        "required method vkGetInstanceProcAddr not found in provided Vulkan "
        "library (did you pick the wrong file?)");
  }

  // Resolve the mandatory functions that we need to create instances.
  // If the provided |get_proc_addr| cannot resolve these then it's not a loader
  // or ICD we want to use, anyway.
  for (int i = 0; i < IREE_ARRAYSIZE(kInstancelessFunctionPtrInfos); ++i) {
    const auto& function_ptr = kInstancelessFunctionPtrInfos[i];
    // Patch the PFN_* member of |syms| in place at the byte offset recorded
    // in the packed info table.
    auto* member_ptr = reinterpret_cast<PFN_vkVoidFunction*>(
        reinterpret_cast<uint8_t*>(syms) + function_ptr.member_offset);
    *member_ptr =
        syms->vkGetInstanceProcAddr(VK_NULL_HANDLE, function_ptr.function_name);
    if (*member_ptr == nullptr) {
      return iree_make_status(
          IREE_STATUS_UNAVAILABLE,
          "mandatory Vulkan function %s not available; invalid loader/ICD?",
          function_ptr.function_name);
    }
  }

  return iree_ok_status();
}
+
+} // namespace
+
+// static
+iree_status_t DynamicSymbols::Create(const GetProcAddrFn& get_proc_addr,
+ ref_ptr<DynamicSymbols>* out_syms) {
+ IREE_TRACE_SCOPE0("DynamicSymbols::Create");
+
+ auto syms = make_ref<DynamicSymbols>();
+ IREE_RETURN_IF_ERROR(ResolveFunctions(syms.get(), get_proc_addr));
+ syms->FixupExtensionFunctions();
+
+ *out_syms = std::move(syms);
+ return iree_ok_status();
+}
+
// static
// Loads the system Vulkan loader library (searching kVulkanLoaderSearchNames)
// and resolves the instanceless symbols from it. The returned DynamicSymbols
// owns the loader library handle and releases it in its destructor.
iree_status_t DynamicSymbols::CreateFromSystemLoader(
    ref_ptr<DynamicSymbols>* out_syms) {
  IREE_TRACE_SCOPE0("DynamicSymbols::CreateFromSystemLoader");

  iree_dynamic_library_t* loader_library = NULL;
  iree_status_t status = iree_dynamic_library_load_from_files(
      IREE_ARRAYSIZE(kVulkanLoaderSearchNames), kVulkanLoaderSearchNames,
      IREE_DYNAMIC_LIBRARY_FLAG_NONE, iree_allocator_system(), &loader_library);
  if (iree_status_is_not_found(status)) {
    // Map NOT_FOUND to a friendlier UNAVAILABLE with actionable guidance.
    iree_status_ignore(status);
    return iree_make_status(
        IREE_STATUS_UNAVAILABLE,
        "Vulkan runtime library not available; ensure installed and on path");
  } else if (!iree_status_is_ok(status)) {
    return status;
  }

  auto syms = make_ref<DynamicSymbols>();
  // Transfer ownership of |loader_library| to |syms|; if resolution below
  // fails the ref_ptr destructor will release it.
  syms->loader_library_ = loader_library;

  IREE_RETURN_IF_ERROR(
      ResolveFunctions(syms.get(), [loader_library](const char* function_name) {
        // Resolve each symbol directly from the loader library's export
        // table; failed lookups become NULL (required ones are rejected by
        // ResolveFunctions).
        PFN_vkVoidFunction fn = NULL;
        iree_status_t status = iree_dynamic_library_lookup_symbol(
            loader_library, function_name, (void**)&fn);
        if (!iree_status_is_ok(status)) {
          IREE_IGNORE_ERROR(status);
          return (PFN_vkVoidFunction)NULL;
        }
        return fn;
      }));
  syms->FixupExtensionFunctions();

  *out_syms = std::move(syms);
  return iree_ok_status();
}
+
// Loads instance-scoped symbols; equivalent to LoadFromDevice with a null
// device, in which case all device functions are resolved through
// vkGetInstanceProcAddr instead.
iree_status_t DynamicSymbols::LoadFromInstance(VkInstance instance) {
  IREE_TRACE_SCOPE0("DynamicSymbols::LoadFromInstance");
  return LoadFromDevice(instance, VK_NULL_HANDLE);
}
+
// Resolves all entries in kDynamicFunctionPtrInfos into this struct's PFN_*
// members. Device-tagged entries are resolved via vkGetDeviceProcAddr when
// |device| is non-null; everything else (and all entries when |device| is
// null) resolves via vkGetInstanceProcAddr. Missing REQUIRED functions fail
// with UNAVAILABLE; missing OPTIONAL functions are left as nullptr.
iree_status_t DynamicSymbols::LoadFromDevice(VkInstance instance,
                                             VkDevice device) {
  IREE_TRACE_SCOPE0("DynamicSymbols::LoadFromDevice");

  if (!instance) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "instance must have been created and a default "
                            "instance proc lookup function is required");
  }

  // Setup the lookup methods first. The rest of the syms uses these to
  // resolve function pointers.
  this->vkGetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
      this->vkGetInstanceProcAddr(instance, "vkGetDeviceProcAddr"));
  if (!this->vkGetDeviceProcAddr) {
    return iree_make_status(IREE_STATUS_UNAVAILABLE,
                            "required Vulkan function vkGetDeviceProcAddr not "
                            "available; invalid driver handle?");
  }

  // Load the rest of the functions.
  for (int i = 0; i < IREE_ARRAYSIZE(kDynamicFunctionPtrInfos); ++i) {
    const auto& function_ptr = kDynamicFunctionPtrInfos[i];
    // Patch the PFN_* member in place at the byte offset recorded in the
    // packed info table.
    auto* member_ptr = reinterpret_cast<PFN_vkVoidFunction*>(
        reinterpret_cast<uint8_t*>(this) + function_ptr.member_offset);
    if (function_ptr.is_device && device) {
      *member_ptr =
          this->vkGetDeviceProcAddr(device, function_ptr.function_name);
    } else {
      *member_ptr =
          this->vkGetInstanceProcAddr(instance, function_ptr.function_name);
    }
    if (*member_ptr == nullptr && function_ptr.is_required) {
      return iree_make_status(IREE_STATUS_UNAVAILABLE,
                              "required Vulkan function %s not available",
                              function_ptr.function_name);
    }
  }

  FixupExtensionFunctions();

  return iree_ok_status();
}
+
DynamicSymbols::DynamicSymbols() = default;

DynamicSymbols::~DynamicSymbols() {
  // Release the loader library if this instance owns one (only set by
  // CreateFromSystemLoader).
  if (loader_library_) {
    iree_dynamic_library_release(loader_library_);
  }
}
+
+void DynamicSymbols::FixupExtensionFunctions() {
+ this->vkGetSemaphoreCounterValue = this->vkGetSemaphoreCounterValue
+ ? this->vkGetSemaphoreCounterValue
+ : this->vkGetSemaphoreCounterValueKHR;
+ this->vkWaitSemaphores = this->vkWaitSemaphores ? this->vkWaitSemaphores
+ : this->vkWaitSemaphoresKHR;
+ this->vkSignalSemaphore = this->vkSignalSemaphore
+ ? this->vkSignalSemaphore
+ : this->vkSignalSemaphoreKHR;
+}
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/dynamic_symbols.h b/runtime/src/iree/hal/vulkan/dynamic_symbols.h
new file mode 100644
index 0000000..d02c0ea
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/dynamic_symbols.h
@@ -0,0 +1,128 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_DYNAMIC_SYMBOLS_H_
+#define IREE_HAL_VULKAN_DYNAMIC_SYMBOLS_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h" // IWYU pragma: export
+// clang-format on
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/dynamic_library.h"
+#include "iree/base/status_cc.h"
+#include "iree/hal/vulkan/dynamic_symbol_tables.h" // IWYU pragma: export
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+struct FunctionPtrInfo;
+
+// Dynamic Vulkan function loader for use with vulkan.hpp.
+// This loader is a subset of the DispatchLoaderDynamic implementation that only
+// loads functions we are interested in (a compute-specific subset) and avoids
+// extensions we will never use.
+//
+// This exposes all Vulkan methods as function pointer members. Optional
+// methods will be nullptr if not present. Excluded methods will be omitted.
+//
+// DynamicSymbols instances are designed to be passed to vulkan.hpp methods as
+// the last argument, though they may also be called directly.
+// **Always make sure to pass the loader to vulkan.hpp methods!**
+//
+// Loading is performed by walking a table of required and optional functions
+// (defined in dynamic_symbol_tables.h) and populating the member function
+// pointers exposed on this struct when available. For example, if the
+// vkSomeFunction method is marked in the table as OPTIONAL the loader will
+// attempt to lookup the function and if successful set the
+// DynamicSymbols::vkSomeFunction pointer to the resolved address. If the
+// function is not found then it will be set to nullptr so users can check for
+// function availability.
+//
+// Documentation:
+// https://github.com/KhronosGroup/Vulkan-Hpp#extensions--per-device-function-pointers
+//
+// Usage:
+//  ref_ptr<DynamicSymbols> syms;
+//  IREE_RETURN_IF_ERROR(DynamicSymbols::CreateFromSystemLoader(&syms));
+//  VkInstance instance = VK_NULL_HANDLE;
+//  syms->vkCreateInstance(..., &instance);
+//  IREE_RETURN_IF_ERROR(syms->LoadFromInstance(instance));
+struct DynamicSymbols : public RefObject<DynamicSymbols> {
+  // Resolves a single function by name in whatever scope the caller binds.
+  using GetProcAddrFn =
+      std::function<PFN_vkVoidFunction(const char* function_name)>;
+
+  DynamicSymbols();
+  ~DynamicSymbols();
+
+  // Creates the dynamic symbol table using the given |get_proc_addr| to resolve
+  // the vkCreateInstance function.
+  //
+  // After the instance is created the caller must use LoadFromInstance (or
+  // LoadFromDevice) to load the remaining symbols.
+  static iree_status_t Create(const GetProcAddrFn& get_proc_addr,
+                              ref_ptr<DynamicSymbols>* out_syms);
+
+  // Loads all required and optional Vulkan functions from the Vulkan loader.
+  // This will look for a Vulkan loader on the system (like libvulkan.so) and
+  // dlsym the functions from that.
+  //
+  // The loaded function pointers will point to thunks in the ICD. This may
+  // enable additional debug checking and more readable stack traces (as
+  // errors come from within the ICD, where we have symbols).
+  static iree_status_t CreateFromSystemLoader(
+      ref_ptr<DynamicSymbols>* out_syms);
+
+  // Loads all required and optional Vulkan functions from the given instance.
+  //
+  // The loaded function pointers will point to thunks in the ICD. This may
+  // enable additional debug checking and more readable stack traces (as
+  // errors come from within the ICD, where we have symbols).
+  iree_status_t LoadFromInstance(VkInstance instance);
+
+  // Loads all required and optional Vulkan functions from the given device,
+  // falling back to the instance when required.
+  //
+  // This attempts to directly query the methods from the device, bypassing any
+  // ICD or shim layers. These methods will generally have less overhead at
+  // runtime as they need not jump through the various trampolines.
+  iree_status_t LoadFromDevice(VkInstance instance, VkDevice device);
+
+  // Define members for each function pointer.
+  // See dynamic_symbol_tables.h for the full list of methods.
+  //
+  // Each required and optional function in the loader tables will expand to
+  // the following member, such as for example 'vkSomeFunction':
+  //   PFN_vkSomeFunction vkSomeFunction;
+#define REQUIRED_PFN(function_name) PFN_##function_name function_name = nullptr
+#define OPTIONAL_PFN(function_name) PFN_##function_name function_name = nullptr
+#define EXCLUDED_PFN(function_name)
+#define PFN_MEMBER(requirement, function_name) requirement##_PFN(function_name);
+  // The two bootstrap entry points are always required; everything else comes
+  // from the tables.
+  REQUIRED_PFN(vkGetInstanceProcAddr);
+  REQUIRED_PFN(vkGetDeviceProcAddr);
+  IREE_VULKAN_DYNAMIC_SYMBOL_TABLES(PFN_MEMBER, PFN_MEMBER);
+#undef REQUIRED_PFN
+#undef OPTIONAL_PFN
+#undef EXCLUDED_PFN
+#undef PFN_MEMBER
+
+ private:
+  void FixupExtensionFunctions();
+
+  // Optional Vulkan Loader dynamic library.
+  iree_dynamic_library_t* loader_library_ = nullptr;
+};
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_DYNAMIC_SYMBOLS_H_
diff --git a/runtime/src/iree/hal/vulkan/dynamic_symbols_test.cc b/runtime/src/iree/hal/vulkan/dynamic_symbols_test.cc
new file mode 100644
index 0000000..4d96c92
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/dynamic_symbols_test.cc
@@ -0,0 +1,63 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/dynamic_symbols.h"
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+namespace {
+
+// Builds the VkApplicationInfo used when creating the test VkInstance.
+VkApplicationInfo GetApplicationInfo() {
+  VkApplicationInfo info = {};
+  info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+  info.pNext = nullptr;
+  info.pApplicationName = "IREE-ML-TEST";
+  info.applicationVersion = 0;
+  info.pEngineName = "IREE";
+  info.engineVersion = 0;
+  info.apiVersion = VK_API_VERSION_1_0;
+  return info;
+}
+
+// Builds a minimal VkInstanceCreateInfo (no layers, no extensions) that
+// references the caller-owned |app_info|.
+VkInstanceCreateInfo GetInstanceCreateInfo(VkApplicationInfo* app_info) {
+  VkInstanceCreateInfo info = {};
+  info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+  info.pNext = nullptr;
+  info.flags = 0;
+  info.pApplicationInfo = app_info;
+  info.enabledLayerCount = 0;
+  info.ppEnabledLayerNames = nullptr;
+  info.enabledExtensionCount = 0;
+  info.ppEnabledExtensionNames = nullptr;
+  return info;
+}
+
+// Loads the symbols from the system Vulkan loader and then exercises them by
+// creating and destroying a real VkInstance; this verifies the resolved
+// function pointers are actually callable, not just non-null.
+TEST(DynamicSymbolsTest, CreateFromSystemLoader) {
+  iree::ref_ptr<iree::hal::vulkan::DynamicSymbols> syms;
+  IREE_ASSERT_OK(DynamicSymbols::CreateFromSystemLoader(&syms));
+
+  // Create and destroy a VkInstance using the symbols. This is mainly testing
+  // that the symbols were loaded successfully and are actually able to be used.
+  VkApplicationInfo app_info = GetApplicationInfo();
+  VkInstanceCreateInfo create_info = GetInstanceCreateInfo(&app_info);
+  VkInstance instance = VK_NULL_HANDLE;
+  ASSERT_EQ(VK_SUCCESS, syms->vkCreateInstance(
+                            &create_info, /*pAllocator=*/nullptr, &instance));
+
+  // Instance-level symbols require a live instance to resolve.
+  IREE_ASSERT_OK(syms->LoadFromInstance(instance));
+
+  syms->vkDestroyInstance(instance, /*pAllocator=*/nullptr);
+}
+
+} // namespace
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/emulated_semaphore.cc b/runtime/src/iree/hal/vulkan/emulated_semaphore.cc
new file mode 100644
index 0000000..f83f20e
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/emulated_semaphore.cc
@@ -0,0 +1,649 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/emulated_semaphore.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cstddef>
+#include <cstdint>
+#include <ostream>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/logging.h"
+#include "iree/base/status_cc.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/serializing_command_queue.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/intrusive_list.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+namespace {
+
+// Scoped lock guard for an iree_slim_mutex_t (analogous to std::lock_guard):
+// locks on construction, unlocks on destruction. Thread-safety analysis is
+// disabled because the mutex is manipulated through the C API rather than
+// annotated C++ wrappers.
+class RAIILock {
+ public:
+  explicit RAIILock(iree_slim_mutex_t* mu)
+      IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis)
+      : mu_(mu) {
+    iree_slim_mutex_lock(mu_);
+  }
+  ~RAIILock() IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+    iree_slim_mutex_unlock(mu_);
+  }
+
+ private:
+  iree_slim_mutex_t* mu_;
+};
+
+}  // namespace
+
+// Emulates a Vulkan timeline semaphore on top of binary VkSemaphores and
+// VkFences (for pre-1.2 / no-extension devices). See emulated_semaphore.h for
+// the full design discussion.
+class EmulatedTimelineSemaphore final {
+ public:
+  EmulatedTimelineSemaphore(VkDeviceHandle* logical_device,
+                            TimePointSemaphorePool* semaphore_pool,
+                            iree_host_size_t command_queue_count,
+                            iree::hal::vulkan::CommandQueue** command_queues,
+                            uint64_t initial_value);
+
+  ~EmulatedTimelineSemaphore();
+
+  // Returns the currently signaled timeline value, advancing the timeline
+  // first if any pending time points have resolved.
+  iree_status_t Query(uint64_t* out_value);
+
+  // Signals the timeline to |value| from the host; must be monotonically
+  // increasing.
+  iree_status_t Signal(uint64_t value);
+
+  // Blocks until the timeline reaches |value| or |timeout| elapses.
+  iree_status_t Wait(uint64_t value, iree_timeout_t timeout);
+
+  // Fails the timeline, taking ownership of |status|.
+  void Fail(iree_status_t status);
+
+  // Gets a binary semaphore for waiting on the timeline to advance to the given
+  // |value|. The semaphore returned won't be waited by anyone else. Returns
+  // VK_NULL_HANDLE if no available semaphores for the given |value|.
+  // |wait_fence| is the fence associated with the queue submission that is
+  // waiting on this semaphore.
+  VkSemaphore GetWaitSemaphore(uint64_t value,
+                               const ref_ptr<TimePointFence>& wait_fence);
+
+  // Cancels the waiting attempt on the given binary |semaphore|. This allows
+  // the |semaphore| to be waited by others.
+  iree_status_t CancelWaitSemaphore(VkSemaphore semaphore);
+
+  // Gets a binary semaphore for signaling the timeline to the given |value|.
+  // |value| must be smaller than the current timeline value. |signal_fence| is
+  // the fence associated with the queue submission that signals this semaphore.
+  iree_status_t GetSignalSemaphore(uint64_t value,
+                                   const ref_ptr<TimePointFence>& signal_fence,
+                                   VkSemaphore* out_handle);
+
+ private:
+  // Tries to advance the timeline to the given |to_upper_value| without
+  // blocking and returns whether the |to_upper_value| is reached.
+  iree_status_t TryToAdvanceTimeline(uint64_t to_upper_value,
+                                     bool* out_reached_upper_value);
+  // Similar to the above, but also returns the fences that are known to have
+  // already signaled via |signaled_fences|.
+  iree_status_t TryToAdvanceTimeline(uint64_t to_upper_value,
+                                     bool* out_reached_upper_value,
+                                     std::vector<VkFence>* out_signaled_fences);
+
+  // Highest value known to have signaled; UINT64_MAX doubles as the failure
+  // sentinel (see Fail/Query).
+  std::atomic<uint64_t> signaled_value_;
+
+  VkDeviceHandle* logical_device_;
+  TimePointSemaphorePool* semaphore_pool_;
+
+  iree_host_size_t command_queue_count_;
+  CommandQueue** command_queues_;
+
+  mutable iree_slim_mutex_t mutex_;
+
+  // A list of outstanding semaphores used to emulate time points.
+  //
+  // The lifetime of each semaphore is in one of the following states:
+  //
+  // * Unused state: value = UINT64_MAX, signal/wait fence = nullptr. This is
+  //   the state of the semaphore when it's initially acquired from the pool and
+  //   not put in the queue for emulating a time point yet.
+  // * Pending state: signaled value < value < UINT64_MAX, signal fence =
+  //   <some-fence>, wait fence == nullptr. This is the state of the semaphore
+  //   when it's put into the GPU queue for emulating a time point.
+  // * Pending and waiting state: signaled value < value < UINT64_MAX, signal
+  //   fence = <some-fence>, wait fence == <some-fence>. This is the state of
+  //   the semaphore when it's put into the GPU queue for emulating a time
+  //   point and there is another queue submission waiting on it in GPU.
+  // * Signaled and not ever waited state: value <= signaled value, signal/wait
+  //   fence = nullptr. This is the state of the semaphore when we know it's
+  //   already signaled on GPU and there are no waiters for it.
+  // * Signaled and waiting state: value <= signaled value, signal fence =
+  //   nullptr, wait fence = <some-fence>. This is the state of the semaphore
+  //   when we know it's already signaled on GPU and there is still one queue
+  //   submission on GPU is waiting for it.
+  IntrusiveList<TimePointSemaphore> outstanding_semaphores_
+      IREE_GUARDED_BY(mutex_);
+
+  // NOTE: We only need to access this status (and thus take the lock) when we
+  // want to either signal failure or query the status in the case of the
+  // semaphore being set to UINT64_MAX.
+  iree_status_t status_ IREE_GUARDED_BY(mutex_) = iree_ok_status();
+};
+
+EmulatedTimelineSemaphore::EmulatedTimelineSemaphore(
+    VkDeviceHandle* logical_device, TimePointSemaphorePool* semaphore_pool,
+    iree_host_size_t command_queue_count, CommandQueue** command_queues,
+    uint64_t initial_value)
+    : signaled_value_(initial_value),
+      logical_device_(logical_device),
+      semaphore_pool_(semaphore_pool),
+      command_queue_count_(command_queue_count),
+      command_queues_(command_queues) {
+  // All pointer members are unowned references held for the semaphore's
+  // lifetime; only the mutex is initialized here.
+  iree_slim_mutex_initialize(&mutex_);
+}
+
+EmulatedTimelineSemaphore::~EmulatedTimelineSemaphore() {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::dtor");
+  // Resolve everything that has already signaled so the outstanding list is
+  // drained; destroying with live time points is a caller bug (checked below).
+  IREE_CHECK_OK(
+      TryToAdvanceTimeline(UINT64_MAX, /*out_reached_upper_value=*/NULL));
+
+  iree_slim_mutex_lock(&mutex_);
+  IREE_CHECK(outstanding_semaphores_.empty())
+      << "Destroying an emulated timeline semaphore without first waiting on "
+         "outstanding signals";
+  iree_status_free(status_);
+  iree_slim_mutex_unlock(&mutex_);
+  iree_slim_mutex_deinitialize(&mutex_);
+}
+
+iree_status_t EmulatedTimelineSemaphore::Query(uint64_t* out_value) {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Query");
+  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::Query";
+  // Resolve any fences that have signaled since the last call so the value we
+  // report is as fresh as possible.
+  IREE_RETURN_IF_ERROR(
+      TryToAdvanceTimeline(UINT64_MAX, /*out_reached_upper_value=*/NULL));
+  uint64_t value = signaled_value_.load();
+  IREE_DVLOG(2) << "Current timeline value: " << value;
+  if (value == UINT64_MAX) {
+    // UINT64_MAX is the failure sentinel (set by Fail); surface a clone of the
+    // stored failure status so ownership of status_ stays with this object.
+    RAIILock locker(&mutex_);
+    return iree_status_clone(status_);
+  }
+  *out_value = value;
+  return iree_ok_status();
+}
+
+iree_status_t EmulatedTimelineSemaphore::Signal(uint64_t value) {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Signal");
+  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::Signal";
+  auto signaled_value = signaled_value_.exchange(value);
+  IREE_DVLOG(2) << "Previous value: " << signaled_value
+                << "; new value: " << value;
+  // Make sure the previous signaled value is smaller than the new value.
+  IREE_CHECK(signaled_value < value)
+      << "Attempting to signal a timeline value out of order; trying " << value
+      << " but " << signaled_value << " already signaled";
+
+  // Inform the device to make progress given we have a new value signaled now:
+  // queued submissions may have been waiting on this timeline value.
+  for (iree_host_size_t i = 0; i < command_queue_count_; ++i) {
+    IREE_RETURN_IF_ERROR(((SerializingCommandQueue*)command_queues_[i])
+                             ->AdvanceQueueSubmission());
+  }
+
+  return iree_ok_status();
+}
+
+iree_status_t EmulatedTimelineSemaphore::Wait(uint64_t value,
+                                              iree_timeout_t timeout) {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Wait");
+  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::Wait";
+
+  iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+  // Poll until a fence covering |value| appears (or the deadline passes), then
+  // block on that fence below.
+  VkFence fence = VK_NULL_HANDLE;
+  do {
+    IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Wait#loop");
+    // First try to advance the timeline without blocking to see whether we've
+    // already reached the desired value.
+    bool reached_desired_value = false;
+    IREE_RETURN_IF_ERROR(TryToAdvanceTimeline(value, &reached_desired_value));
+    if (reached_desired_value) return iree_ok_status();
+
+    // We must wait now. Find the first emulated time point that has a value >=
+    // the desired value so we can wait on its associated signal fence to make
+    // sure the timeline is advanced to the desired value.
+    RAIILock locker(&mutex_);
+    auto semaphore = outstanding_semaphores_.begin();
+    for (; semaphore != outstanding_semaphores_.end(); ++semaphore) {
+      if ((*semaphore)->value >= value) break;
+    }
+    if (semaphore != outstanding_semaphores_.end()) {
+      if (!(*semaphore)->signal_fence) {
+        return iree_make_status(IREE_STATUS_INTERNAL,
+                                "timeline should have a signal fence for the "
+                                "first time point beyond the signaled value");
+      }
+      IREE_DVLOG(2) << "Found timepoint semaphore " << *semaphore
+                    << " (value: " << (*semaphore)->value
+                    << ") to wait for desired timeline value: " << value;
+      fence = (*semaphore)->signal_fence->value();
+      // Found; we can break the loop and proceed to waiting now.
+      break;
+    }
+    // TODO(antiagainst): figure out a better way instead of the busy loop here.
+  } while (iree_time_now() < deadline_ns);
+
+  if (fence == VK_NULL_HANDLE) {
+    // NOTE: not an error; it may be expected that the semaphore is not ready.
+    return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  }
+
+  // Block on the fence for whatever time remains of the caller's deadline.
+  uint64_t timeout_ns =
+      static_cast<uint64_t>(iree_absolute_deadline_to_timeout_ns(deadline_ns));
+  VK_RETURN_IF_ERROR(logical_device_->syms()->vkWaitForFences(
+                         *logical_device_, /*fenceCount=*/1, &fence,
+                         /*waitAll=*/true, timeout_ns),
+                     "vkWaitForFences");
+
+  // Fold the now-signaled fence back into the timeline state.
+  return TryToAdvanceTimeline(value, /*out_reached_upper_value=*/NULL);
+}
+
+// Moves the timeline into the failed state, taking ownership of |status|.
+// Only the first failure is retained; later failures are dropped.
+void EmulatedTimelineSemaphore::Fail(iree_status_t status) {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Fail");
+  RAIILock locker(&mutex_);
+  if (status_) {
+    // Already failed: keep the first recorded status. We still own the
+    // incoming |status| and must release it to avoid leaking the allocation.
+    iree_status_ignore(status);
+    return;
+  }
+  status_ = status;
+  // UINT64_MAX is the failure sentinel observed by Query()/waiters.
+  signaled_value_.store(UINT64_MAX);
+}
+
+// Returns a binary semaphore covering timeline |value| that no other
+// submission is waiting on, or VK_NULL_HANDLE if none is available.
+VkSemaphore EmulatedTimelineSemaphore::GetWaitSemaphore(
+    uint64_t value, const ref_ptr<TimePointFence>& wait_fence) {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::GetWaitSemaphore");
+  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::GetWaitSemaphore";
+
+  RAIILock locker(&mutex_);
+
+  VkSemaphore semaphore = VK_NULL_HANDLE;
+  for (TimePointSemaphore* point : outstanding_semaphores_) {
+    // Only hand out time points that nobody is waiting on yet: a binary
+    // semaphore wait unsignals it, so each may have at most one waiter (see
+    // the state machine documented on outstanding_semaphores_). The previous
+    // check of `point->wait_fence` selected already-waited points and
+    // clobbered their wait fence.
+    if (point->value > value && !point->wait_fence) {
+      point->wait_fence = add_ref(wait_fence);
+      semaphore = point->semaphore;
+      break;
+    }
+  }
+
+  IREE_DVLOG(2) << "Binary VkSemaphore to wait on for timeline value (" << value
+                << ") and wait fence (" << wait_fence.get()
+                << "): " << semaphore;
+
+  return semaphore;
+}
+
+iree_status_t EmulatedTimelineSemaphore::CancelWaitSemaphore(
+    VkSemaphore semaphore) {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::CancelWaitSemaphore");
+  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::CancelWaitSemaphore";
+
+  RAIILock locker(&mutex_);
+  for (TimePointSemaphore* point : outstanding_semaphores_) {
+    if (point->semaphore != semaphore) continue;
+
+    // Cancelling something that was never handed out via GetWaitSemaphore is
+    // a caller bug.
+    if (!point->wait_fence) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "time point wasn't waited before");
+    }
+    // Dropping the wait fence returns the time point to the "no waiter" state
+    // so another submission may wait on it.
+    point->wait_fence = nullptr;
+    IREE_DVLOG(2) << "Cancelled waiting on binary VkSemaphore: " << semaphore;
+    return iree_ok_status();
+  }
+  return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                          "no time point for the given semaphore");
+}
+
+// Acquires a pooled binary semaphore that a queue submission will signal when
+// the timeline reaches |value| and inserts it (sorted by value) into the
+// outstanding list.
+iree_status_t EmulatedTimelineSemaphore::GetSignalSemaphore(
+    uint64_t value, const ref_ptr<TimePointFence>& signal_fence,
+    VkSemaphore* out_handle) {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::GetSignalSemaphore");
+  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::GetSignalSemaphore";
+
+  if (signaled_value_.load() >= value) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "timeline semaphore already signaled past %" PRIu64,
+                            value);
+  }
+
+  RAIILock locker(&mutex_);
+
+  // Find the first outstanding time point with a larger value so insertion
+  // keeps the list sorted ascending by value. The previous loop body never
+  // advanced the iterator and would spin forever on a non-empty list.
+  auto insertion_point = outstanding_semaphores_.begin();
+  while (insertion_point != outstanding_semaphores_.end()) {
+    if ((*insertion_point)->value > value) break;
+    ++insertion_point;
+  }
+
+  TimePointSemaphore* semaphore = NULL;
+  IREE_RETURN_IF_ERROR(semaphore_pool_->Acquire(&semaphore));
+  semaphore->value = value;
+  semaphore->signal_fence = add_ref(signal_fence);
+  // Freshly acquired pool entries must be in the "unused" state (no waiters).
+  if (semaphore->wait_fence) {
+    return iree_make_status(
+        IREE_STATUS_INTERNAL,
+        "newly acquired time point semaphore should not have waiters");
+  }
+  outstanding_semaphores_.insert(insertion_point, semaphore);
+  IREE_DVLOG(2) << "Timepoint semaphore to signal for timeline value (" << value
+                << ") and wait fence (" << signal_fence.get()
+                << "): " << semaphore
+                << " (binary VkSemaphore: " << semaphore->semaphore << ")";
+
+  *out_handle = semaphore->semaphore;
+  return iree_ok_status();
+}
+
+// Two-argument overload: advances the timeline and then notifies the command
+// queues of any fences discovered to have signaled.
+iree_status_t EmulatedTimelineSemaphore::TryToAdvanceTimeline(
+    uint64_t to_upper_value, bool* out_reached_upper_value) {
+  std::vector<VkFence> signaled_fences;
+  iree_status_t status = TryToAdvanceTimeline(
+      to_upper_value, out_reached_upper_value, &signaled_fences);
+  // Inform the queue that some fences are known to have signaled. This should
+  // happen here instead of inside the other TryToAdvanceTimeline to avoid
+  // potential mutex deadlock, given here we are not holding a mutex anymore.
+  if (!signaled_fences.empty()) {
+    for (iree_host_size_t i = 0; i < command_queue_count_; ++i) {
+      ((SerializingCommandQueue*)command_queues_[i])
+          ->SignalFences(signaled_fences);
+    }
+  }
+  return status;
+}
+
+// Walks the outstanding time points in value order, resolving every fence that
+// has already signaled, and advances signaled_value_ as far as possible up to
+// |to_upper_value|. Resolved pool entries are recycled; fences seen signaling
+// are reported through |out_signaled_fences| for the caller to broadcast
+// outside the lock.
+iree_status_t EmulatedTimelineSemaphore::TryToAdvanceTimeline(
+    uint64_t to_upper_value, bool* out_reached_upper_value,
+    std::vector<VkFence>* out_signaled_fences) {
+  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::TryToAdvanceTimeline");
+  IREE_DVLOG(3) << "EmulatedTimelineSemaphore::TryToAdvanceTimeline";
+  if (out_reached_upper_value) *out_reached_upper_value = false;
+
+  uint64_t past_value = signaled_value_.load();
+  IREE_DVLOG(3) << "Current timeline value: " << past_value
+                << "; desired timeline value: " << to_upper_value;
+
+  // Fast path for when already signaled past the desired value.
+  if (past_value >= to_upper_value) {
+    if (out_reached_upper_value) *out_reached_upper_value = true;
+    return iree_ok_status();
+  }
+
+  // We hold the lock during the entire resolve process so that we can resolve
+  // to the furthest possible value.
+  RAIILock locker(&mutex_);
+
+  IREE_DVLOG(3) << "# outstanding semaphores: "
+                << outstanding_semaphores_.size();
+
+  // The timeline has not signaled past the desired value and there is no
+  // binary semaphore pending on GPU yet: certainly the timeline cannot
+  // advance to the desired value.
+  if (outstanding_semaphores_.empty()) return iree_ok_status();
+
+  IntrusiveList<TimePointSemaphore> resolved_semaphores;
+
+  // Drops a signal fence and records its VkFence for the caller to broadcast.
+  auto clear_signal_fence =
+      [&out_signaled_fences](ref_ptr<TimePointFence>& fence) {
+        if (fence) {
+          if (out_signaled_fences)
+            out_signaled_fences->push_back(fence->value());
+          fence.reset();
+        }
+      };
+
+  bool keep_resolving = true;
+  bool reached_desired_value = false;
+  while (keep_resolving && !outstanding_semaphores_.empty()) {
+    auto* semaphore = outstanding_semaphores_.front();
+    IREE_DVLOG(3) << "Looking at timepoint semaphore " << semaphore << "..";
+    IREE_DVLOG(3) << "  value: " << semaphore->value;
+    IREE_DVLOG(3) << "  VkSemaphore: " << semaphore->semaphore;
+    IREE_DVLOG(3) << "  signal fence: " << semaphore->signal_fence.get();
+    IREE_DVLOG(3) << "  wait fence: " << semaphore->wait_fence.get();
+
+    // If the current semaphore is for a value beyond our upper limit, then
+    // early exit so that we don't spend time dealing with signals we don't yet
+    // care about. This can prevent live lock where one thread is signaling
+    // fences as fast/faster than another thread can consume them.
+    if (semaphore->value > to_upper_value) {
+      keep_resolving = false;
+      reached_desired_value = true;
+      break;
+    }
+
+    // If the current semaphore is for a value not greater than the past
+    // signaled value, then we know it was signaled previously. But there might
+    // be a waiter on it on GPU.
+    if (semaphore->value <= past_value) {
+      if (semaphore->signal_fence) {
+        return iree_make_status(IREE_STATUS_INTERNAL,
+                                "timeline should have already signaled past "
+                                "this time point and cleared the signal fence");
+      }
+
+      // If there are no waiters, we can recycle this semaphore now. If there
+      // exists one waiter, then query its status and recycle on success. We
+      // only handle success status here. Others will be handled when the fence
+      // is checked for other semaphores' signaling status for the same queue
+      // submission.
+      // NOTE(review): if the front time point keeps a pending wait fence this
+      // loop re-examines it each iteration without making progress until the
+      // fence resolves — confirm waiter fences always complete or get erased.
+      if (!semaphore->wait_fence ||
+          semaphore->wait_fence->GetStatus() == VK_SUCCESS) {
+        clear_signal_fence(semaphore->signal_fence);
+        semaphore->wait_fence = nullptr;
+        outstanding_semaphores_.erase(semaphore);
+        resolved_semaphores.push_back(semaphore);
+        IREE_DVLOG(3) << "Resolved and recycling semaphore " << semaphore;
+      }
+
+      continue;
+    }
+
+    // This semaphore represents a value greater than the known previously
+    // signaled value. We don't know its status so we need to really query now.
+
+    if (!semaphore->signal_fence) {
+      return iree_make_status(IREE_STATUS_INTERNAL,
+                              "status of this time point in the timeline "
+                              "should still be pending with a signal fence");
+    }
+    VkResult signal_status = semaphore->signal_fence->GetStatus();
+
+    switch (signal_status) {
+      case VK_SUCCESS:
+        IREE_DVLOG(3) << "..semaphore signaled";
+        signaled_value_.store(semaphore->value);
+        clear_signal_fence(semaphore->signal_fence);
+        // If no waiters, we can recycle this semaphore now.
+        if (!semaphore->wait_fence) {
+          semaphore->wait_fence = nullptr;
+          outstanding_semaphores_.erase(semaphore);
+          resolved_semaphores.push_back(semaphore);
+          IREE_DVLOG(3) << "Resolved and recycling semaphore " << semaphore;
+        }
+        break;
+      case VK_NOT_READY:
+        // The fence has not been signaled yet so this is the furthest time
+        // point we can go in this timeline.
+        keep_resolving = false;
+        IREE_DVLOG(3) << "..semaphore not yet signaled";
+        break;
+      default:
+        // Fence indicates an error (device lost, out of memory, etc).
+        // Propagate this back to our status (and thus any waiters).
+        // Since we only take the first error we find we skip all remaining
+        // fences.
+        keep_resolving = false;
+        clear_signal_fence(semaphore->signal_fence);
+        status_ = VK_RESULT_TO_STATUS(signal_status, "signal status");
+        signaled_value_.store(UINT64_MAX);
+        break;
+    }
+  }
+
+  IREE_DVLOG(3) << "Releasing " << resolved_semaphores.size()
+                << " resolved semaphores; " << outstanding_semaphores_.size()
+                << " still outstanding";
+  semaphore_pool_->ReleaseResolved(&resolved_semaphores);
+  if (!iree_status_is_ok(status_)) {
+    for (iree_host_size_t i = 0; i < command_queue_count_; ++i) {
+      ((SerializingCommandQueue*)command_queues_[i])->AbortQueueSubmission();
+    }
+    semaphore_pool_->ReleaseUnresolved(&outstanding_semaphores_);
+    // Return a clone: status_ remains owned by this object (freed in the dtor
+    // and possibly returned again from Query), so handing out the raw pointer
+    // would double-free. Matches the cloning behavior in Query().
+    return iree_status_clone(status_);
+  }
+
+  if (out_reached_upper_value) *out_reached_upper_value = reached_desired_value;
+  return iree_ok_status();
+}
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+using namespace iree::hal::vulkan;
+
+// Wrap the C++ type above so that we have a somewhat normal C interface.
+// Porting the above to C is ideal but since this is just a fallback layer I'm
+// not sure it's worth it (given that we may require Vulkan 1.2 with timeline
+// semaphores built in at some point soon).
+typedef struct iree_hal_vulkan_emulated_semaphore_t {
+  iree_hal_resource_t resource;
+  iree_allocator_t host_allocator;
+  // Owned C++ implementation object; deleted in _destroy.
+  EmulatedTimelineSemaphore* handle;
+} iree_hal_vulkan_emulated_semaphore_t;
+
+namespace {
+// Defined at the bottom of the file; forward declared for the cast helper.
+extern const iree_hal_semaphore_vtable_t
+    iree_hal_vulkan_emulated_semaphore_vtable;
+} // namespace
+
+// Checks (in debug builds) that |base_value| is an emulated semaphore and
+// unwraps it to the underlying C++ object.
+static EmulatedTimelineSemaphore* iree_hal_vulkan_emulated_semaphore_cast(
+    iree_hal_semaphore_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_emulated_semaphore_vtable);
+  return ((iree_hal_vulkan_emulated_semaphore_t*)base_value)->handle;
+}
+
+// Allocates the C wrapper from the device's host allocator and constructs the
+// C++ EmulatedTimelineSemaphore inside it.
+iree_status_t iree_hal_vulkan_emulated_semaphore_create(
+    iree::hal::vulkan::VkDeviceHandle* logical_device,
+    iree::hal::vulkan::TimePointSemaphorePool* semaphore_pool,
+    iree_host_size_t command_queue_count,
+    iree::hal::vulkan::CommandQueue** command_queues, uint64_t initial_value,
+    iree_hal_semaphore_t** out_semaphore) {
+  iree_hal_vulkan_emulated_semaphore_t* semaphore = NULL;
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc(logical_device->host_allocator(),
+                                             sizeof(*semaphore),
+                                             (void**)&semaphore));
+  iree_hal_resource_initialize(&iree_hal_vulkan_emulated_semaphore_vtable,
+                               &semaphore->resource);
+  semaphore->host_allocator = logical_device->host_allocator();
+  semaphore->handle = new EmulatedTimelineSemaphore(
+      logical_device, semaphore_pool, command_queue_count, command_queues,
+      initial_value);
+
+  *out_semaphore = (iree_hal_semaphore_t*)semaphore;
+  return iree_ok_status();
+}
+
+// vtable destroy: deletes the C++ object then frees the wrapper with the same
+// allocator used in _create.
+static void iree_hal_vulkan_emulated_semaphore_destroy(
+    iree_hal_semaphore_t* base_semaphore) {
+  iree_hal_vulkan_emulated_semaphore_t* semaphore =
+      (iree_hal_vulkan_emulated_semaphore_t*)base_semaphore;
+  // Capture the allocator before freeing the struct that stores it.
+  iree_allocator_t host_allocator = semaphore->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  delete semaphore->handle;
+  iree_allocator_free(host_allocator, semaphore);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// C shim over EmulatedTimelineSemaphore::GetWaitSemaphore; |out_handle| may be
+// VK_NULL_HANDLE when no binary semaphore covers |value| yet.
+iree_status_t iree_hal_vulkan_emulated_semaphore_acquire_wait_handle(
+    iree_hal_semaphore_t* base_semaphore, uint64_t value,
+    const iree::ref_ptr<iree::hal::vulkan::TimePointFence>& wait_fence,
+    VkSemaphore* out_handle) {
+  EmulatedTimelineSemaphore* semaphore =
+      iree_hal_vulkan_emulated_semaphore_cast(base_semaphore);
+  *out_handle = semaphore->GetWaitSemaphore(value, wait_fence);
+  return iree_ok_status();
+}
+
+// C shim over EmulatedTimelineSemaphore::CancelWaitSemaphore.
+iree_status_t iree_hal_vulkan_emulated_semaphore_cancel_wait_handle(
+    iree_hal_semaphore_t* base_semaphore, VkSemaphore handle) {
+  EmulatedTimelineSemaphore* semaphore =
+      iree_hal_vulkan_emulated_semaphore_cast(base_semaphore);
+  return semaphore->CancelWaitSemaphore(handle);
+}
+
+// C shim over EmulatedTimelineSemaphore::GetSignalSemaphore.
+iree_status_t iree_hal_vulkan_emulated_semaphore_acquire_signal_handle(
+    iree_hal_semaphore_t* base_semaphore, uint64_t value,
+    const iree::ref_ptr<iree::hal::vulkan::TimePointFence>& signal_fence,
+    VkSemaphore* out_handle) {
+  EmulatedTimelineSemaphore* semaphore =
+      iree_hal_vulkan_emulated_semaphore_cast(base_semaphore);
+  return semaphore->GetSignalSemaphore(value, signal_fence, out_handle);
+}
+
+// vtable query: forwards to EmulatedTimelineSemaphore::Query.
+static iree_status_t iree_hal_vulkan_emulated_semaphore_query(
+    iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) {
+  EmulatedTimelineSemaphore* semaphore =
+      iree_hal_vulkan_emulated_semaphore_cast(base_semaphore);
+  return semaphore->Query(out_value);
+}
+
+// vtable signal: forwards to EmulatedTimelineSemaphore::Signal.
+static iree_status_t iree_hal_vulkan_emulated_semaphore_signal(
+    iree_hal_semaphore_t* base_semaphore, uint64_t new_value) {
+  EmulatedTimelineSemaphore* semaphore =
+      iree_hal_vulkan_emulated_semaphore_cast(base_semaphore);
+  return semaphore->Signal(new_value);
+}
+
+// vtable fail: forwards to EmulatedTimelineSemaphore::Fail (which takes
+// ownership of |status|).
+static void iree_hal_vulkan_emulated_semaphore_fail(
+    iree_hal_semaphore_t* base_semaphore, iree_status_t status) {
+  EmulatedTimelineSemaphore* semaphore =
+      iree_hal_vulkan_emulated_semaphore_cast(base_semaphore);
+  semaphore->Fail(status);
+}
+
+// vtable wait: forwards to EmulatedTimelineSemaphore::Wait.
+static iree_status_t iree_hal_vulkan_emulated_semaphore_wait(
+    iree_hal_semaphore_t* base_semaphore, uint64_t value,
+    iree_timeout_t timeout) {
+  EmulatedTimelineSemaphore* semaphore =
+      iree_hal_vulkan_emulated_semaphore_cast(base_semaphore);
+  return semaphore->Wait(value, timeout);
+}
+
+// Waits on a list of emulated semaphores sequentially. With
+// VK_SEMAPHORE_WAIT_ANY_BIT set, returns after the first semaphore resolves.
+iree_status_t iree_hal_vulkan_emulated_semaphore_multi_wait(
+    iree::hal::vulkan::VkDeviceHandle* logical_device,
+    const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout,
+    VkSemaphoreWaitFlags wait_flags) {
+  // TODO(antiagainst): We actually should get the fences associated with the
+  // emulated timeline semaphores so that we can wait them in a bunch. This
+  // implementation is problematic if we want to wait on any and we have the
+  // first semaphore taking extra long time but the following ones signal
+  // quickly.
+  for (iree_host_size_t i = 0; i < semaphore_list->count; ++i) {
+    IREE_RETURN_IF_ERROR(iree_hal_vulkan_emulated_semaphore_wait(
+        semaphore_list->semaphores[i], semaphore_list->payload_values[i],
+        timeout));
+    // Wait-any: one resolved semaphore satisfies the whole list.
+    if (wait_flags & VK_SEMAPHORE_WAIT_ANY_BIT) return iree_ok_status();
+  }
+  return iree_ok_status();
+}
+
+namespace {
+// HAL semaphore vtable binding the C shims above; matches the forward
+// declaration used by the cast helper.
+const iree_hal_semaphore_vtable_t iree_hal_vulkan_emulated_semaphore_vtable = {
+    /*.destroy=*/iree_hal_vulkan_emulated_semaphore_destroy,
+    /*.query=*/iree_hal_vulkan_emulated_semaphore_query,
+    /*.signal=*/iree_hal_vulkan_emulated_semaphore_signal,
+    /*.fail=*/iree_hal_vulkan_emulated_semaphore_fail,
+    /*.wait=*/
+    iree_hal_vulkan_emulated_semaphore_wait,
+};
+} // namespace
diff --git a/runtime/src/iree/hal/vulkan/emulated_semaphore.h b/runtime/src/iree/hal/vulkan/emulated_semaphore.h
new file mode 100644
index 0000000..ac7c62c
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/emulated_semaphore.h
@@ -0,0 +1,161 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_EMULATED_SEMAPHORE_H_
+#define IREE_HAL_VULKAN_EMULATED_SEMAPHORE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/command_queue.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/timepoint_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a timeline semaphore emulated via `VkFence`s and binary
+// `VkSemaphore`s.
+//
+// Vulkan provides several explicit synchronization primitives: fences,
+// (binary/timeline) semaphores, events, pipeline barriers, and render passes.
+// See "6. Synchronization and Cache Control" of the Vulkan specification
+// for the details.
+//
+// Render passes are for graphics pipelines so IREE does not care about them.
+// Pipeline barriers synchronize control within a command buffer at a single
+// point. Fences, (binary/timeline) semaphores, and events are synchronization
+// primitives that have separate signal and wait operations. Events are more
+// fine-grained compared to fences and semaphores given that they can be
+// signaled or waited within a command buffer while fences and semaphores are
+// at queue submissions. Each of them has its own usage requirements:
+//
+// * Fences must be signaled on GPU and waited on CPU. Fences must be reset
+// before reuse.
+// * Binary semaphores must be signaled on GPU and waited on GPU. They do not
+// support wait-before-signal submission order. More importantly, binary
+// semaphore wait also unsignals the semaphore. So binary semaphore signals
+// and waits should occur in discrete 1:1 pairs.
+// * Timeline semaphores can be signaled on CPU or GPU and waited on CPU or GPU.
+// They support wait-before-signal submission order. Timeline semaphores do
+// not need to be reset.
+//
+// It's clear that timeline semaphore is more flexible than fences and binary
+// semaphores: it unifies GPU and CPU synchronization with a single primitive.
+// But it's not always available: it requires the VK_KHR_timeline_semaphore
+// extension or Vulkan 1.2. When it's not available, it can be emulated via
+// `VkFence`s and binary `VkSemaphore`s. The emulation needs to provide the
+// functionality of timeline semaphores and also not violate the usage
+// requirements of `VkFence`s and binary `VkSemaphore`s.
+//
+// The basic idea is to create a timeline object with time points to emulate the
+// timeline semaphore, which consists of a monotonically increasing 64-bit
+// integer value. Each time point represents a specific signaled/waited integer
+// value of the timeline semaphore; each time point can associate with binary
+// `VkSemaphore`s and/or `VkFence`s for emulating the synchronization.
+//
+// Concretely, for each of the possible signal -> wait scenarios timeline
+// semaphore supports:
+//
+// ### GPU -> GPU (via `vkQueueSubmit`)
+//
+// Each `vkQueueSubmit` can attach a `VkTimelineSemaphoreSubmitInfo` to describe
+// the timeline semaphore values signaled and waited. Each of the signaled value
+// will be a time point and emulated by a binary `VkSemaphore`. We submit the
+// binary `VkSemaphore`s to the GPU under the hood. For the waited values, the
+// situation is more complicated because of the differences between binary and
+// timeline semaphores:
+//
+// * Binary semaphore signal-wait relationship is strictly 1:1, unlike timeline
+// semaphore where we can have 1:N cases. This means for a specific binary
+// `VkSemaphore` used to emulate a signaled time point, we can have at most
+// one subsequent `vkQueueSubmit` waits on it. We need other mechanisms for
+// additional waits. A simple way is to involve the CPU and don't submit
+// the additional work to queue until the desired value is already signaled
+// past. This requires `VkFence`s for letting the CPU know the status of
+// GPU progress, but `VkFence` is needed anyway because of GPU -> CPU
+// synchronization.
+// * Binary semaphores do not support wait-before-signal submission order.
+// This means we need to put the submission into a self-managed queue if the
+// binary semaphores used to emulate the time points waited by the submission
+// are not submitted to GPU yet.
+//
+// ### GPU -> CPU (via `vkWaitSemaphores`)
+//
+// Without timeline semaphore, we need to use fences to let CPU wait on GPU
+// progress. So this direction can be emulated by `vkWaitFences`. It means we
+// need to associate a `VkFence` with the given waited timeline semaphores.
+// Because we don't know whether a particular `vkQueueSubmit` with timeline
+// semaphores will be later waited on by CPU beforehand, we need to bundle each
+// of them with a `VkFence` just in case they will be waited on later.
+//
+// ### CPU -> GPU (via `vkSignalSemaphore`)
+//
+// This direction can be handled by bumping the signaled timeline value and
+// scan the self-managed queue to submit more work to GPU if possible.
+//
+// ### CPU -> CPU (via `vkWaitSemaphores`)
+//
+// This is similar to CPU -> GPU direction; we just need to enable other threads
+// on CPU side and let them progress.
+//
+// The implementation is inspired by the Vulkan-ExtensionLayer project:
+// https://github.com/KhronosGroup/Vulkan-ExtensionLayer. We don't handle all
+// the aspects of the full spec though given that IREE only uses a subset of
+// synchronization primitives. So this should not be treated as a full
+// emulation of the Vulkan spec and thus does not substitute
+// Vulkan-ExtensionLayer.
+iree_status_t iree_hal_vulkan_emulated_semaphore_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree::hal::vulkan::TimePointSemaphorePool* semaphore_pool,
+ iree_host_size_t command_queue_count,
+ iree::hal::vulkan::CommandQueue** command_queues, uint64_t initial_value,
+ iree_hal_semaphore_t** out_semaphore);
+
+// Acquires a binary semaphore for waiting on the timeline to advance to the
+// given |value|. The semaphore returned won't be waited by anyone else.
+// |wait_fence| is the fence associated with the queue submission that waits
+// on this semaphore.
+//
+// Returns VK_NULL_HANDLE if there are no available semaphores for the given
+// |value|.
+iree_status_t iree_hal_vulkan_emulated_semaphore_acquire_wait_handle(
+ iree_hal_semaphore_t* semaphore, uint64_t value,
+ const iree::ref_ptr<iree::hal::vulkan::TimePointFence>& wait_fence,
+ VkSemaphore* out_handle);
+
+// Cancels the waiting attempt on the given binary |semaphore|. This allows
+// the |semaphore| to be waited by others.
+iree_status_t iree_hal_vulkan_emulated_semaphore_cancel_wait_handle(
+ iree_hal_semaphore_t* semaphore, VkSemaphore handle);
+
+// Acquires a binary semaphore for signaling the timeline to the given |value|.
+// |value| must be smaller than the current timeline value. |signal_fence| is
+// the fence associated with the queue submission that signals this semaphore.
+iree_status_t iree_hal_vulkan_emulated_semaphore_acquire_signal_handle(
+ iree_hal_semaphore_t* semaphore, uint64_t value,
+ const iree::ref_ptr<iree::hal::vulkan::TimePointFence>& signal_fence,
+ VkSemaphore* out_handle);
+
+// Performs a multi-wait on one or more semaphores.
+// By default this is an all-wait but |wait_flags| may contain
+// VK_SEMAPHORE_WAIT_ANY_BIT to change to an any-wait.
+//
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the wait does not complete before
+// the |timeout| deadline elapses.
+iree_status_t iree_hal_vulkan_emulated_semaphore_multi_wait(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout,
+ VkSemaphoreWaitFlags wait_flags);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_EMULATED_SEMAPHORE_H_
diff --git a/runtime/src/iree/hal/vulkan/extensibility_util.cc b/runtime/src/iree/hal/vulkan/extensibility_util.cc
new file mode 100644
index 0000000..a3574b0
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/extensibility_util.cc
@@ -0,0 +1,233 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/extensibility_util.h"
+
+#include <cstdint>
+#include <cstring>
+
+#include "iree/hal/vulkan/status_util.h"
+
+// Returns true if |layers| contains a layer matching |layer_name|
+// (linear scan over NUL-terminated layer names).
+static bool iree_hal_vulkan_layer_list_contains(uint32_t layer_count,
+ const VkLayerProperties* layers,
+ const char* layer_name) {
+ for (uint32_t i = 0; i < layer_count; ++i) {
+ if (strcmp(layer_name, layers[i].layerName) == 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Appends to |out_enabled_layers| all |required_layers| plus whichever
+// |optional_layers| are present in |available_layers|. Fails with UNAVAILABLE
+// if any required layer is missing. |out_enabled_layers->values| must already
+// have capacity for required_layers->count + optional_layers->count entries.
+static iree_status_t iree_hal_vulkan_match_available_layers(
+ iree_host_size_t available_layers_count,
+ const VkLayerProperties* available_layers,
+ const iree_hal_vulkan_string_list_t* required_layers,
+ const iree_hal_vulkan_string_list_t* optional_layers,
+ iree_hal_vulkan_string_list_t* out_enabled_layers) {
+ memset(out_enabled_layers->values, 0,
+ (required_layers->count + optional_layers->count) *
+ sizeof(out_enabled_layers->values[0]));
+
+ for (iree_host_size_t i = 0; i < required_layers->count; ++i) {
+ const char* layer_name = required_layers->values[i];
+ if (!iree_hal_vulkan_layer_list_contains(available_layers_count,
+ available_layers, layer_name)) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "required layer %s not available", layer_name);
+ }
+ out_enabled_layers->values[out_enabled_layers->count++] = layer_name;
+ }
+
+ for (iree_host_size_t i = 0; i < optional_layers->count; ++i) {
+ const char* layer_name = optional_layers->values[i];
+ if (iree_hal_vulkan_layer_list_contains(available_layers_count,
+ available_layers, layer_name)) {
+ out_enabled_layers->values[out_enabled_layers->count++] = layer_name;
+ }
+ }
+
+ return iree_ok_status();
+}
+
+// Queries the instance layers available from the Vulkan implementation and
+// populates |out_enabled_layers| with the required/optional layers that are
+// present. Both the query scratch and the output list are allocated from
+// |arena| and live as long as it does.
+iree_status_t iree_hal_vulkan_match_available_instance_layers(
+ const iree::hal::vulkan::DynamicSymbols* syms,
+ const iree_hal_vulkan_string_list_t* required_layers,
+ const iree_hal_vulkan_string_list_t* optional_layers, iree::Arena* arena,
+ iree_hal_vulkan_string_list_t* out_enabled_layers) {
+ uint32_t layer_property_count = 0;
+ VK_RETURN_IF_ERROR(
+ syms->vkEnumerateInstanceLayerProperties(&layer_property_count, NULL),
+ "vkEnumerateInstanceLayerProperties");
+ VkLayerProperties* layer_properties =
+ (VkLayerProperties*)arena->AllocateBytes(layer_property_count *
+ sizeof(VkLayerProperties));
+ VK_RETURN_IF_ERROR(syms->vkEnumerateInstanceLayerProperties(
+ &layer_property_count, layer_properties),
+ "vkEnumerateInstanceLayerProperties");
+ out_enabled_layers->count = 0;
+ out_enabled_layers->values = (const char**)arena->AllocateBytes(
+ (required_layers->count + optional_layers->count) *
+ sizeof(out_enabled_layers->values[0]));
+ return iree_hal_vulkan_match_available_layers(
+ layer_property_count, layer_properties, required_layers, optional_layers,
+ out_enabled_layers);
+}
+
+// Returns true if |extensions| contains an extension matching
+// |extension_name| (linear scan over NUL-terminated extension names).
+static bool iree_hal_vulkan_extension_list_contains(
+ uint32_t extension_count, const VkExtensionProperties* extensions,
+ const char* extension_name) {
+ for (uint32_t i = 0; i < extension_count; ++i) {
+ if (strcmp(extension_name, extensions[i].extensionName) == 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Appends to |out_enabled_extensions| all |required_extensions| plus whichever
+// |optional_extensions| are present in |available_extensions|. Fails with
+// UNAVAILABLE if any required extension is missing.
+// |out_enabled_extensions->values| must already have capacity for
+// required_extensions->count + optional_extensions->count entries.
+static iree_status_t iree_hal_vulkan_match_available_extensions(
+ iree_host_size_t available_extension_count,
+ const VkExtensionProperties* available_extensions,
+ const iree_hal_vulkan_string_list_t* required_extensions,
+ const iree_hal_vulkan_string_list_t* optional_extensions,
+ iree_hal_vulkan_string_list_t* out_enabled_extensions) {
+ memset(out_enabled_extensions->values, 0,
+ (required_extensions->count + optional_extensions->count) *
+ sizeof(out_enabled_extensions->values[0]));
+
+ for (iree_host_size_t i = 0; i < required_extensions->count; ++i) {
+ const char* extension_name = required_extensions->values[i];
+ if (!iree_hal_vulkan_extension_list_contains(
+ available_extension_count, available_extensions, extension_name)) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "required extension %s not available",
+ extension_name);
+ }
+ out_enabled_extensions->values[out_enabled_extensions->count++] =
+ extension_name;
+ }
+
+ for (iree_host_size_t i = 0; i < optional_extensions->count; ++i) {
+ const char* extension_name = optional_extensions->values[i];
+ if (iree_hal_vulkan_extension_list_contains(
+ available_extension_count, available_extensions, extension_name)) {
+ out_enabled_extensions->values[out_enabled_extensions->count++] =
+ extension_name;
+ }
+ }
+
+ return iree_ok_status();
+}
+
+// Queries the instance extensions available from the Vulkan implementation
+// and populates |out_enabled_extensions| with the required/optional
+// extensions that are present. Storage is allocated from |arena|.
+iree_status_t iree_hal_vulkan_match_available_instance_extensions(
+ const iree::hal::vulkan::DynamicSymbols* syms,
+ const iree_hal_vulkan_string_list_t* required_extensions,
+ const iree_hal_vulkan_string_list_t* optional_extensions,
+ iree::Arena* arena, iree_hal_vulkan_string_list_t* out_enabled_extensions) {
+ uint32_t extension_property_count = 0;
+ VK_RETURN_IF_ERROR(syms->vkEnumerateInstanceExtensionProperties(
+ NULL, &extension_property_count, NULL),
+ "vkEnumerateInstanceExtensionProperties");
+ VkExtensionProperties* extension_properties =
+ (VkExtensionProperties*)arena->AllocateBytes(
+ extension_property_count * sizeof(VkExtensionProperties));
+ VK_RETURN_IF_ERROR(syms->vkEnumerateInstanceExtensionProperties(
+ NULL, &extension_property_count, extension_properties),
+ "vkEnumerateInstanceExtensionProperties");
+ out_enabled_extensions->count = 0;
+ out_enabled_extensions->values = (const char**)arena->AllocateBytes(
+ (required_extensions->count + optional_extensions->count) *
+ sizeof(out_enabled_extensions->values[0]));
+ return iree_hal_vulkan_match_available_extensions(
+ extension_property_count, extension_properties, required_extensions,
+ optional_extensions, out_enabled_extensions);
+}
+
+// Queries the device extensions available on |physical_device| and populates
+// |out_enabled_extensions| with the required/optional extensions that are
+// present. Storage is allocated from |arena|.
+iree_status_t iree_hal_vulkan_match_available_device_extensions(
+ const iree::hal::vulkan::DynamicSymbols* syms,
+ VkPhysicalDevice physical_device,
+ const iree_hal_vulkan_string_list_t* required_extensions,
+ const iree_hal_vulkan_string_list_t* optional_extensions,
+ iree::Arena* arena, iree_hal_vulkan_string_list_t* out_enabled_extensions) {
+ uint32_t extension_property_count = 0;
+ VK_RETURN_IF_ERROR(
+ syms->vkEnumerateDeviceExtensionProperties(
+ physical_device, NULL, &extension_property_count, NULL),
+ "vkEnumerateDeviceExtensionProperties");
+ VkExtensionProperties* extension_properties =
+ (VkExtensionProperties*)arena->AllocateBytes(
+ extension_property_count * sizeof(VkExtensionProperties));
+ VK_RETURN_IF_ERROR(syms->vkEnumerateDeviceExtensionProperties(
+ physical_device, NULL, &extension_property_count,
+ extension_properties),
+ "vkEnumerateDeviceExtensionProperties");
+ out_enabled_extensions->count = 0;
+ out_enabled_extensions->values = (const char**)arena->AllocateBytes(
+ (required_extensions->count + optional_extensions->count) *
+ sizeof(out_enabled_extensions->values[0]));
+ return iree_hal_vulkan_match_available_extensions(
+ extension_property_count, extension_properties, required_extensions,
+ optional_extensions, out_enabled_extensions);
+}
+
+// Scans |enabled_extensions| and sets the feature bit for each instance
+// extension the runtime recognizes (currently only VK_EXT_debug_utils).
+iree_hal_vulkan_instance_extensions_t
+iree_hal_vulkan_populate_enabled_instance_extensions(
+ const iree_hal_vulkan_string_list_t* enabled_extensions) {
+ iree_hal_vulkan_instance_extensions_t extensions;
+ memset(&extensions, 0, sizeof(extensions));
+ for (iree_host_size_t i = 0; i < enabled_extensions->count; ++i) {
+ const char* extension_name = enabled_extensions->values[i];
+ if (strcmp(extension_name, VK_EXT_DEBUG_UTILS_EXTENSION_NAME) == 0) {
+ extensions.debug_utils = true;
+ }
+ }
+ return extensions;
+}
+
+// Scans |enabled_extensions| and sets the feature bit for each device
+// extension the runtime recognizes.
+iree_hal_vulkan_device_extensions_t
+iree_hal_vulkan_populate_enabled_device_extensions(
+ const iree_hal_vulkan_string_list_t* enabled_extensions) {
+ iree_hal_vulkan_device_extensions_t extensions;
+ memset(&extensions, 0, sizeof(extensions));
+ for (iree_host_size_t i = 0; i < enabled_extensions->count; ++i) {
+ const char* extension_name = enabled_extensions->values[i];
+ if (strcmp(extension_name, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME) == 0) {
+ extensions.push_descriptors = true;
+ } else if (strcmp(extension_name,
+ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME) == 0) {
+ extensions.timeline_semaphore = true;
+ } else if (strcmp(extension_name, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME) ==
+ 0) {
+ extensions.host_query_reset = true;
+ } else if (strcmp(extension_name,
+ VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) {
+ extensions.calibrated_timestamps = true;
+ }
+ }
+ return extensions;
+}
+
+// Infers which extensions are enabled from which optional symbols resolved in
+// |device_syms|. NOTE: per the header, ICDs may resolve symbols even for
+// extensions that were not enabled, so prefer the populate_* path when the
+// actual enabled extension list is known.
+iree_hal_vulkan_device_extensions_t
+iree_hal_vulkan_infer_enabled_device_extensions(
+ const iree::hal::vulkan::DynamicSymbols* device_syms) {
+ iree_hal_vulkan_device_extensions_t extensions;
+ memset(&extensions, 0, sizeof(extensions));
+ if (device_syms->vkCmdPushDescriptorSetKHR) {
+ extensions.push_descriptors = true;
+ }
+ if (device_syms->vkSignalSemaphore || device_syms->vkSignalSemaphoreKHR) {
+ extensions.timeline_semaphore = true;
+ }
+ if (device_syms->vkResetQueryPoolEXT) {
+ extensions.host_query_reset = true;
+ }
+ if (device_syms->vkGetCalibratedTimestampsEXT) {
+ extensions.calibrated_timestamps = true;
+ }
+ return extensions;
+}
diff --git a/runtime/src/iree/hal/vulkan/extensibility_util.h b/runtime/src/iree/hal/vulkan/extensibility_util.h
new file mode 100644
index 0000000..f436988
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/extensibility_util.h
@@ -0,0 +1,97 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_EXTENSIBILITY_UTIL_H_
+#define IREE_HAL_VULKAN_EXTENSIBILITY_UTIL_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/vulkan/api.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/util/arena.h"
+
+// A list of NUL-terminated strings (so they can be passed directly to Vulkan).
+typedef struct iree_hal_vulkan_string_list_t {
+ iree_host_size_t count;
+ const char** values;
+} iree_hal_vulkan_string_list_t;
+
+// Populates |out_enabled_layers| with all layers that are both available in the
+// implementation and |required_layers| and |optional_layers| lists.
+// |out_enabled_layers| must have capacity at least the sum of
+// |required_layers|.count and |optional_layers|.count.
+// Returns failure if any |required_layers| are unavailable.
+iree_status_t iree_hal_vulkan_match_available_instance_layers(
+ const iree::hal::vulkan::DynamicSymbols* syms,
+ const iree_hal_vulkan_string_list_t* required_layers,
+ const iree_hal_vulkan_string_list_t* optional_layers, iree::Arena* arena,
+ iree_hal_vulkan_string_list_t* out_enabled_layers);
+
+// Populates |out_enabled_extensions| with all extensions that are both
+// available in the implementation and |required_extensions| and
+// |optional_extensions| lists. |out_enabled_extensions| must have capacity at
+// least the sum of |required_extensions|.count and |optional_extensions|.count.
+// Returns failure if any |required_extensions| are unavailable.
+iree_status_t iree_hal_vulkan_match_available_instance_extensions(
+ const iree::hal::vulkan::DynamicSymbols* syms,
+ const iree_hal_vulkan_string_list_t* required_extensions,
+ const iree_hal_vulkan_string_list_t* optional_extensions,
+ iree::Arena* arena, iree_hal_vulkan_string_list_t* out_enabled_extensions);
+
+// Populates |out_enabled_extensions| with all extensions that are both
+// available in the implementation and |required_extensions| and
+// |optional_extensions| lists. |out_enabled_extensions| must have capacity at
+// least the sum of |required_extensions|.count and |optional_extensions|.count.
+// Returns failure if any |required_extensions| are unavailable.
+iree_status_t iree_hal_vulkan_match_available_device_extensions(
+ const iree::hal::vulkan::DynamicSymbols* syms,
+ VkPhysicalDevice physical_device,
+ const iree_hal_vulkan_string_list_t* required_extensions,
+ const iree_hal_vulkan_string_list_t* optional_extensions,
+ iree::Arena* arena, iree_hal_vulkan_string_list_t* out_enabled_extensions);
+
+// Bits for enabled instance extensions.
+// We must use this to query support instead of just detecting symbol names as
+// ICDs will resolve the functions sometimes even if they don't support the
+// extension (or we didn't ask for it to be enabled).
+typedef struct iree_hal_vulkan_instance_extensions_t {
+ // VK_EXT_debug_utils is enabled and a debug messenger is registered.
+ // https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/chap44.html#VK_EXT_debug_utils
+ bool debug_utils : 1;
+} iree_hal_vulkan_instance_extensions_t;
+
+// Returns a bitfield with all of the provided extension names.
+iree_hal_vulkan_instance_extensions_t
+iree_hal_vulkan_populate_enabled_instance_extensions(
+ const iree_hal_vulkan_string_list_t* enabled_extensions);
+
+// Bits for enabled device extensions.
+// We must use this to query support instead of just detecting symbol names as
+// ICDs will resolve the functions sometimes even if they don't support the
+// extension (or we didn't ask for it to be enabled).
+typedef struct iree_hal_vulkan_device_extensions_t {
+ // VK_KHR_push_descriptor is enabled and vkCmdPushDescriptorSetKHR is valid.
+ bool push_descriptors : 1;
+ // VK_KHR_timeline_semaphore is enabled.
+ bool timeline_semaphore : 1;
+ // VK_EXT_host_query_reset is enabled.
+ bool host_query_reset : 1;
+ // VK_EXT_calibrated_timestamps is enabled.
+ bool calibrated_timestamps : 1;
+} iree_hal_vulkan_device_extensions_t;
+
+// Returns a bitfield with all of the provided extension names.
+iree_hal_vulkan_device_extensions_t
+iree_hal_vulkan_populate_enabled_device_extensions(
+ const iree_hal_vulkan_string_list_t* enabled_extensions);
+
+// Returns a bitfield with the extensions that are (likely) available on the
+// device symbols. This is less reliable than setting the bits directly when
+// the known set of extensions is available.
+iree_hal_vulkan_device_extensions_t
+iree_hal_vulkan_infer_enabled_device_extensions(
+ const iree::hal::vulkan::DynamicSymbols* device_syms);
+
+#endif // IREE_HAL_VULKAN_EXTENSIBILITY_UTIL_H_
diff --git a/runtime/src/iree/hal/vulkan/handle_util.h b/runtime/src/iree/hal/vulkan/handle_util.h
new file mode 100644
index 0000000..0cff882
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/handle_util.h
@@ -0,0 +1,166 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Helpers for wrapping Vulkan handles that don't require us to wrap every type.
+// This keeps our compilation time reasonable (as the vulkancpp library is
+// insane) while giving us nice safety around cleanup and ensuring we use
+// dynamic symbols and consistent allocators.
+//
+// Do not add functionality beyond handle management to these types. Keep our
+// Vulkan usage mostly functional and C-like to ensure minimal code size and
+// readability.
+
+#ifndef IREE_HAL_VULKAN_HANDLE_UTIL_H_
+#define IREE_HAL_VULKAN_HANDLE_UTIL_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h" // IWYU pragma: export
+// clang-format on
+
+#include "iree/base/internal/synchronization.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/extensibility_util.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+// Local equivalent of std::exchange: moves |new_value| into |obj| and returns
+// the previous value of |obj|.
+template <class T, class U = T>
+constexpr T exchange(T& obj, U&& new_value) {
+ T old_value = std::move(obj);
+ obj = std::forward<U>(new_value);
+ return old_value;
+}
+
+// Ref-counted wrapper for a VkDevice bundling the dynamic symbols and the
+// Vulkan/host allocators needed to use and (when |owns_device| is set)
+// destroy it. Copying is disabled; moving transfers ownership and leaves the
+// moved-from handle holding VK_NULL_HANDLE.
+class VkDeviceHandle : public RefObject<VkDeviceHandle> {
+ public:
+ VkDeviceHandle(DynamicSymbols* syms,
+ iree_hal_vulkan_device_extensions_t enabled_extensions,
+ bool owns_device, iree_allocator_t host_allocator,
+ const VkAllocationCallbacks* allocator = nullptr)
+ : syms_(add_ref(syms)),
+ enabled_extensions_(enabled_extensions),
+ owns_device_(owns_device),
+ allocator_(allocator),
+ host_allocator_(host_allocator) {}
+ ~VkDeviceHandle() { reset(); }
+
+ VkDeviceHandle(const VkDeviceHandle&) = delete;
+ VkDeviceHandle& operator=(const VkDeviceHandle&) = delete;
+ VkDeviceHandle(VkDeviceHandle&& other) noexcept
+ : value_(exchange(other.value_, static_cast<VkDevice>(VK_NULL_HANDLE))),
+ syms_(std::move(other.syms_)),
+ enabled_extensions_(other.enabled_extensions_),
+ owns_device_(other.owns_device_),
+ allocator_(other.allocator_),
+ host_allocator_(other.host_allocator_) {}
+
+ // Destroys the device if owned; safe to call repeatedly.
+ void reset() {
+ if (value_ == VK_NULL_HANDLE) return;
+ if (owns_device_) {
+ syms_->vkDestroyDevice(value_, allocator_);
+ }
+ value_ = VK_NULL_HANDLE;
+ }
+
+ VkDevice value() const noexcept { return value_; }
+ VkDevice* mutable_value() noexcept { return &value_; }
+ operator VkDevice() const noexcept { return value_; }
+
+ const ref_ptr<DynamicSymbols>& syms() const noexcept { return syms_; }
+ const VkAllocationCallbacks* allocator() const noexcept { return allocator_; }
+ iree_allocator_t host_allocator() const noexcept { return host_allocator_; }
+
+ const iree_hal_vulkan_device_extensions_t& enabled_extensions() const {
+ return enabled_extensions_;
+ }
+
+ private:
+ VkDevice value_ = VK_NULL_HANDLE;
+ ref_ptr<DynamicSymbols> syms_;
+ iree_hal_vulkan_device_extensions_t enabled_extensions_;
+ bool owns_device_;
+ const VkAllocationCallbacks* allocator_ = nullptr;
+ iree_allocator_t host_allocator_;
+};
+
+// Wraps a VkCommandPool created on a logical device and destroys it on
+// reset()/destruction. Allocate/Free are serialized with a slim mutex
+// because Vulkan command pools require external synchronization (see note on
+// |mutex_| below).
+class VkCommandPoolHandle {
+ public:
+ explicit VkCommandPoolHandle(VkDeviceHandle* logical_device)
+ : logical_device_(logical_device) {
+ iree_slim_mutex_initialize(&mutex_);
+ }
+ ~VkCommandPoolHandle() {
+ reset();
+ iree_slim_mutex_deinitialize(&mutex_);
+ }
+
+ VkCommandPoolHandle(const VkCommandPoolHandle&) = delete;
+ VkCommandPoolHandle& operator=(const VkCommandPoolHandle&) = delete;
+ VkCommandPoolHandle(VkCommandPoolHandle&& other) noexcept
+ : logical_device_(std::move(other.logical_device_)),
+ value_(exchange(other.value_,
+ static_cast<VkCommandPool>(VK_NULL_HANDLE))) {}
+ VkCommandPoolHandle& operator=(VkCommandPoolHandle&& other) {
+ std::swap(logical_device_, other.logical_device_);
+ std::swap(value_, other.value_);
+ return *this;
+ }
+
+ // Destroys the pool; safe to call repeatedly.
+ void reset() {
+ if (value_ == VK_NULL_HANDLE) return;
+ syms()->vkDestroyCommandPool(*logical_device_, value_, allocator());
+ value_ = VK_NULL_HANDLE;
+ }
+
+ VkCommandPool value() const noexcept { return value_; }
+ VkCommandPool* mutable_value() noexcept { return &value_; }
+ operator VkCommandPool() const noexcept { return value_; }
+
+ const VkDeviceHandle* logical_device() const noexcept {
+ return logical_device_;
+ }
+ const ref_ptr<DynamicSymbols>& syms() const noexcept {
+ return logical_device_->syms();
+ }
+ const VkAllocationCallbacks* allocator() const noexcept {
+ return logical_device_->allocator();
+ }
+
+ // Allocates a command buffer from the pool under the pool lock.
+ iree_status_t Allocate(const VkCommandBufferAllocateInfo* allocate_info,
+ VkCommandBuffer* out_handle) {
+ iree_slim_mutex_lock(&mutex_);
+ iree_status_t status =
+ VK_RESULT_TO_STATUS(syms()->vkAllocateCommandBuffers(
+ *logical_device_, allocate_info, out_handle),
+ "vkAllocateCommandBuffers");
+ iree_slim_mutex_unlock(&mutex_);
+ return status;
+ }
+
+ // Returns a command buffer to the pool under the pool lock.
+ void Free(VkCommandBuffer handle) {
+ iree_slim_mutex_lock(&mutex_);
+ syms()->vkFreeCommandBuffers(*logical_device_, value_, 1, &handle);
+ iree_slim_mutex_unlock(&mutex_);
+ }
+
+ private:
+ VkDeviceHandle* logical_device_;
+ VkCommandPool value_ = VK_NULL_HANDLE;
+
+ // Vulkan command pools are not thread safe and require external
+ // synchronization. Since we allow arbitrary threads to allocate and
+ // deallocate the HAL command buffers we need to externally synchronize.
+ iree_slim_mutex_t mutex_;
+};
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_HANDLE_UTIL_H_
diff --git a/runtime/src/iree/hal/vulkan/internal_vk_mem_alloc.cc b/runtime/src/iree/hal/vulkan/internal_vk_mem_alloc.cc
new file mode 100644
index 0000000..145afd9
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/internal_vk_mem_alloc.cc
@@ -0,0 +1,62 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Only compile if an external implementation has not been otherwise linked.
+#if !defined(VULKAN_MEMORY_ALLOCATOR_EXTERNAL_IMPL)
+
+#include <ostream>
+
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/logging.h"
+
+#define VMA_ASSERT IREE_DCHECK
+#define VMA_HEAVY_ASSERT IREE_DCHECK
+
+// NOTE: logging is disabled by default as unless you are debugging VMA itself
+// the information is not useful and just slows things down.
+#if 0
+#define VMA_DEBUG_LOG(...) _IREE_LOG_INFO << __VA_ARGS__
+#else
+#define VMA_DEBUG_LOG(...)
+#endif // 0
+
+// Use iree_slim_mutex_t for VMA_MUTEX: exposes the Lock/Unlock/TryLock
+// interface VMA expects while using IREE's slim mutex underneath.
+class IreeVmaMutex {
+ public:
+ IreeVmaMutex() { iree_slim_mutex_initialize(&mutex_); }
+ ~IreeVmaMutex() { iree_slim_mutex_deinitialize(&mutex_); }
+
+ void Lock() { iree_slim_mutex_lock(&mutex_); }
+ void Unlock() { iree_slim_mutex_unlock(&mutex_); }
+ bool TryLock() { return iree_slim_mutex_try_lock(&mutex_); }
+
+ private:
+ iree_slim_mutex_t mutex_;
+};
+#define VMA_MUTEX IreeVmaMutex
+
+// Use iree_slim_mutex_t for VMA_RW_MUTEX.
+// NOTE: this degrades reader/writer locking to a single exclusive slim mutex
+// (readers serialize with each other); all Lock*/Unlock* paths share |mutex_|.
+class IreeVmaRWMutex {
+ public:
+ IreeVmaRWMutex() { iree_slim_mutex_initialize(&mutex_); }
+ ~IreeVmaRWMutex() { iree_slim_mutex_deinitialize(&mutex_); }
+
+ void LockRead() { iree_slim_mutex_lock(&mutex_); }
+ void UnlockRead() { iree_slim_mutex_unlock(&mutex_); }
+ bool TryLockRead() { return iree_slim_mutex_try_lock(&mutex_); }
+ void LockWrite() { iree_slim_mutex_lock(&mutex_); }
+ void UnlockWrite() { iree_slim_mutex_unlock(&mutex_); }
+ bool TryLockWrite() { return iree_slim_mutex_try_lock(&mutex_); }
+
+ private:
+ iree_slim_mutex_t mutex_;
+};
+#define VMA_RW_MUTEX IreeVmaRWMutex
+
+#define VMA_IMPLEMENTATION
+#include "iree/hal/vulkan/internal_vk_mem_alloc.h"
+
+#endif // !VULKAN_MEMORY_ALLOCATOR_EXTERNAL_IMPL
diff --git a/runtime/src/iree/hal/vulkan/internal_vk_mem_alloc.h b/runtime/src/iree/hal/vulkan/internal_vk_mem_alloc.h
new file mode 100644
index 0000000..1f50682
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/internal_vk_mem_alloc.h
@@ -0,0 +1,23 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_INTERNAL_VK_MEM_ALLOC_H_
+#define IREE_HAL_VULKAN_INTERNAL_VK_MEM_ALLOC_H_
+
+#include "iree/hal/vulkan/vulkan_headers.h"
+
+// Force all Vulkan calls to go through an indirect pVulkanFunctions interface.
+// https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/configuration.html
+#define VMA_STATIC_VULKAN_FUNCTIONS 0
+
+// Prevent VMA from querying for dynamic functions we may not have provided.
+// We want to be able to print nice errors or decide whether something is ok
+// to be omitted and not have VMA poking around where it shouldn't.
+#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
+
+#include <vk_mem_alloc.h> // IWYU pragma: export
+
+#endif // IREE_HAL_VULKAN_INTERNAL_VK_MEM_ALLOC_H_
diff --git a/runtime/src/iree/hal/vulkan/native_descriptor_set.cc b/runtime/src/iree/hal/vulkan/native_descriptor_set.cc
new file mode 100644
index 0000000..e4ab3ef
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_descriptor_set.cc
@@ -0,0 +1,92 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/native_descriptor_set.h"
+
+#include <cstddef>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+
+using namespace iree::hal::vulkan;
+
+typedef struct iree_hal_vulkan_native_descriptor_set_t {
+  iree_hal_resource_t resource;    // must be first: struct is cast to iree_hal_descriptor_set_t
+  VkDeviceHandle* logical_device;  // unowned; only its host allocator is used on destroy
+  VkDescriptorSet handle;          // wrapped handle; not freed here (pool-managed)
+} iree_hal_vulkan_native_descriptor_set_t;
+
+namespace {
+extern const iree_hal_descriptor_set_vtable_t  // forward decl; defined at end of file
+    iree_hal_vulkan_native_descriptor_set_vtable;
+}  // namespace
+
+static iree_hal_vulkan_native_descriptor_set_t*
+iree_hal_vulkan_native_descriptor_set_cast(
+    iree_hal_descriptor_set_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value,
+                       &iree_hal_vulkan_native_descriptor_set_vtable);  // verifies vtable identity
+  return (iree_hal_vulkan_native_descriptor_set_t*)base_value;  // checked downcast from the HAL base type
+}
+
+iree_status_t iree_hal_vulkan_native_descriptor_set_create(
+    iree::hal::vulkan::VkDeviceHandle* logical_device, VkDescriptorSet handle,
+    iree_hal_descriptor_set_t** out_descriptor_set) {
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(handle);
+  IREE_ASSERT_ARGUMENT(out_descriptor_set);
+  *out_descriptor_set = NULL;  // ensure a defined value on all failure paths
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_vulkan_native_descriptor_set_t* descriptor_set = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(logical_device->host_allocator(),
+                            sizeof(*descriptor_set), (void**)&descriptor_set);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_vulkan_native_descriptor_set_vtable,
+                                 &descriptor_set->resource);
+    descriptor_set->logical_device = logical_device;
+    descriptor_set->handle = handle;  // wraps the caller-provided VkDescriptorSet
+    *out_descriptor_set = (iree_hal_descriptor_set_t*)descriptor_set;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+static void iree_hal_vulkan_native_descriptor_set_destroy(
+    iree_hal_descriptor_set_t* base_descriptor_set) {
+  iree_hal_vulkan_native_descriptor_set_t* descriptor_set =
+      iree_hal_vulkan_native_descriptor_set_cast(base_descriptor_set);
+  iree_allocator_t host_allocator =
+      descriptor_set->logical_device->host_allocator();
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // TODO(benvanik): return to pool. For now we rely on the descriptor cache to
+  // reset entire pools at once via vkResetDescriptorPool so we don't need
+  // to do anything here (the VkDescriptorSet handle will just be invalidated).
+  // In the future if we want to have generational collection/defragmentation
+  // of the descriptor cache we'll want to allow both pooled and unpooled
+  // descriptors and clean them up here appropriately.
+
+  iree_allocator_free(host_allocator, descriptor_set);  // frees the wrapper only; the VkDescriptorSet is pool-managed
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+VkDescriptorSet iree_hal_vulkan_native_descriptor_set_handle(
+    iree_hal_descriptor_set_t* base_descriptor_set) {
+  iree_hal_vulkan_native_descriptor_set_t* descriptor_set =
+      iree_hal_vulkan_native_descriptor_set_cast(base_descriptor_set);
+  return descriptor_set->handle;  // raw Vulkan handle; no ownership transfer
+}
+
+namespace {
+const iree_hal_descriptor_set_vtable_t
+    iree_hal_vulkan_native_descriptor_set_vtable = {
+        /*.destroy=*/iree_hal_vulkan_native_descriptor_set_destroy,  // only entry in this vtable
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/vulkan/native_descriptor_set.h b/runtime/src/iree/hal/vulkan/native_descriptor_set.h
new file mode 100644
index 0000000..128a1c6
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_descriptor_set.h
@@ -0,0 +1,31 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_NATIVE_DESCRIPTOR_SET_H_
+#define IREE_HAL_VULKAN_NATIVE_DESCRIPTOR_SET_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a native Vulkan VkDescriptorSet object.
+iree_status_t iree_hal_vulkan_native_descriptor_set_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device, VkDescriptorSet handle,
+ iree_hal_descriptor_set_t** out_descriptor_set);
+
+// Returns the native Vulkan VkDescriptorSet handle.
+VkDescriptorSet iree_hal_vulkan_native_descriptor_set_handle(
+ iree_hal_descriptor_set_t* base_descriptor_set);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_NATIVE_DESCRIPTOR_SET_H_
diff --git a/runtime/src/iree/hal/vulkan/native_descriptor_set_layout.cc b/runtime/src/iree/hal/vulkan/native_descriptor_set_layout.cc
new file mode 100644
index 0000000..87965c4
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_descriptor_set_layout.cc
@@ -0,0 +1,162 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/native_descriptor_set_layout.h"
+
+#include <cstddef>
+#include <cstdint>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/extensibility_util.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+using namespace iree::hal::vulkan;
+
+typedef struct iree_hal_vulkan_native_descriptor_set_layout_t {
+  iree_hal_resource_t resource;    // must be first: struct is cast to iree_hal_descriptor_set_layout_t
+  VkDeviceHandle* logical_device;  // unowned; used for syms()/allocators
+  VkDescriptorSetLayout handle;    // owned; destroyed in _destroy below
+} iree_hal_vulkan_native_descriptor_set_layout_t;
+
+namespace {
+extern const iree_hal_descriptor_set_layout_vtable_t  // forward decl; defined at end of file
+    iree_hal_vulkan_native_descriptor_set_layout_vtable;
+}  // namespace
+
+static iree_hal_vulkan_native_descriptor_set_layout_t*
+iree_hal_vulkan_native_descriptor_set_layout_cast(
+    iree_hal_descriptor_set_layout_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value,
+                       &iree_hal_vulkan_native_descriptor_set_layout_vtable);  // verifies vtable identity
+  return (iree_hal_vulkan_native_descriptor_set_layout_t*)base_value;  // checked downcast
+}
+
+static iree_status_t iree_hal_vulkan_create_descriptor_set_layout(
+    VkDeviceHandle* logical_device,
+    iree_hal_descriptor_set_layout_usage_type_t usage_type,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_layout_binding_t* bindings,
+    VkDescriptorSetLayout* out_handle) {
+  VkDescriptorSetLayoutCreateInfo create_info;
+  create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+  create_info.pNext = NULL;
+  create_info.flags = 0;
+  if (usage_type == IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_PUSH_ONLY &&
+      logical_device->enabled_extensions().push_descriptors) {
+    // Note that we can *only* use push descriptor sets if we set this create
+    // flag. If push descriptors aren't supported we emulate them with normal
+    // descriptors so it's fine to have kPushOnly without support.
+    create_info.flags |=
+        VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
+  }
+
+  VkDescriptorSetLayoutBinding* native_bindings = NULL;
+  if (binding_count > 0) {
+    // TODO(benvanik): avoid this allocation if possible (inline_array).
+    IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+        logical_device->host_allocator(),
+        binding_count * sizeof(VkDescriptorSetLayoutBinding),
+        (void**)&native_bindings));
+    for (iree_host_size_t i = 0; i < binding_count; ++i) {
+      VkDescriptorSetLayoutBinding* native_binding = &native_bindings[i];
+      native_binding->binding = bindings[i].binding;
+      native_binding->descriptorType =
+          static_cast<VkDescriptorType>(bindings[i].type);  // assumes HAL type values mirror VkDescriptorType -- TODO confirm
+      native_binding->descriptorCount = 1;  // one descriptor per binding slot
+      native_binding->stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;  // compute stage only
+      native_binding->pImmutableSamplers = NULL;
+    }
+  }
+  create_info.bindingCount = (uint32_t)binding_count;
+  create_info.pBindings = native_bindings;
+
+  iree_status_t status =
+      VK_RESULT_TO_STATUS(logical_device->syms()->vkCreateDescriptorSetLayout(
+                              *logical_device, &create_info,
+                              logical_device->allocator(), out_handle),
+                          "vkCreateDescriptorSetLayout");
+
+  iree_allocator_free(logical_device->host_allocator(), native_bindings);  // NULL when binding_count == 0
+  return status;
+}
+
+static void iree_hal_vulkan_destroy_descriptor_set_layout(
+    VkDeviceHandle* logical_device, VkDescriptorSetLayout handle) {
+  if (handle == VK_NULL_HANDLE) return;  // no-op for never-created handles
+  logical_device->syms()->vkDestroyDescriptorSetLayout(
+      *logical_device, handle, logical_device->allocator());
+}
+
+iree_status_t iree_hal_vulkan_native_descriptor_set_layout_create(
+    VkDeviceHandle* logical_device,
+    iree_hal_descriptor_set_layout_usage_type_t usage_type,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_layout_binding_t* bindings,
+    iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(!binding_count || bindings);  // bindings may be NULL only when empty
+  IREE_ASSERT_ARGUMENT(out_descriptor_set_layout);
+  *out_descriptor_set_layout = NULL;  // ensure a defined value on all failure paths
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  VkDescriptorSetLayout handle = VK_NULL_HANDLE;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_vulkan_create_descriptor_set_layout(
+              logical_device, usage_type, binding_count, bindings, &handle));
+
+  iree_hal_vulkan_native_descriptor_set_layout_t* descriptor_set_layout = NULL;
+  iree_status_t status = iree_allocator_malloc(logical_device->host_allocator(),
+                                               sizeof(*descriptor_set_layout),
+                                               (void**)&descriptor_set_layout);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(
+        &iree_hal_vulkan_native_descriptor_set_layout_vtable,
+        &descriptor_set_layout->resource);
+    descriptor_set_layout->logical_device = logical_device;
+    descriptor_set_layout->handle = handle;  // takes ownership of the Vulkan handle
+    *out_descriptor_set_layout =
+        (iree_hal_descriptor_set_layout_t*)descriptor_set_layout;
+  } else {
+    iree_hal_vulkan_destroy_descriptor_set_layout(logical_device, handle);  // wrapper alloc failed: don't leak the handle
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+static void iree_hal_vulkan_native_descriptor_set_layout_destroy(
+    iree_hal_descriptor_set_layout_t* base_descriptor_set_layout) {
+  iree_hal_vulkan_native_descriptor_set_layout_t* descriptor_set_layout =
+      iree_hal_vulkan_native_descriptor_set_layout_cast(
+          base_descriptor_set_layout);
+  iree_allocator_t host_allocator =
+      descriptor_set_layout->logical_device->host_allocator();  // fetch before freeing the wrapper
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_vulkan_destroy_descriptor_set_layout(
+      descriptor_set_layout->logical_device, descriptor_set_layout->handle);  // releases the owned Vulkan handle
+  iree_allocator_free(host_allocator, descriptor_set_layout);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+VkDescriptorSetLayout iree_hal_vulkan_native_descriptor_set_layout_handle(
+    iree_hal_descriptor_set_layout_t* base_descriptor_set_layout) {
+  iree_hal_vulkan_native_descriptor_set_layout_t* descriptor_set_layout =
+      iree_hal_vulkan_native_descriptor_set_layout_cast(
+          base_descriptor_set_layout);
+  return descriptor_set_layout->handle;  // raw Vulkan handle; no ownership transfer
+}
+
+namespace {
+const iree_hal_descriptor_set_layout_vtable_t
+    iree_hal_vulkan_native_descriptor_set_layout_vtable = {
+        /*.destroy=*/iree_hal_vulkan_native_descriptor_set_layout_destroy,  // only entry in this vtable
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/vulkan/native_descriptor_set_layout.h b/runtime/src/iree/hal/vulkan/native_descriptor_set_layout.h
new file mode 100644
index 0000000..0faa9d5
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_descriptor_set_layout.h
@@ -0,0 +1,34 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_NATIVE_DESCRIPTOR_SET_LAYOUT_H_
+#define IREE_HAL_VULKAN_NATIVE_DESCRIPTOR_SET_LAYOUT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a native Vulkan VkDescriptorSetLayout object.
+iree_status_t iree_hal_vulkan_native_descriptor_set_layout_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree_hal_descriptor_set_layout_usage_type_t usage_type,
+ iree_host_size_t binding_count,
+ const iree_hal_descriptor_set_layout_binding_t* bindings,
+ iree_hal_descriptor_set_layout_t** out_descriptor_set_layout);
+
+// Returns the native Vulkan VkDescriptorSetLayout handle.
+VkDescriptorSetLayout iree_hal_vulkan_native_descriptor_set_layout_handle(
+ iree_hal_descriptor_set_layout_t* base_descriptor_set_layout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_NATIVE_DESCRIPTOR_SET_LAYOUT_H_
diff --git a/runtime/src/iree/hal/vulkan/native_event.cc b/runtime/src/iree/hal/vulkan/native_event.cc
new file mode 100644
index 0000000..09dd2be
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_event.cc
@@ -0,0 +1,103 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/native_event.h"
+
+#include <cstddef>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+using namespace iree::hal::vulkan;
+
+typedef struct iree_hal_vulkan_native_event_t {
+  iree_hal_resource_t resource;    // must be first: struct is cast to iree_hal_event_t
+  VkDeviceHandle* logical_device;  // unowned; used for syms()/allocators
+  VkEvent handle;                  // owned; destroyed in _destroy below
+} iree_hal_vulkan_native_event_t;
+
+namespace {
+extern const iree_hal_event_vtable_t iree_hal_vulkan_native_event_vtable;  // forward decl; defined at end of file
+}  // namespace
+
+static iree_hal_vulkan_native_event_t* iree_hal_vulkan_native_event_cast(
+    iree_hal_event_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_native_event_vtable);  // verifies vtable identity
+  return (iree_hal_vulkan_native_event_t*)base_value;  // checked downcast
+}
+
+static iree_status_t iree_hal_vulkan_create_event(
+    VkDeviceHandle* logical_device, VkEvent* out_handle) {
+  VkEventCreateInfo create_info;
+  create_info.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
+  create_info.pNext = NULL;
+  create_info.flags = 0;  // no create flags used
+  return VK_RESULT_TO_STATUS(logical_device->syms()->vkCreateEvent(
+                                 *logical_device, &create_info,
+                                 logical_device->allocator(), out_handle),
+                             "vkCreateEvent");
+}
+
+static void iree_hal_vulkan_destroy_event(VkDeviceHandle* logical_device,
+                                          VkEvent handle) {
+  if (handle == VK_NULL_HANDLE) return;  // no-op for never-created handles
+  logical_device->syms()->vkDestroyEvent(*logical_device, handle,
+                                         logical_device->allocator());
+}
+
+iree_status_t iree_hal_vulkan_native_event_create(
+    VkDeviceHandle* logical_device, iree_hal_event_t** out_event) {
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(out_event);
+  *out_event = NULL;  // ensure a defined value on all failure paths
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  VkEvent handle = VK_NULL_HANDLE;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_vulkan_create_event(logical_device, &handle));
+
+  iree_hal_vulkan_native_event_t* event = NULL;
+  iree_status_t status = iree_allocator_malloc(logical_device->host_allocator(),
+                                               sizeof(*event), (void**)&event);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_vulkan_native_event_vtable,
+                                 &event->resource);
+    event->logical_device = logical_device;
+    event->handle = handle;  // takes ownership of the Vulkan handle
+    *out_event = (iree_hal_event_t*)event;
+  } else {
+    iree_hal_vulkan_destroy_event(logical_device, handle);  // wrapper alloc failed: don't leak the handle
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+static void iree_hal_vulkan_native_event_destroy(iree_hal_event_t* base_event) {
+  iree_hal_vulkan_native_event_t* event =
+      iree_hal_vulkan_native_event_cast(base_event);
+  iree_allocator_t host_allocator = event->logical_device->host_allocator();  // fetch before freeing the wrapper
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_vulkan_destroy_event(event->logical_device, event->handle);  // releases the owned Vulkan handle
+  iree_allocator_free(host_allocator, event);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+VkEvent iree_hal_vulkan_native_event_handle(
+    const iree_hal_event_t* base_event) {
+  return ((const iree_hal_vulkan_native_event_t*)base_event)->handle;  // unchecked cast: const path skips IREE_HAL_ASSERT_TYPE
+}
+
+namespace {
+const iree_hal_event_vtable_t iree_hal_vulkan_native_event_vtable = {
+    /*.destroy=*/iree_hal_vulkan_native_event_destroy,  // only entry in this vtable
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/vulkan/native_event.h b/runtime/src/iree/hal/vulkan/native_event.h
new file mode 100644
index 0000000..bb641da
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_event.h
@@ -0,0 +1,30 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_NATIVE_EVENT_H_
+#define IREE_HAL_VULKAN_NATIVE_EVENT_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a native Vulkan VkEvent object.
+iree_status_t iree_hal_vulkan_native_event_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree_hal_event_t** out_event);
+
+// Returns Vulkan event handle.
+VkEvent iree_hal_vulkan_native_event_handle(const iree_hal_event_t* event);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_NATIVE_EVENT_H_
diff --git a/runtime/src/iree/hal/vulkan/native_executable.cc b/runtime/src/iree/hal/vulkan/native_executable.cc
new file mode 100644
index 0000000..8d8d35a
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_executable.cc
@@ -0,0 +1,353 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/native_executable.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbol_tables.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/native_executable_layout.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+// flatcc schemas:
+#include "iree/base/internal/flatcc/parsing.h"
+#include "iree/schemas/spirv_executable_def_reader.h"
+#include "iree/schemas/spirv_executable_def_verifier.h"
+
+using namespace iree::hal::vulkan;
+
+typedef struct iree_hal_vulkan_entry_point_t {
+  VkPipeline pipeline;      // owned; destroyed in executable_destroy
+  iree_string_view_t name;  // view into the executable flatbuffer (not copied)
+} iree_hal_vulkan_entry_point_t;
+
+static iree_status_t iree_hal_vulkan_create_shader_module(
+    VkDeviceHandle* logical_device, iree_const_byte_span_t code,
+    VkShaderModule* out_shader_module) {
+  IREE_TRACE_SCOPE();
+  VkShaderModuleCreateInfo create_info;
+  create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+  create_info.pNext = NULL;
+  create_info.flags = 0;
+  create_info.codeSize = code.data_length;  // in bytes (not uint32 words)
+  create_info.pCode = (const uint32_t*)code.data;
+  VK_RETURN_IF_ERROR(logical_device->syms()->vkCreateShaderModule(
+                         *logical_device, &create_info,
+                         logical_device->allocator(), out_shader_module),
+                     "vkCreateShaderModule");
+  return iree_ok_status();
+}
+
+static void iree_hal_vulkan_destroy_shader_module(
+    VkDeviceHandle* logical_device, VkShaderModule handle) {
+  if (handle == VK_NULL_HANDLE) return;  // no-op for never-created handles
+  logical_device->syms()->vkDestroyShaderModule(*logical_device, handle,
+                                                logical_device->allocator());
+}
+
+static iree_status_t iree_hal_vulkan_create_pipelines(
+    VkDeviceHandle* logical_device, VkPipelineCache pipeline_cache,
+    const iree_hal_executable_params_t* executable_params,
+    iree_SpirVExecutableDef_table_t executable_def,
+    VkShaderModule shader_module, iree_host_size_t pipeline_count,
+    iree_hal_vulkan_entry_point_t* out_entry_points) {
+  IREE_TRACE_SCOPE();
+  uint8_t* scratch_memory = NULL;  // single allocation: create infos followed by spec map entries
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+      logical_device->host_allocator(),
+      pipeline_count * sizeof(VkComputePipelineCreateInfo) +
+          executable_params->constant_count * sizeof(VkSpecializationMapEntry),
+      (void**)&scratch_memory));
+  VkComputePipelineCreateInfo* create_infos =
+      (VkComputePipelineCreateInfo*)scratch_memory;
+  VkSpecializationMapEntry* spec_map_entries =
+      (VkSpecializationMapEntry*)(scratch_memory +
+                                  pipeline_count *
+                                      sizeof(VkComputePipelineCreateInfo));
+
+  VkSpecializationInfo spec_info;
+  memset(&spec_info, 0, sizeof(spec_info));
+  spec_info.mapEntryCount = executable_params->constant_count;
+  spec_info.pMapEntries = spec_map_entries;
+  spec_info.dataSize = executable_params->constant_count * sizeof(uint32_t);
+  spec_info.pData = executable_params->constants;
+  for (iree_host_size_t i = 0; i < executable_params->constant_count; ++i) {
+    spec_map_entries[i].constantID = i;  // constants are densely numbered from 0
+    spec_map_entries[i].offset = i * sizeof(uint32_t);
+    spec_map_entries[i].size = sizeof(uint32_t);
+  }
+
+  flatbuffers_string_vec_t entry_points_vec =
+      iree_SpirVExecutableDef_entry_points_get(executable_def);
+  for (iree_host_size_t entry_ordinal = 0; entry_ordinal < pipeline_count;
+       ++entry_ordinal) {
+    VkComputePipelineCreateInfo* create_info = &create_infos[entry_ordinal];
+    create_info->sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+    create_info->pNext = NULL;
+    create_info->flags = 0;
+    if (!iree_all_bits_set(
+            executable_params->caching_mode,
+            IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_OPTIMIZATION)) {
+      create_info->flags |= VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
+    }
+    if (entry_ordinal == 0) {
+      create_info->flags |= VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT;  // first pipeline may serve as derivative base
+    } else {
+      create_info->flags |= VK_PIPELINE_CREATE_DERIVATIVE_BIT;
+    }
+    create_info->layout = iree_hal_vulkan_native_executable_layout_handle(
+        executable_params->executable_layouts[entry_ordinal]);
+    create_info->basePipelineHandle = VK_NULL_HANDLE;
+    create_info->basePipelineIndex = 0;
+
+    VkPipelineShaderStageCreateInfo* stage_create_info = &create_info->stage;
+    stage_create_info->sType =
+        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    stage_create_info->pNext = NULL;
+    stage_create_info->flags = 0;
+    stage_create_info->stage = VK_SHADER_STAGE_COMPUTE_BIT;
+    stage_create_info->module = shader_module;  // all entry points share one module
+    stage_create_info->pName =
+        flatbuffers_string_vec_at(entry_points_vec, entry_ordinal);
+    stage_create_info->pSpecializationInfo = &spec_info;  // shared by all pipelines
+  }
+
+  VkPipeline* pipelines =
+      (VkPipeline*)iree_alloca(pipeline_count * sizeof(VkPipeline));  // NOTE(review): stack alloc assumes small pipeline_count -- confirm
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      logical_device->syms()->vkCreateComputePipelines(
+          *logical_device, pipeline_cache, (uint32_t)pipeline_count,
+          create_infos, logical_device->allocator(), pipelines),
+      "vkCreateComputePipelines");
+  if (iree_status_is_ok(status)) {  // NOTE(review): on failure, partially-created pipelines are not destroyed here -- verify
+    for (iree_host_size_t i = 0; i < pipeline_count; ++i) {
+      out_entry_points[i].pipeline = pipelines[i];
+    }
+  }
+
+  iree_allocator_free(logical_device->host_allocator(), scratch_memory);  // freed on both success and failure
+  return status;
+}
+
+static void iree_hal_vulkan_destroy_pipeline(VkDeviceHandle* logical_device,
+                                             VkPipeline handle) {
+  IREE_TRACE_SCOPE();
+  if (handle == VK_NULL_HANDLE) return;  // no-op for never-created handles
+  logical_device->syms()->vkDestroyPipeline(*logical_device, handle,
+                                            logical_device->allocator());
+}
+
+// Verifies the structure of the flatbuffer so that we can avoid doing so during
+// runtime. There are still some conditions we must be aware of (such as omitted
+// names on functions with internal linkage), however we shouldn't need to
+// bounds check anything within the flatbuffer after this succeeds.
+static iree_status_t iree_hal_spirv_executable_flatbuffer_verify(
+    iree_const_byte_span_t flatbuffer_data,
+    iree_host_size_t expected_entry_point_count) {
+  if (!flatbuffer_data.data || flatbuffer_data.data_length < 16) {  // sanity floor before running the verifier
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "flatbuffer data is not present or less than 16 bytes (%zu total)",
+        flatbuffer_data.data_length);
+  }
+
+  // Run flatcc generated verification. This ensures all pointers are in-bounds
+  // and that we can safely walk the file, but not that the actual contents of
+  // the flatbuffer meet our expectations.
+  int verify_ret = iree_SpirVExecutableDef_verify_as_root(
+      flatbuffer_data.data, flatbuffer_data.data_length);
+  if (verify_ret != flatcc_verify_ok) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "flatbuffer verification failed: %s",
+                            flatcc_verify_error_string(verify_ret));
+  }
+
+  iree_SpirVExecutableDef_table_t executable_def =
+      iree_SpirVExecutableDef_as_root(flatbuffer_data.data);
+
+  flatbuffers_string_vec_t entry_points_vec =
+      iree_SpirVExecutableDef_entry_points_get(executable_def);
+  size_t entry_point_count = flatbuffers_string_vec_len(entry_points_vec);
+  if (entry_point_count != expected_entry_point_count) {  // must match the caller-provided layout count
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "executable provides %zu entry points but caller "
+                            "provided %zu; must match",
+                            entry_point_count, expected_entry_point_count);
+  }
+
+  for (size_t i = 0; i < entry_point_count; ++i) {
+    if (!flatbuffers_string_len(
+            flatbuffers_string_vec_at(entry_points_vec, i))) {  // names are used as pName at pipeline creation
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "executable entry point %zu has no name", i);
+    }
+  }
+
+  if (flatbuffers_uint32_vec_len(
+          iree_SpirVExecutableDef_code_get(executable_def)) == 0) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "executable SPIR-V code is missing/empty");
+  }
+
+  return iree_ok_status();
+}
+
+typedef struct iree_hal_vulkan_native_executable_t {
+  iree_hal_resource_t resource;    // must be first: struct is cast to iree_hal_executable_t
+  VkDeviceHandle* logical_device;  // unowned; used for syms()/allocators
+  iree_host_size_t entry_point_count;
+  iree_hal_vulkan_entry_point_t entry_points[];  // flexible array, sized at allocation time
+} iree_hal_vulkan_native_executable_t;
+
+namespace {
+extern const iree_hal_executable_vtable_t  // forward decl; defined at end of file
+    iree_hal_vulkan_native_executable_vtable;
+}  // namespace
+
+static iree_hal_vulkan_native_executable_t*
+iree_hal_vulkan_native_executable_cast(iree_hal_executable_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_native_executable_vtable);  // verifies vtable identity
+  return (iree_hal_vulkan_native_executable_t*)base_value;  // checked downcast
+}
+
+iree_status_t iree_hal_vulkan_native_executable_create(
+    iree::hal::vulkan::VkDeviceHandle* logical_device,
+    VkPipelineCache pipeline_cache,
+    const iree_hal_executable_params_t* executable_params,
+    iree_hal_executable_t** out_executable) {
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(executable_params);
+  IREE_ASSERT_ARGUMENT(out_executable);
+  *out_executable = NULL;  // ensure a defined value on all failure paths
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Verify and fetch the executable flatbuffer wrapper.
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_spirv_executable_flatbuffer_verify(
+              executable_params->executable_data,
+              executable_params->executable_layout_count));
+  iree_SpirVExecutableDef_table_t executable_def =
+      iree_SpirVExecutableDef_as_root(executable_params->executable_data.data);
+
+  // Create the shader module.
+  flatbuffers_uint32_vec_t code_vec =
+      iree_SpirVExecutableDef_code_get(executable_def);
+  VkShaderModule shader_module = VK_NULL_HANDLE;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_vulkan_create_shader_module(
+              logical_device,
+              iree_make_const_byte_span(
+                  code_vec,
+                  flatbuffers_uint32_vec_len(code_vec) * sizeof(uint32_t)),
+              &shader_module));
+
+  // Create pipelines for each entry point.
+  flatbuffers_string_vec_t entry_points_vec =
+      iree_SpirVExecutableDef_entry_points_get(executable_def);
+  iree_host_size_t entry_point_count =
+      flatbuffers_string_vec_len(entry_points_vec);
+
+  iree_hal_vulkan_native_executable_t* executable = NULL;
+  iree_host_size_t total_size =
+      sizeof(*executable) +
+      entry_point_count * sizeof(*executable->entry_points);  // flexible array member sized here
+  iree_status_t status = iree_allocator_malloc(logical_device->host_allocator(),
+                                               total_size, (void**)&executable);
+  if (iree_status_is_ok(status)) {
+    iree_hal_resource_initialize(&iree_hal_vulkan_native_executable_vtable,
+                                 &executable->resource);
+    executable->logical_device = logical_device;
+    executable->entry_point_count = entry_point_count;
+    memset(executable->entry_points, 0,
+           entry_point_count * sizeof(*executable->entry_points));  // zero so destroy can handle partial init
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vulkan_create_pipelines(
+        logical_device, pipeline_cache, executable_params, executable_def,
+        shader_module, executable->entry_point_count, executable->entry_points);
+  }
+  iree_hal_vulkan_destroy_shader_module(logical_device, shader_module);  // only needed during pipeline creation
+
+  if (iree_status_is_ok(status)) {
+    flatbuffers_string_vec_t entry_points_vec =
+        iree_SpirVExecutableDef_entry_points_get(executable_def);
+    for (iree_host_size_t i = 0; i < entry_point_count; ++i) {
+      flatbuffers_string_t name =
+          flatbuffers_string_vec_at(entry_points_vec, i);
+      executable->entry_points[i].name =
+          iree_make_string_view(name, flatbuffers_string_len(name));  // view into the flatbuffer, not copied
+      IREE_TRACE_ZONE_APPEND_TEXT(z0, name);
+    }
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_executable = (iree_hal_executable_t*)executable;
+  } else {
+    iree_hal_executable_destroy((iree_hal_executable_t*)executable);  // NOTE(review): executable may be NULL if malloc failed -- assumes destroy tolerates NULL, confirm
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+static void iree_hal_vulkan_native_executable_destroy(
+    iree_hal_executable_t* base_executable) {
+  iree_hal_vulkan_native_executable_t* executable =
+      iree_hal_vulkan_native_executable_cast(base_executable);
+  iree_allocator_t host_allocator =
+      executable->logical_device->host_allocator();  // fetch before freeing the wrapper
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  for (iree_host_size_t i = 0; i < executable->entry_point_count; ++i) {
+    iree_hal_vulkan_destroy_pipeline(executable->logical_device,
+                                     executable->entry_points[i].pipeline);  // VK_NULL_HANDLE entries are skipped (partial init safe)
+  }
+  iree_allocator_free(host_allocator, executable);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_hal_vulkan_native_executable_entry_point_source_location(
+    iree_hal_executable_t* base_executable, iree_host_size_t entry_ordinal,
+    iree_hal_vulkan_source_location_t* out_source_location) {
+  iree_hal_vulkan_native_executable_t* executable =
+      iree_hal_vulkan_native_executable_cast(base_executable);
+  memset(out_source_location, 0, sizeof(*out_source_location));  // zeroed output is the "unknown" result
+  if (entry_ordinal >= executable->entry_point_count) {
+    return;  // out-of-range ordinal: leave output zeroed rather than failing
+  }
+  out_source_location->func_name = executable->entry_points[entry_ordinal].name;
+
+  // TODO(benvanik): plumb through file name/line for the MLIR function.
+  out_source_location->file_name = out_source_location->func_name;  // placeholder until real file info is plumbed
+  out_source_location->line = 0;
+}
+
+iree_status_t iree_hal_vulkan_native_executable_pipeline_for_entry_point(
+    iree_hal_executable_t* base_executable, iree_host_size_t entry_ordinal,
+    VkPipeline* out_pipeline_handle) {
+  iree_hal_vulkan_native_executable_t* executable =
+      iree_hal_vulkan_native_executable_cast(base_executable);
+  if (entry_ordinal >= executable->entry_point_count) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "invalid entry point ordinal %zu", entry_ordinal);
+  }
+  *out_pipeline_handle = executable->entry_points[entry_ordinal].pipeline;  // borrowed; owned by the executable
+  return iree_ok_status();
+}
+
+namespace {
+const iree_hal_executable_vtable_t iree_hal_vulkan_native_executable_vtable = {
+    /*.destroy=*/iree_hal_vulkan_native_executable_destroy,  // only entry in this vtable
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/vulkan/native_executable.h b/runtime/src/iree/hal/vulkan/native_executable.h
new file mode 100644
index 0000000..e7f3c98
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_executable.h
@@ -0,0 +1,52 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_NATIVE_EXECUTABLE_H_
+#define IREE_HAL_VULKAN_NATIVE_EXECUTABLE_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h"
+// clang-format on
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_hal_vulkan_source_location_t {
+ iree_string_view_t file_name;
+ int line;
+ iree_string_view_t func_name;
+} iree_hal_vulkan_source_location_t;
+
+// Creates a wrapper for one or more VkPipelines that are sourced from the same
+// IREE executable. Each of the pipelines will share the same shader module
+// and just differs by the entry point into the shader module it references.
+iree_status_t iree_hal_vulkan_native_executable_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ VkPipelineCache pipeline_cache,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable);
+
+// Returns the source location for the given entry point. May be empty if not
+// available.
+void iree_hal_vulkan_native_executable_entry_point_source_location(
+ iree_hal_executable_t* executable, iree_host_size_t entry_ordinal,
+ iree_hal_vulkan_source_location_t* out_source_location);
+
+// Returns the cached VkPipeline for the given executable |entry_ordinal|.
+iree_status_t iree_hal_vulkan_native_executable_pipeline_for_entry_point(
+ iree_hal_executable_t* executable, iree_host_size_t entry_ordinal,
+ VkPipeline* out_pipeline_handle);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_NATIVE_EXECUTABLE_H_
diff --git a/runtime/src/iree/hal/vulkan/native_executable_layout.cc b/runtime/src/iree/hal/vulkan/native_executable_layout.cc
new file mode 100644
index 0000000..572c8bd
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_executable_layout.cc
@@ -0,0 +1,175 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/native_executable_layout.h"
+
+#include <cstddef>
+#include <cstdint>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbol_tables.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/native_descriptor_set_layout.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+using namespace iree::hal::vulkan;
+
+typedef struct iree_hal_vulkan_native_executable_layout_t {
+ iree_hal_resource_t resource;
+ VkDeviceHandle* logical_device;
+ VkPipelineLayout handle;
+ iree_host_size_t set_layout_count;
+ iree_hal_descriptor_set_layout_t* set_layouts[];
+} iree_hal_vulkan_native_executable_layout_t;
+
+namespace {
+extern const iree_hal_executable_layout_vtable_t
+ iree_hal_vulkan_native_executable_layout_vtable;
+} // namespace
+
+static iree_hal_vulkan_native_executable_layout_t*
+iree_hal_vulkan_native_executable_layout_cast(
+ iree_hal_executable_layout_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value,
+ &iree_hal_vulkan_native_executable_layout_vtable);
+ return (iree_hal_vulkan_native_executable_layout_t*)base_value;
+}
+
+static iree_status_t iree_hal_vulkan_create_pipeline_layout(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree_host_size_t push_constant_count, iree_host_size_t set_layout_count,
+ iree_hal_descriptor_set_layout_t** set_layouts,
+ VkPipelineLayout* out_handle) {
+ VkDescriptorSetLayout* set_layout_handles =
+ (VkDescriptorSetLayout*)iree_alloca(set_layout_count *
+ sizeof(VkDescriptorSetLayout));
+ for (iree_host_size_t i = 0; i < set_layout_count; ++i) {
+ set_layout_handles[i] =
+ iree_hal_vulkan_native_descriptor_set_layout_handle(set_layouts[i]);
+ }
+
+ VkPushConstantRange push_constant_ranges[1];
+ push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+ push_constant_ranges[0].offset = 0;
+ push_constant_ranges[0].size =
+ (uint32_t)(push_constant_count * sizeof(uint32_t));
+
+ VkPipelineLayoutCreateInfo create_info;
+ create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+ create_info.pNext = nullptr;
+ create_info.flags = 0;
+ create_info.setLayoutCount = (uint32_t)set_layout_count;
+ create_info.pSetLayouts = set_layout_handles;
+ create_info.pushConstantRangeCount = push_constant_count > 0 ? 1 : 0;
+ create_info.pPushConstantRanges = push_constant_ranges;
+
+ return VK_RESULT_TO_STATUS(logical_device->syms()->vkCreatePipelineLayout(
+ *logical_device, &create_info,
+ logical_device->allocator(), out_handle),
+ "vkCreatePipelineLayout");
+}
+
+static void iree_hal_vulkan_destroy_pipeline_layout(
+ VkDeviceHandle* logical_device, VkPipelineLayout handle) {
+ if (handle == VK_NULL_HANDLE) return;
+ logical_device->syms()->vkDestroyPipelineLayout(*logical_device, handle,
+ logical_device->allocator());
+}
+
+iree_status_t iree_hal_vulkan_native_executable_layout_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree_host_size_t push_constant_count, iree_host_size_t set_layout_count,
+ iree_hal_descriptor_set_layout_t** set_layouts,
+ iree_hal_executable_layout_t** out_executable_layout) {
+ IREE_ASSERT_ARGUMENT(logical_device);
+ IREE_ASSERT_ARGUMENT(!set_layout_count || set_layouts);
+ IREE_ASSERT_ARGUMENT(out_executable_layout);
+ *out_executable_layout = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ VkPipelineLayout handle = VK_NULL_HANDLE;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_vulkan_create_pipeline_layout(
+ logical_device, push_constant_count, set_layout_count,
+ set_layouts, &handle));
+
+ iree_hal_vulkan_native_executable_layout_t* executable_layout = NULL;
+ iree_host_size_t total_size =
+ sizeof(*executable_layout) +
+ set_layout_count * sizeof(*executable_layout->set_layouts);
+ iree_status_t status = iree_allocator_malloc(
+ logical_device->host_allocator(), total_size, (void**)&executable_layout);
+ if (iree_status_is_ok(status)) {
+ iree_hal_resource_initialize(
+ &iree_hal_vulkan_native_executable_layout_vtable,
+ &executable_layout->resource);
+ executable_layout->logical_device = logical_device;
+ executable_layout->handle = handle;
+ executable_layout->set_layout_count = set_layout_count;
+ for (iree_host_size_t i = 0; i < set_layout_count; ++i) {
+ executable_layout->set_layouts[i] = set_layouts[i];
+ iree_hal_descriptor_set_layout_retain(set_layouts[i]);
+ }
+ *out_executable_layout = (iree_hal_executable_layout_t*)executable_layout;
+ } else {
+ iree_hal_vulkan_destroy_pipeline_layout(logical_device, handle);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_vulkan_native_executable_layout_destroy(
+ iree_hal_executable_layout_t* base_executable_layout) {
+ iree_hal_vulkan_native_executable_layout_t* executable_layout =
+ iree_hal_vulkan_native_executable_layout_cast(base_executable_layout);
+ iree_allocator_t host_allocator =
+ executable_layout->logical_device->host_allocator();
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_vulkan_destroy_pipeline_layout(executable_layout->logical_device,
+ executable_layout->handle);
+ for (iree_host_size_t i = 0; i < executable_layout->set_layout_count; ++i) {
+ iree_hal_descriptor_set_layout_release(executable_layout->set_layouts[i]);
+ }
+ iree_allocator_free(host_allocator, executable_layout);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+VkPipelineLayout iree_hal_vulkan_native_executable_layout_handle(
+ iree_hal_executable_layout_t* base_executable_layout) {
+ iree_hal_vulkan_native_executable_layout_t* executable_layout =
+ iree_hal_vulkan_native_executable_layout_cast(base_executable_layout);
+ return executable_layout->handle;
+}
+
+iree_host_size_t iree_hal_vulkan_native_executable_layout_set_count(
+ iree_hal_executable_layout_t* base_executable_layout) {
+ iree_hal_vulkan_native_executable_layout_t* executable_layout =
+ iree_hal_vulkan_native_executable_layout_cast(base_executable_layout);
+ return executable_layout->set_layout_count;
+}
+
+iree_hal_descriptor_set_layout_t* iree_hal_vulkan_native_executable_layout_set(
+ iree_hal_executable_layout_t* base_executable_layout,
+ iree_host_size_t set_index) {
+ iree_hal_vulkan_native_executable_layout_t* executable_layout =
+ iree_hal_vulkan_native_executable_layout_cast(base_executable_layout);
+ if (IREE_UNLIKELY(set_index >= executable_layout->set_layout_count)) {
+ return NULL;
+ }
+ return executable_layout->set_layouts[set_index];
+}
+
+namespace {
+const iree_hal_executable_layout_vtable_t
+ iree_hal_vulkan_native_executable_layout_vtable = {
+ /*.destroy=*/iree_hal_vulkan_native_executable_layout_destroy,
+};
+} // namespace
diff --git a/runtime/src/iree/hal/vulkan/native_executable_layout.h b/runtime/src/iree/hal/vulkan/native_executable_layout.h
new file mode 100644
index 0000000..7f9e5af
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_executable_layout.h
@@ -0,0 +1,47 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_NATIVE_EXECUTABLE_LAYOUT_H_
+#define IREE_HAL_VULKAN_NATIVE_EXECUTABLE_LAYOUT_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h"
+// clang-format on
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a VkPipelineLayout-based executable layout composed of one or more
+// descriptor set layouts.
+iree_status_t iree_hal_vulkan_native_executable_layout_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree_host_size_t push_constant_count, iree_host_size_t set_layout_count,
+ iree_hal_descriptor_set_layout_t** set_layouts,
+ iree_hal_executable_layout_t** out_executable_layout);
+
+// Returns the native VkPipelineLayout handle for the executable layout.
+VkPipelineLayout iree_hal_vulkan_native_executable_layout_handle(
+ iree_hal_executable_layout_t* executable_layout);
+
+// Returns the total number of descriptor sets within the layout.
+iree_host_size_t iree_hal_vulkan_native_executable_layout_set_count(
+ iree_hal_executable_layout_t* executable_layout);
+
+// Returns the descriptor set layout with the given |set_index|.
+iree_hal_descriptor_set_layout_t* iree_hal_vulkan_native_executable_layout_set(
+ iree_hal_executable_layout_t* executable_layout,
+ iree_host_size_t set_index);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_NATIVE_EXECUTABLE_LAYOUT_H_
diff --git a/runtime/src/iree/hal/vulkan/native_semaphore.cc b/runtime/src/iree/hal/vulkan/native_semaphore.cc
new file mode 100644
index 0000000..4ef4f36
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_semaphore.cc
@@ -0,0 +1,279 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/native_semaphore.h"
+
+#include <cstddef>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbol_tables.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+// The maximum valid payload value of an iree_hal_semaphore_t.
+// Payload values larger than this indicate that the semaphore has failed.
+//
+// This originates from Vulkan having a lower-bound of INT_MAX for
+// maxTimelineSemaphoreValueDifference and many Android devices only supporting
+// that lower-bound. At ~100 signals per second it'll take 1.5+ years to
+// saturate. We may increase this value at some point but so long as there are
+// some devices in the wild that may have this limitation we can ensure better
+// consistency across the backends by observing this.
+//
+// The major mitigation here is that in proper usage of IREE there are no
+// semaphores that are implicitly referenced by multiple VMs (each creates their
+// own internally) and in a multitenant system each session should have its own
+// semaphores - so even if the process lives for years it's highly unlikely any
+// particular session does. Whatever, 640K is enough for anyone.
+//
+// See:
+// https://vulkan.gpuinfo.org/displayextensionproperty.php?name=maxTimelineSemaphoreValueDifference
+#define IREE_HAL_VULKAN_SEMAPHORE_MAX_VALUE (2147483647ull - 1)
+
+using namespace iree::hal::vulkan;
+
+typedef struct iree_hal_vulkan_native_semaphore_t {
+ iree_hal_resource_t resource;
+ VkDeviceHandle* logical_device;
+ VkSemaphore handle;
+ iree_atomic_intptr_t failure_status;
+} iree_hal_vulkan_native_semaphore_t;
+
+namespace {
+extern const iree_hal_semaphore_vtable_t
+ iree_hal_vulkan_native_semaphore_vtable;
+} // namespace
+
+static iree_hal_vulkan_native_semaphore_t*
+iree_hal_vulkan_native_semaphore_cast(iree_hal_semaphore_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_native_semaphore_vtable);
+ return (iree_hal_vulkan_native_semaphore_t*)base_value;
+}
+
+iree_status_t iree_hal_vulkan_native_semaphore_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device, uint64_t initial_value,
+ iree_hal_semaphore_t** out_semaphore) {
+ IREE_ASSERT_ARGUMENT(logical_device);
+ IREE_ASSERT_ARGUMENT(out_semaphore);
+ *out_semaphore = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ VkSemaphoreTypeCreateInfo timeline_create_info;
+ timeline_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
+ timeline_create_info.pNext = NULL;
+ timeline_create_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
+ timeline_create_info.initialValue = initial_value;
+
+ VkSemaphoreCreateInfo create_info;
+ create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+ create_info.pNext = &timeline_create_info;
+ create_info.flags = 0;
+ VkSemaphore handle = VK_NULL_HANDLE;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, VK_RESULT_TO_STATUS(logical_device->syms()->vkCreateSemaphore(
+ *logical_device, &create_info,
+ logical_device->allocator(), &handle),
+ "vkCreateSemaphore"));
+
+ iree_hal_vulkan_native_semaphore_t* semaphore = NULL;
+ iree_status_t status = iree_allocator_malloc(
+ logical_device->host_allocator(), sizeof(*semaphore), (void**)&semaphore);
+ if (iree_status_is_ok(status)) {
+ iree_hal_resource_initialize(&iree_hal_vulkan_native_semaphore_vtable,
+ &semaphore->resource);
+ semaphore->logical_device = logical_device;
+ semaphore->handle = handle;
+ iree_atomic_store_intptr(&semaphore->failure_status, 0,
+ iree_memory_order_release);
+ *out_semaphore = (iree_hal_semaphore_t*)semaphore;
+ } else {
+ logical_device->syms()->vkDestroySemaphore(*logical_device, handle,
+ logical_device->allocator());
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_vulkan_native_semaphore_destroy(
+ iree_hal_semaphore_t* base_semaphore) {
+ iree_hal_vulkan_native_semaphore_t* semaphore =
+ iree_hal_vulkan_native_semaphore_cast(base_semaphore);
+ iree_allocator_t host_allocator = semaphore->logical_device->host_allocator();
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_free((iree_status_t)iree_atomic_load_intptr(
+ &semaphore->failure_status, iree_memory_order_acquire));
+ semaphore->logical_device->syms()->vkDestroySemaphore(
+ *semaphore->logical_device, semaphore->handle,
+ semaphore->logical_device->allocator());
+ iree_allocator_free(host_allocator, semaphore);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+VkSemaphore iree_hal_vulkan_native_semaphore_handle(
+ iree_hal_semaphore_t* base_semaphore) {
+ iree_hal_vulkan_native_semaphore_t* semaphore =
+ iree_hal_vulkan_native_semaphore_cast(base_semaphore);
+ return semaphore->handle;
+}
+
+static iree_status_t iree_hal_vulkan_native_semaphore_query(
+ iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) {
+ iree_hal_vulkan_native_semaphore_t* semaphore =
+ iree_hal_vulkan_native_semaphore_cast(base_semaphore);
+ *out_value = 0;
+
+ uint64_t value = 0;
+ IREE_RETURN_IF_ERROR(VK_RESULT_TO_STATUS(
+ semaphore->logical_device->syms()->vkGetSemaphoreCounterValue(
+ *semaphore->logical_device, semaphore->handle, &value),
+ "vkGetSemaphoreCounterValue"));
+
+ if (value > IREE_HAL_VULKAN_SEMAPHORE_MAX_VALUE) {
+ iree_status_t failure_status = (iree_status_t)iree_atomic_load_intptr(
+ &semaphore->failure_status, iree_memory_order_acquire);
+ if (iree_status_is_ok(failure_status)) {
+ return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+ "overflowed timeline semaphore max value");
+ }
+ return iree_status_clone(failure_status);
+ }
+
+ *out_value = value;
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_vulkan_native_semaphore_signal(
+ iree_hal_semaphore_t* base_semaphore, uint64_t new_value) {
+ iree_hal_vulkan_native_semaphore_t* semaphore =
+ iree_hal_vulkan_native_semaphore_cast(base_semaphore);
+
+ VkSemaphoreSignalInfo signal_info;
+ signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO;
+ signal_info.pNext = NULL;
+ signal_info.semaphore = semaphore->handle;
+ signal_info.value = new_value;
+ return VK_RESULT_TO_STATUS(
+ semaphore->logical_device->syms()->vkSignalSemaphore(
+ *semaphore->logical_device, &signal_info),
+ "vkSignalSemaphore");
+}
+
+static void iree_hal_vulkan_native_semaphore_fail(
+ iree_hal_semaphore_t* base_semaphore, iree_status_t status) {
+ iree_hal_vulkan_native_semaphore_t* semaphore =
+ iree_hal_vulkan_native_semaphore_cast(base_semaphore);
+
+ // Try to set our local status - we only preserve the first failure so only
+ // do this if we are going from a valid semaphore to a failed one.
+ iree_status_t old_status = iree_ok_status();
+ if (!iree_atomic_compare_exchange_strong_intptr(
+ &semaphore->failure_status, (intptr_t*)&old_status, (intptr_t)status,
+ iree_memory_order_seq_cst, iree_memory_order_seq_cst)) {
+ // Previous status was not OK; drop our new status.
+ IREE_IGNORE_ERROR(status);
+ return;
+ }
+
+ VkSemaphoreSignalInfo signal_info;
+ signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO;
+ signal_info.pNext = NULL;
+ signal_info.semaphore = semaphore->handle;
+ signal_info.value = IREE_HAL_VULKAN_SEMAPHORE_MAX_VALUE + 1;
+ // NOTE: we don't care about the result in case of failures as we are
+ // failing and the caller will likely be tearing everything down anyway.
+ semaphore->logical_device->syms()->vkSignalSemaphore(
+ *semaphore->logical_device, &signal_info);
+}
+
+iree_status_t iree_hal_vulkan_native_semaphore_multi_wait(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout,
+ VkSemaphoreWaitFlags wait_flags) {
+ if (semaphore_list->count == 0) return iree_ok_status();
+
+ iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+ uint64_t timeout_ns;
+ if (deadline_ns == IREE_TIME_INFINITE_FUTURE) {
+ timeout_ns = UINT64_MAX;
+ } else if (deadline_ns == IREE_TIME_INFINITE_PAST) {
+ timeout_ns = 0;
+ } else {
+ iree_time_t now_ns = iree_time_now();
+ if (deadline_ns < now_ns) {
+ return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+ }
+ timeout_ns = (uint64_t)(deadline_ns - now_ns);
+ }
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ VkSemaphore* semaphore_handles =
+ (VkSemaphore*)iree_alloca(semaphore_list->count * sizeof(VkSemaphore));
+ for (iree_host_size_t i = 0; i < semaphore_list->count; ++i) {
+ semaphore_handles[i] =
+ iree_hal_vulkan_native_semaphore_handle(semaphore_list->semaphores[i]);
+ }
+
+ VkSemaphoreWaitInfo wait_info;
+ wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO;
+ wait_info.pNext = nullptr;
+ wait_info.flags = wait_flags;
+ wait_info.semaphoreCount = semaphore_list->count;
+ wait_info.pSemaphores = semaphore_handles;
+ wait_info.pValues = semaphore_list->payload_values;
+ static_assert(
+ sizeof(wait_info.pValues[0]) == sizeof(semaphore_list->payload_values[0]),
+ "payload value type must match vulkan expected size");
+
+ // NOTE: this may fail with a timeout (VK_TIMEOUT) or in the case of a
+ // device loss event may return either VK_SUCCESS *or* VK_ERROR_DEVICE_LOST.
+ // We may want to explicitly query for device loss after a successful wait
+ // to ensure we consistently return errors.
+ VkResult result = logical_device->syms()->vkWaitSemaphores(
+ *logical_device, &wait_info, timeout_ns);
+
+ IREE_TRACE_ZONE_END(z0);
+
+ if (result == VK_SUCCESS) {
+ return iree_ok_status();
+ } else if (result == VK_ERROR_DEVICE_LOST) {
+ // Nothing we do now matters.
+ return VK_RESULT_TO_STATUS(result, "vkWaitSemaphores");
+ } else if (result == VK_TIMEOUT) {
+ return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+ }
+ return VK_RESULT_TO_STATUS(result, "vkWaitSemaphores");
+}
+
+static iree_status_t iree_hal_vulkan_native_semaphore_wait(
+ iree_hal_semaphore_t* base_semaphore, uint64_t value,
+ iree_timeout_t timeout) {
+ iree_hal_vulkan_native_semaphore_t* semaphore =
+ iree_hal_vulkan_native_semaphore_cast(base_semaphore);
+ iree_hal_semaphore_list_t semaphore_list = {
+ /*.count=*/1,
+ /*.semaphores=*/&base_semaphore,
+ /*.payload_values=*/&value,
+ };
+ return iree_hal_vulkan_native_semaphore_multi_wait(
+ semaphore->logical_device, &semaphore_list, timeout, 0);
+}
+
+namespace {
+const iree_hal_semaphore_vtable_t iree_hal_vulkan_native_semaphore_vtable = {
+ /*.destroy=*/iree_hal_vulkan_native_semaphore_destroy,
+ /*.query=*/iree_hal_vulkan_native_semaphore_query,
+ /*.signal=*/iree_hal_vulkan_native_semaphore_signal,
+ /*.fail=*/iree_hal_vulkan_native_semaphore_fail,
+ /*.wait=*/iree_hal_vulkan_native_semaphore_wait,
+};
+} // namespace
diff --git a/runtime/src/iree/hal/vulkan/native_semaphore.h b/runtime/src/iree/hal/vulkan/native_semaphore.h
new file mode 100644
index 0000000..91580de
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/native_semaphore.h
@@ -0,0 +1,46 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_NATIVE_SEMAPHORE_H_
+#define IREE_HAL_VULKAN_NATIVE_SEMAPHORE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a timeline semaphore implemented using the native VkSemaphore type.
+// This may require emulation pre-Vulkan 1.2 when timeline semaphores were only
+// an extension.
+iree_status_t iree_hal_vulkan_native_semaphore_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device, uint64_t initial_value,
+ iree_hal_semaphore_t** out_semaphore);
+
+// Returns the Vulkan timeline semaphore handle.
+VkSemaphore iree_hal_vulkan_native_semaphore_handle(
+ iree_hal_semaphore_t* semaphore);
+
+// Performs a multi-wait on one or more semaphores.
+// By default this is an all-wait but |wait_flags| may contain
+// VK_SEMAPHORE_WAIT_ANY_BIT to change to an any-wait.
+//
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if the wait does not complete before
+// |deadline_ns| elapses.
+iree_status_t iree_hal_vulkan_native_semaphore_multi_wait(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout,
+ VkSemaphoreWaitFlags wait_flags);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_NATIVE_SEMAPHORE_H_
diff --git a/runtime/src/iree/hal/vulkan/nop_executable_cache.cc b/runtime/src/iree/hal/vulkan/nop_executable_cache.cc
new file mode 100644
index 0000000..fdd5348
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/nop_executable_cache.cc
@@ -0,0 +1,101 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/nop_executable_cache.h"
+
+#include <cstddef>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbol_tables.h"
+#include "iree/hal/vulkan/native_executable.h"
+
+using namespace iree::hal::vulkan;
+
+typedef struct iree_hal_vulkan_nop_executable_cache_t {
+ iree_hal_resource_t resource;
+ VkDeviceHandle* logical_device;
+} iree_hal_vulkan_nop_executable_cache_t;
+
+namespace {
+extern const iree_hal_executable_cache_vtable_t
+ iree_hal_vulkan_nop_executable_cache_vtable;
+} // namespace
+
+static iree_hal_vulkan_nop_executable_cache_t*
+iree_hal_vulkan_nop_executable_cache_cast(
+ iree_hal_executable_cache_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value,
+ &iree_hal_vulkan_nop_executable_cache_vtable);
+ return (iree_hal_vulkan_nop_executable_cache_t*)base_value;
+}
+
+iree_status_t iree_hal_vulkan_nop_executable_cache_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree_string_view_t identifier,
+ iree_hal_executable_cache_t** out_executable_cache) {
+ IREE_ASSERT_ARGUMENT(out_executable_cache);
+ *out_executable_cache = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_vulkan_nop_executable_cache_t* executable_cache = NULL;
+ iree_status_t status = iree_allocator_malloc(logical_device->host_allocator(),
+ sizeof(*executable_cache),
+ (void**)&executable_cache);
+ if (iree_status_is_ok(status)) {
+ iree_hal_resource_initialize(&iree_hal_vulkan_nop_executable_cache_vtable,
+ &executable_cache->resource);
+ executable_cache->logical_device = logical_device;
+
+ *out_executable_cache = (iree_hal_executable_cache_t*)executable_cache;
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_hal_vulkan_nop_executable_cache_destroy(
+ iree_hal_executable_cache_t* base_executable_cache) {
+ iree_hal_vulkan_nop_executable_cache_t* executable_cache =
+ iree_hal_vulkan_nop_executable_cache_cast(base_executable_cache);
+ iree_allocator_t host_allocator =
+ executable_cache->logical_device->host_allocator();
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_free(host_allocator, executable_cache);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static bool iree_hal_vulkan_nop_executable_cache_can_prepare_format(
+ iree_hal_executable_cache_t* base_executable_cache,
+ iree_hal_executable_caching_mode_t caching_mode,
+ iree_string_view_t executable_format) {
+ return iree_string_view_equal(executable_format,
+ iree_make_cstring_view("SPVE"));
+}
+
+static iree_status_t iree_hal_vulkan_nop_executable_cache_prepare_executable(
+ iree_hal_executable_cache_t* base_executable_cache,
+ const iree_hal_executable_params_t* executable_params,
+ iree_hal_executable_t** out_executable) {
+ iree_hal_vulkan_nop_executable_cache_t* executable_cache =
+ iree_hal_vulkan_nop_executable_cache_cast(base_executable_cache);
+ return iree_hal_vulkan_native_executable_create(
+ executable_cache->logical_device,
+ /*pipeline_cache=*/VK_NULL_HANDLE, executable_params, out_executable);
+}
+
+namespace {
+const iree_hal_executable_cache_vtable_t
+ iree_hal_vulkan_nop_executable_cache_vtable = {
+ /*.destroy=*/iree_hal_vulkan_nop_executable_cache_destroy,
+ /*.can_prepare_format=*/
+ iree_hal_vulkan_nop_executable_cache_can_prepare_format,
+ /*.prepare_executable=*/
+ iree_hal_vulkan_nop_executable_cache_prepare_executable,
+};
+} // namespace
diff --git a/runtime/src/iree/hal/vulkan/nop_executable_cache.h b/runtime/src/iree/hal/vulkan/nop_executable_cache.h
new file mode 100644
index 0000000..7a3e10b
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/nop_executable_cache.h
@@ -0,0 +1,30 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_NOP_EXECUTABLE_CACHE_H_
+#define IREE_HAL_VULKAN_NOP_EXECUTABLE_CACHE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a no-op executable cache that does not cache at all.
+// This is useful to isolate pipeline caching behavior and verify compilation
+// behavior.
+iree_status_t iree_hal_vulkan_nop_executable_cache_create(
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree_string_view_t identifier,
+ iree_hal_executable_cache_t** out_executable_cache);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_NOP_EXECUTABLE_CACHE_H_
diff --git a/runtime/src/iree/hal/vulkan/registration/BUILD b/runtime/src/iree/hal/vulkan/registration/BUILD
new file mode 100644
index 0000000..8706fbb
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/registration/BUILD
@@ -0,0 +1,45 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_cmake_extra_content(
+ content = """
+if(${IREE_HAL_DRIVER_VULKAN})
+""",
+ inline = True,
+)
+
+iree_runtime_cc_library(
+ name = "registration",
+ srcs = ["driver_module.cc"],
+ hdrs = ["driver_module.h"],
+ defines = [
+ "IREE_HAL_HAVE_VULKAN_DRIVER_MODULE=1",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:cc",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal:flags",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/vulkan",
+ ],
+)
+
+iree_cmake_extra_content(
+ content = """
+endif()
+""",
+ inline = True,
+)
diff --git a/runtime/src/iree/hal/vulkan/registration/CMakeLists.txt b/runtime/src/iree/hal/vulkan/registration/CMakeLists.txt
new file mode 100644
index 0000000..14854c0
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/registration/CMakeLists.txt
@@ -0,0 +1,37 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/vulkan/registration/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+if(${IREE_HAL_DRIVER_VULKAN})
+
+iree_cc_library(
+ NAME
+ registration
+ HDRS
+ "driver_module.h"
+ SRCS
+ "driver_module.cc"
+ DEPS
+ iree::base
+ iree::base::cc
+ iree::base::core_headers
+ iree::base::internal::flags
+ iree::base::tracing
+ iree::hal
+ iree::hal::vulkan
+ DEFINES
+ "IREE_HAL_HAVE_VULKAN_DRIVER_MODULE=1"
+ PUBLIC
+)
+
+endif()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/vulkan/registration/driver_module.cc b/runtime/src/iree/hal/vulkan/registration/driver_module.cc
new file mode 100644
index 0000000..2692a31
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/registration/driver_module.cc
@@ -0,0 +1,125 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/registration/driver_module.h"
+
+#include <cinttypes>
+#include <cstddef>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/flags.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/api.h"
+
+#define IREE_HAL_VULKAN_1_X_DRIVER_ID 0x564C4B31u // VLK1
+
+IREE_FLAG(bool, vulkan_validation_layers, true,
+ "Enables standard Vulkan validation layers.");
+IREE_FLAG(bool, vulkan_debug_utils, true,
+ "Enables VK_EXT_debug_utils, records markers, and logs errors.");
+
+IREE_FLAG(int32_t, vulkan_default_index, 0,
+ "Index of the default Vulkan device.");
+
+IREE_FLAG(bool, vulkan_force_timeline_semaphore_emulation, false,
+ "Uses timeline semaphore emulation even if native support exists.");
+
+IREE_FLAG(bool, vulkan_tracing, true,
+ "Enables Vulkan tracing (if IREE tracing is enabled).");
+
+static iree_status_t iree_hal_vulkan_create_driver_with_flags(
+ iree_string_view_t identifier, iree_allocator_t allocator,
+ iree_hal_driver_t** out_driver) {
+ IREE_TRACE_SCOPE();
+
+ // Setup driver options from flags. We do this here as we want to enable other
+ // consumers that may not be using modules/command line flags to be able to
+ // set their options however they want.
+ iree_hal_vulkan_driver_options_t driver_options;
+ iree_hal_vulkan_driver_options_initialize(&driver_options);
+
+// TODO(benvanik): make this a flag - it's useful for testing the same binary
+// against multiple versions of Vulkan.
+#if defined(IREE_PLATFORM_ANDROID)
+ // TODO(#4494): let's see when we can always enable timeline semaphores.
+ driver_options.api_version = VK_API_VERSION_1_1;
+#else
+ driver_options.api_version = VK_API_VERSION_1_2;
+#endif // IREE_PLATFORM_ANDROID
+
+ if (FLAG_vulkan_validation_layers) {
+ driver_options.requested_features |=
+ IREE_HAL_VULKAN_FEATURE_ENABLE_VALIDATION_LAYERS;
+ }
+ if (FLAG_vulkan_debug_utils) {
+ driver_options.requested_features |=
+ IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS;
+ }
+ if (FLAG_vulkan_tracing) {
+ driver_options.requested_features |= IREE_HAL_VULKAN_FEATURE_ENABLE_TRACING;
+ }
+
+ driver_options.default_device_index = FLAG_vulkan_default_index;
+
+ if (FLAG_vulkan_force_timeline_semaphore_emulation) {
+ driver_options.device_options.flags |=
+ IREE_HAL_VULKAN_DEVICE_FORCE_TIMELINE_SEMAPHORE_EMULATION;
+ }
+
+ // Load the Vulkan library. This will fail if the library cannot be found or
+ // does not have the expected functions.
+ iree_hal_vulkan_syms_t* syms = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_hal_vulkan_syms_create_from_system_loader(allocator, &syms));
+
+ iree_status_t status = iree_hal_vulkan_driver_create(
+ identifier, &driver_options, syms, allocator, out_driver);
+
+ iree_hal_vulkan_syms_release(syms);
+ return status;
+}
+
+static iree_status_t iree_hal_vulkan_driver_factory_enumerate(
+ void* self, const iree_hal_driver_info_t** out_driver_infos,
+ iree_host_size_t* out_driver_info_count) {
+ // NOTE: we could query supported vulkan versions or featuresets here.
+ static const iree_hal_driver_info_t driver_infos[1] = {{
+ /*driver_id=*/IREE_HAL_VULKAN_1_X_DRIVER_ID,
+ /*driver_name=*/iree_make_cstring_view("vulkan"),
+ /*full_name=*/iree_make_cstring_view("Vulkan 1.x (dynamic)"),
+ }};
+ *out_driver_info_count = IREE_ARRAYSIZE(driver_infos);
+ *out_driver_infos = driver_infos;
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_vulkan_driver_factory_try_create(
+ void* self, iree_hal_driver_id_t driver_id, iree_allocator_t allocator,
+ iree_hal_driver_t** out_driver) {
+ if (driver_id != IREE_HAL_VULKAN_1_X_DRIVER_ID) {
+ return iree_make_status(IREE_STATUS_UNAVAILABLE,
+ "no driver with ID %016" PRIu64
+ " is provided by this factory",
+ driver_id);
+ }
+
+ // When we expose more than one driver (different vulkan versions, etc) we
+ // can name them here:
+ iree_string_view_t identifier = iree_make_cstring_view("vulkan");
+
+ return iree_hal_vulkan_create_driver_with_flags(identifier, allocator,
+ out_driver);
+}
+
+IREE_API_EXPORT iree_status_t
+iree_hal_vulkan_driver_module_register(iree_hal_driver_registry_t* registry) {
+ static const iree_hal_driver_factory_t factory = {
+ /*self=*/NULL,
+ iree_hal_vulkan_driver_factory_enumerate,
+ iree_hal_vulkan_driver_factory_try_create,
+ };
+ return iree_hal_driver_registry_register_factory(registry, &factory);
+}
diff --git a/runtime/src/iree/hal/vulkan/registration/driver_module.h b/runtime/src/iree/hal/vulkan/registration/driver_module.h
new file mode 100644
index 0000000..e6c3cf7
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/registration/driver_module.h
@@ -0,0 +1,24 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_REGISTRATION_DRIVER_MODULE_H_
+#define IREE_HAL_VULKAN_REGISTRATION_DRIVER_MODULE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+IREE_API_EXPORT iree_status_t
+iree_hal_vulkan_driver_module_register(iree_hal_driver_registry_t* registry);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_REGISTRATION_DRIVER_MODULE_H_
diff --git a/runtime/src/iree/hal/vulkan/serializing_command_queue.cc b/runtime/src/iree/hal/vulkan/serializing_command_queue.cc
new file mode 100644
index 0000000..92e0a64
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/serializing_command_queue.cc
@@ -0,0 +1,428 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/serializing_command_queue.h"
+
+#include <cstdint>
+#include <memory>
+#include <type_traits>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/direct_command_buffer.h"
+#include "iree/hal/vulkan/emulated_semaphore.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/tracing.h"
+#include "iree/hal/vulkan/util/arena.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+namespace {
+
+// Tries to prepare all necessary binary `VkSemaphore`s for emulating the time
+// points as specified in the given submission |batch_wait_semaphores| and
+// |batch_signal_semaphores|, then returns true if possible so that the
+// batch is ready to be submitted to GPU.
+// |wait_semaphores| and |signal_semaphores| will be filled with the binary
+// `VkSemaphores` on success.
+iree_status_t TryToPrepareSemaphores(
+ const std::vector<SemaphoreValue>& batch_wait_semaphores,
+ const std::vector<SemaphoreValue>& batch_signal_semaphores,
+ const ref_ptr<TimePointFence>& batch_fence,
+ std::vector<VkSemaphore>* wait_semaphores,
+ std::vector<VkSemaphore>* signal_semaphores, bool* out_ready_to_submit) {
+ IREE_TRACE_SCOPE0("TryToPrepareSemaphores");
+ *out_ready_to_submit = false;
+
+ wait_semaphores->clear();
+ for (const auto& timeline_semaphore : batch_wait_semaphores) {
+ // Query first to progress this timeline semaphore to the furthest.
+ uint64_t signaled_value = 0;
+ IREE_RETURN_IF_ERROR(
+ iree_hal_semaphore_query(timeline_semaphore.first, &signaled_value));
+
+ // If it's already signaled to a value greater than we require here,
+ // we can just ignore this semaphore now.
+ if (signaled_value >= timeline_semaphore.second) {
+ continue;
+ }
+
+ // Otherwise try to get a binary semaphore for this time point so that
+    // we can wait on it.
+ // TODO(antiagainst): if this fails we need to cancel.
+ VkSemaphore wait_semaphore = VK_NULL_HANDLE;
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_emulated_semaphore_acquire_wait_handle(
+ timeline_semaphore.first, timeline_semaphore.second, batch_fence,
+ &wait_semaphore));
+ wait_semaphores->push_back(wait_semaphore);
+
+ if (wait_semaphore == VK_NULL_HANDLE) {
+ // We cannot wait on this time point yet: there are no previous semaphores
+ // submitted to the GPU that can signal a value greater than what's
+ // desired here.
+
+ // Cancel the wait so others may make progress.
+ // TODO(antiagainst): if any of these fail we need to cancel.
+ for (iree_host_size_t i = 0; i < batch_wait_semaphores.size(); ++i) {
+ if (!wait_semaphores->at(i)) break;
+ IREE_RETURN_IF_ERROR(
+ iree_hal_vulkan_emulated_semaphore_cancel_wait_handle(
+ batch_wait_semaphores[i].first, wait_semaphores->at(i)));
+ }
+
+ // This batch cannot be submitted to GPU yet.
+ return iree_ok_status();
+ }
+ }
+
+ // We've collected all necessary binary semaphores for each timeline we need
+ // to wait on. Now prepare binary semaphores for signaling.
+ signal_semaphores->clear();
+ for (const auto& timeline_semaphore : batch_signal_semaphores) {
+ // SerializingCommandQueue only works with EmulatedTimelineSemaphore.
+ VkSemaphore signal_semaphore = VK_NULL_HANDLE;
+ IREE_RETURN_IF_ERROR(
+ iree_hal_vulkan_emulated_semaphore_acquire_signal_handle(
+ timeline_semaphore.first, timeline_semaphore.second, batch_fence,
+ &signal_semaphore));
+ signal_semaphores->push_back(signal_semaphore);
+ }
+
+ // Good to submit!
+ *out_ready_to_submit = true;
+ return iree_ok_status();
+}
+
+// Prepares `VkSubmitInfo` to submit the given list of |command_buffers| that
+// wait on |wait_semaphores| and signal |signal_semaphores|. Necessary
+// structures are allocated from |arena| and the result `VkSubmitInfo` is
+// written to |submit_info|.
+void PrepareSubmitInfo(
+ const std::vector<VkSemaphore>& wait_semaphore_handles,
+ const std::vector<VkCommandBuffer>& command_buffer_handles,
+ const std::vector<VkSemaphore>& signal_semaphore_handles,
+ VkSubmitInfo* submit_info, Arena* arena) {
+ // TODO(benvanik): see if we can go to finer-grained stages.
+ // For example, if this was just queue ownership transfers then we can use
+ // the pseudo-stage of VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT.
+ auto wait_dst_stage_masks =
+ arena->AllocateSpan<VkPipelineStageFlags>(wait_semaphore_handles.size());
+ for (size_t i = 0, e = wait_semaphore_handles.size(); i < e; ++i) {
+ wait_dst_stage_masks[i] =
+ VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ }
+
+ // NOTE: this code does some very weird things - the handles we take in as
+ // args are mutated in-place after this function is called so we can't
+ // reference them here. If we were going to preserve this code post-Vulkan 1.2
+ // then we'd really want to rework all of this to properly use the arena from
+ // the start instead of all this span tomfoolery.
+ auto wait_semaphores =
+ arena->AllocateSpan<VkSemaphore>(wait_semaphore_handles.size());
+ for (size_t i = 0, e = wait_semaphore_handles.size(); i < e; ++i) {
+ wait_semaphores[i] = wait_semaphore_handles[i];
+ }
+ auto command_buffers =
+ arena->AllocateSpan<VkCommandBuffer>(command_buffer_handles.size());
+ for (size_t i = 0, e = command_buffer_handles.size(); i < e; ++i) {
+ command_buffers[i] = command_buffer_handles[i];
+ }
+ auto signal_semaphores =
+ arena->AllocateSpan<VkSemaphore>(signal_semaphore_handles.size());
+ for (size_t i = 0, e = signal_semaphore_handles.size(); i < e; ++i) {
+ signal_semaphores[i] = signal_semaphore_handles[i];
+ }
+
+ submit_info->sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ submit_info->pNext = nullptr;
+ submit_info->waitSemaphoreCount =
+ static_cast<uint32_t>(wait_semaphores.size());
+ submit_info->pWaitSemaphores = wait_semaphores.data();
+ submit_info->pWaitDstStageMask = wait_dst_stage_masks.data();
+ submit_info->commandBufferCount =
+ static_cast<uint32_t>(command_buffers.size());
+ submit_info->pCommandBuffers = command_buffers.data();
+ submit_info->signalSemaphoreCount =
+ static_cast<uint32_t>(signal_semaphores.size());
+ submit_info->pSignalSemaphores = signal_semaphores.data();
+}
+
+} // namespace
+
+SerializingCommandQueue::SerializingCommandQueue(
+ VkDeviceHandle* logical_device,
+ iree_hal_command_category_t supported_categories, VkQueue queue,
+ TimePointFencePool* fence_pool)
+ : CommandQueue(logical_device, supported_categories, queue),
+ fence_pool_(fence_pool) {}
+
+SerializingCommandQueue::~SerializingCommandQueue() = default;
+
+iree_status_t SerializingCommandQueue::Submit(
+ iree_host_size_t batch_count, const iree_hal_submission_batch_t* batches) {
+ IREE_TRACE_SCOPE0("SerializingCommandQueue::Submit");
+
+ IntrusiveList<std::unique_ptr<FencedSubmission>> new_submissions;
+ for (iree_host_size_t i = 0; i < batch_count; ++i) {
+ const iree_hal_submission_batch_t* batch = &batches[i];
+
+ // Grab a fence for this submission first. This will be used to check the
+ // progress of emulated timeline semaphores later.
+ auto submission = std::make_unique<FencedSubmission>();
+ IREE_RETURN_IF_ERROR(fence_pool_->Acquire(&submission->fence));
+
+ submission->wait_semaphores.resize(batch->wait_semaphores.count);
+ for (iree_host_size_t j = 0; j < batch->wait_semaphores.count; ++j) {
+ submission->wait_semaphores[j] = {
+ batch->wait_semaphores.semaphores[j],
+ batch->wait_semaphores.payload_values[j]};
+ }
+
+ submission->command_buffers.resize(batch->command_buffer_count);
+ for (iree_host_size_t j = 0; j < batch->command_buffer_count; ++j) {
+ submission->command_buffers[j] =
+ iree_hal_vulkan_direct_command_buffer_handle(
+ batch->command_buffers[j]);
+ }
+
+ submission->signal_semaphores.resize(batch->signal_semaphores.count);
+ for (iree_host_size_t j = 0; j < batch->signal_semaphores.count; ++j) {
+ submission->signal_semaphores[j] = {
+ batch->signal_semaphores.semaphores[j],
+ batch->signal_semaphores.payload_values[j]};
+ }
+
+ new_submissions.push_back(std::move(submission));
+ }
+
+ iree_slim_mutex_lock(&queue_mutex_);
+ deferred_submissions_.merge_from(&new_submissions);
+ iree_status_t status = ProcessDeferredSubmissions();
+ iree_slim_mutex_unlock(&queue_mutex_);
+ return status;
+}
+
+iree_status_t SerializingCommandQueue::ProcessDeferredSubmissions(
+ bool* out_work_submitted) {
+ IREE_TRACE_SCOPE0("SerializingCommandQueue::ProcessDeferredSubmissions");
+
+ // Try to process the submissions and if we hit a stopping point during the
+ // process where we need to yield we take the remaining submissions and
+ // re-enqueue them.
+ IntrusiveList<std::unique_ptr<FencedSubmission>> remaining_submissions;
+ iree_status_t status =
+ TryProcessDeferredSubmissions(remaining_submissions, out_work_submitted);
+ while (!remaining_submissions.empty()) {
+ deferred_submissions_.push_back(
+ remaining_submissions.take(remaining_submissions.front()));
+ }
+
+ return status;
+}
+
+iree_status_t SerializingCommandQueue::TryProcessDeferredSubmissions(
+ IntrusiveList<std::unique_ptr<FencedSubmission>>& remaining_submissions,
+ bool* out_work_submitted) {
+ if (out_work_submitted) *out_work_submitted = false;
+
+ Arena arena(4 * 1024);
+ std::vector<VkSubmitInfo> submit_infos;
+ std::vector<VkFence> submit_fences;
+ while (!deferred_submissions_.empty()) {
+ FencedSubmission* submission = deferred_submissions_.front();
+ ref_ptr<TimePointFence>& fence = submission->fence;
+
+ std::vector<VkSemaphore> wait_semaphores;
+ std::vector<VkSemaphore> signal_semaphores;
+ bool ready_to_submit = false;
+ IREE_RETURN_IF_ERROR(TryToPrepareSemaphores(
+ submission->wait_semaphores, submission->signal_semaphores, fence,
+ &wait_semaphores, &signal_semaphores, &ready_to_submit));
+ if (ready_to_submit) {
+ submit_infos.emplace_back();
+ PrepareSubmitInfo(wait_semaphores, submission->command_buffers,
+ signal_semaphores, &submit_infos.back(), &arena);
+
+ submit_fences.push_back(fence->value());
+ pending_fences_.emplace_back(std::move(fence));
+ deferred_submissions_.pop_front();
+ } else {
+ // We need to defer the submission until later.
+ remaining_submissions.push_back(deferred_submissions_.take(submission));
+ }
+ }
+ if (submit_infos.empty()) {
+ if (out_work_submitted) *out_work_submitted = false;
+ return iree_ok_status();
+ }
+
+ // Note: We might be able to batch the submission but it involves non-trivial
+ // fence handling. We can handle that if really needed.
+ for (size_t i = 0, e = submit_infos.size(); i < e; ++i) {
+ VK_RETURN_IF_ERROR(
+ syms()->vkQueueSubmit(queue_, /*submitCount=*/1, &submit_infos[i],
+ submit_fences[i]),
+ "vkQueueSubmit");
+ }
+
+ if (out_work_submitted) *out_work_submitted = true;
+ return iree_ok_status();
+}
+
+iree_status_t SerializingCommandQueue::WaitIdle(iree_timeout_t timeout) {
+ iree_status_t status = iree_ok_status();
+
+ iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
+
+ if (deadline_ns == IREE_TIME_INFINITE_FUTURE) {
+ IREE_TRACE_SCOPE0("SerializingCommandQueue::WaitIdle#vkQueueWaitIdle");
+ // Fast path for using vkQueueWaitIdle, which is usually cheaper (as it
+ // requires fewer calls into the driver).
+
+ iree_slim_mutex_lock(&queue_mutex_);
+
+ // Complete all pending work on the queue.
+ status =
+ VK_RESULT_TO_STATUS(syms()->vkQueueWaitIdle(queue_), "vkQueueWaitIdle");
+ if (!iree_status_is_ok(status)) {
+ iree_slim_mutex_unlock(&queue_mutex_);
+ return status;
+ }
+ pending_fences_.clear();
+
+ // Submit and complete all deferred work.
+ while (!deferred_submissions_.empty()) {
+ bool work_submitted = false;
+ status = ProcessDeferredSubmissions(&work_submitted);
+ if (!iree_status_is_ok(status)) break;
+ if (work_submitted) {
+ status = VK_RESULT_TO_STATUS(syms()->vkQueueWaitIdle(queue_),
+ "vkQueueWaitIdle");
+ if (!iree_status_is_ok(status)) break;
+ pending_fences_.clear();
+ }
+ }
+
+ iree_slim_mutex_unlock(&queue_mutex_);
+
+ iree_hal_vulkan_tracing_context_collect(tracing_context(), VK_NULL_HANDLE);
+ return status;
+ }
+
+ IREE_TRACE_SCOPE0("SerializingCommandQueue::WaitIdle#Fence");
+
+ // Keep trying to submit more workload to the GPU until reaching the deadline.
+ iree_slim_mutex_lock(&queue_mutex_);
+ do {
+ status = ProcessDeferredSubmissions();
+ bool has_deferred_submissions = !deferred_submissions_.empty();
+ std::vector<VkFence> fence_handles(pending_fences_.size());
+ for (size_t i = 0; i < pending_fences_.size(); ++i) {
+ fence_handles[i] = pending_fences_[i]->value();
+ }
+ if (!iree_status_is_ok(status)) {
+ break; // unable to process submissions
+ } else if (!has_deferred_submissions && fence_handles.empty()) {
+ break; // no more work - idle achieved
+ }
+
+ uint64_t timeout_ns;
+ if (deadline_ns == IREE_TIME_INFINITE_FUTURE) {
+ timeout_ns = UINT64_MAX;
+ } else if (deadline_ns == IREE_TIME_INFINITE_PAST) {
+ timeout_ns = 0;
+ } else {
+ // Convert to relative time in nanoseconds.
+ // The implementation may not wait with this granularity (like by 10000x).
+ iree_time_t now_ns = iree_time_now();
+ if (deadline_ns < now_ns) {
+ return iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+ }
+ timeout_ns = (uint64_t)(deadline_ns - now_ns);
+ }
+ VkResult result = syms()->vkWaitForFences(
+ *logical_device_, static_cast<uint32_t>(fence_handles.size()),
+ fence_handles.data(),
+ /*waitAll=*/VK_TRUE, timeout_ns);
+
+ switch (result) {
+ case VK_SUCCESS:
+ pending_fences_.clear();
+ break;
+ case VK_TIMEOUT:
+ status = iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+ break;
+ default:
+ status = VK_RESULT_TO_STATUS(result, "vkWaitForFences");
+ break;
+ }
+ // As long as there is submitted or deferred work still pending.
+ } while (iree_status_is_ok(status));
+ iree_slim_mutex_unlock(&queue_mutex_);
+ return status;
+}
+
+iree_status_t SerializingCommandQueue::AdvanceQueueSubmission() {
+  // The returned value just indicates whether newly ready submissions were
+  // submitted to the GPU. Other callers might be interested in that
+  // information, but for this API we just want to advance queue submission
+  // if possible, so we ignore it here.
+ iree_slim_mutex_lock(&queue_mutex_);
+ iree_status_t status = ProcessDeferredSubmissions();
+ iree_slim_mutex_unlock(&queue_mutex_);
+ return status;
+}
+
+void SerializingCommandQueue::AbortQueueSubmission() {
+ iree_slim_mutex_lock(&queue_mutex_);
+
+ // We have fences in deferred_submissions_ but they are not submitted to GPU
+ // yet so we don't need to reset.
+ deferred_submissions_.clear();
+
+ std::vector<VkFence> fence_handles(pending_fences_.size());
+ for (size_t i = 0; i < pending_fences_.size(); ++i) {
+ fence_handles[i] = pending_fences_[i]->value();
+ }
+
+ syms()->vkWaitForFences(*logical_device_,
+ static_cast<uint32_t>(fence_handles.size()),
+ fence_handles.data(),
+ /*waitAll=*/VK_TRUE, /*timeout=*/UINT64_MAX);
+
+ // Clear the list. Fences will be automatically returned back to the queue
+ // after refcount reaches 0.
+ pending_fences_.clear();
+
+ iree_slim_mutex_unlock(&queue_mutex_);
+}
+
+void SerializingCommandQueue::SignalFences(const std::vector<VkFence>& fences) {
+ const auto span_contains = [fences](VkFence fence) {
+ for (VkFence f : fences) {
+ if (f == fence) return true;
+ }
+ return false;
+ };
+
+ iree_slim_mutex_lock(&queue_mutex_);
+ auto it = pending_fences_.begin();
+ while (it != pending_fences_.end()) {
+ if (span_contains((*it)->value())) {
+ it = pending_fences_.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ iree_slim_mutex_unlock(&queue_mutex_);
+}
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/serializing_command_queue.h b/runtime/src/iree/hal/vulkan/serializing_command_queue.h
new file mode 100644
index 0000000..949deca
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/serializing_command_queue.h
@@ -0,0 +1,103 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_SERIALIZING_COMMAND_QUEUE_H_
+#define IREE_HAL_VULKAN_SERIALIZING_COMMAND_QUEUE_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h"
+// clang-format on
+
+#include <stddef.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/command_queue.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/timepoint_util.h"
+#include "iree/hal/vulkan/util/intrusive_list.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+using SemaphoreValue = std::pair<iree_hal_semaphore_t*, uint64_t>;
+
+// A command queue that potentially defers and serializes command buffer
+// submission to the GPU.
+//
+// This command queue is designed to be used together with emulated timeline
+// semaphores. Timeline semaphores can follow wait-before-signal submission
+// order but binary `VkSemaphore` cannot. So when emulating timeline semaphores
+// with binary `VkSemaphore`s and `VkFence`s, we need to make sure no
+// wait-before-signal submission ordering occurs for binary `VkSemaphore`s. The way
+// to enforce that is to defer the submission until we can be certain that the
+// `VkSemaphore`s emulating time points in the timeline are all *submitted* to
+// the GPU.
+class SerializingCommandQueue final : public CommandQueue {
+ public:
+ SerializingCommandQueue(VkDeviceHandle* logical_device,
+ iree_hal_command_category_t supported_categories,
+ VkQueue queue, TimePointFencePool* fence_pool);
+ ~SerializingCommandQueue() override;
+
+ const ref_ptr<DynamicSymbols>& syms() const {
+ return logical_device_->syms();
+ }
+
+ iree_status_t Submit(iree_host_size_t batch_count,
+ const iree_hal_submission_batch_t* batches) override;
+
+ iree_status_t WaitIdle(iree_timeout_t timeout) override;
+
+ // Releases all deferred submissions ready to submit to the GPU.
+ iree_status_t AdvanceQueueSubmission();
+
+ // Aborts all deferred submissions and waits for submitted work to complete.
+ void AbortQueueSubmission();
+
+ // Informs this queue that the given |fences| are known to have signaled.
+ void SignalFences(const std::vector<VkFence>& fences);
+
+ private:
+  // A submission batch together with the fence to signal its status.
+ struct FencedSubmission : public IntrusiveLinkBase<void> {
+ std::vector<SemaphoreValue> wait_semaphores;
+ std::vector<VkCommandBuffer> command_buffers;
+ std::vector<SemaphoreValue> signal_semaphores;
+ ref_ptr<TimePointFence> fence;
+ };
+
+  // Processes deferred submissions in this queue and reports via
+  // |out_work_submitted| whether new work was submitted to the GPU.
+ iree_status_t ProcessDeferredSubmissions(bool* out_work_submitted = NULL);
+ iree_status_t TryProcessDeferredSubmissions(
+ IntrusiveList<std::unique_ptr<FencedSubmission>>& remaining_submissions,
+ bool* out_work_submitted);
+
+ TimePointFencePool* fence_pool_;
+
+ // A list of fences that are submitted to GPU.
+ std::vector<ref_ptr<TimePointFence>> pending_fences_ IREE_GUARDED_BY(mutex_);
+ // A list of deferred submissions that haven't been submitted to GPU.
+ IntrusiveList<std::unique_ptr<FencedSubmission>> deferred_submissions_
+ IREE_GUARDED_BY(mutex_);
+};
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_SERIALIZING_COMMAND_QUEUE_H_
diff --git a/runtime/src/iree/hal/vulkan/status_util.c b/runtime/src/iree/hal/vulkan/status_util.c
new file mode 100644
index 0000000..e61008c
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/status_util.c
@@ -0,0 +1,260 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/status_util.h"
+
+iree_status_t iree_hal_vulkan_result_to_status(VkResult result,
+ const char* file,
+ uint32_t line) {
+ switch (result) {
+ // Success codes.
+ case VK_SUCCESS:
+ // Command successfully completed.
+ return iree_ok_status();
+ case VK_NOT_READY:
+ // A fence or query has not yet completed.
+ return iree_ok_status();
+ case VK_TIMEOUT:
+ // A wait operation has not completed in the specified time.
+ return iree_ok_status();
+ case VK_EVENT_SET:
+ // An event is signaled.
+ return iree_ok_status();
+ case VK_EVENT_RESET:
+ // An event is unsignaled.
+ return iree_ok_status();
+ case VK_INCOMPLETE:
+ // A return array was too small for the result.
+ return iree_ok_status();
+ case VK_SUBOPTIMAL_KHR:
+ // A swapchain no longer matches the surface properties exactly, but can
+ // still be used to present to the surface successfully.
+ return iree_ok_status();
+
+ // Error codes.
+ case VK_ERROR_OUT_OF_HOST_MEMORY:
+ // A host memory allocation has failed.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_RESOURCE_EXHAUSTED,
+ "VK_ERROR_OUT_OF_HOST_MEMORY");
+ case VK_ERROR_OUT_OF_DEVICE_MEMORY:
+ // A device memory allocation has failed.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_RESOURCE_EXHAUSTED,
+ "VK_ERROR_OUT_OF_DEVICE_MEMORY");
+ case VK_ERROR_INITIALIZATION_FAILED:
+ // Initialization of an object could not be completed for
+ // implementation-specific reasons.
+ return iree_make_status_with_location(file, line, IREE_STATUS_UNAVAILABLE,
+ "VK_ERROR_INITIALIZATION_FAILED");
+ case VK_ERROR_DEVICE_LOST:
+ // The logical or physical device has been lost.
+ //
+ // A logical device may become lost for a number of
+ // implementation-specific reasons, indicating that pending and future
+ // command execution may fail and cause resources and backing memory to
+ // become undefined.
+ //
+ // Typical reasons for device loss will include things like execution
+ // timing out (to prevent denial of service), power management events,
+ // platform resource management, or implementation errors.
+ //
+ // When this happens, certain commands will return
+ // VK_ERROR_DEVICE_LOST (see Error Codes for a list of such
+ // commands). After any such event, the logical device is considered lost.
+ // It is not possible to reset the logical device to a non-lost state,
+ // however the lost state is specific to a logical device (VkDevice), and
+ // the corresponding physical device (VkPhysicalDevice) may be otherwise
+ // unaffected.
+ //
+ // In some cases, the physical device may also be lost, and attempting to
+ // create a new logical device will fail, returning VK_ERROR_DEVICE_LOST.
+ // This is usually indicative of a problem with the underlying
+ // implementation, or its connection to the host. If the physical device
+ // has not been lost, and a new logical device is successfully created
+ // from that physical device, it must be in the non-lost state.
+ //
+ // Whilst logical device loss may be recoverable, in the case of physical
+ // device loss, it is unlikely that an application will be able to recover
+ // unless additional, unaffected physical devices exist on the system. The
+ // error is largely informational and intended only to inform the user
+ // that a platform issue has occurred, and should be investigated further.
+ // For example, underlying hardware may have developed a fault or become
+ // physically disconnected from the rest of the system. In many cases,
+ // physical device loss may cause other more serious issues such as the
+ // operating system crashing; in which case it may not be reported via the
+ // Vulkan API.
+ //
+ // Undefined behavior caused by an application error may cause a device to
+ // become lost. However, such undefined behavior may also cause
+ // unrecoverable damage to the process, and it is then not guaranteed that
+ // the API objects, including the VkPhysicalDevice or the VkInstance are
+ // still valid or that the error is recoverable.
+ //
+ // When a device is lost, its child objects are not implicitly destroyed
+ // and their handles are still valid. Those objects must still be
+ // destroyed before their parents or the device can be destroyed (see the
+ // Object Lifetime section). The host address space corresponding to
+ // device memory mapped using vkMapMemory is still valid, and host memory
+ // accesses to these mapped regions are still valid, but the contents are
+ // undefined. It is still legal to call any API command on the device and
+ // child objects.
+ //
+ // Once a device is lost, command execution may fail, and commands that
+ // return a VkResult may return VK_ERROR_DEVICE_LOST.
+ // Commands that do not allow run-time errors must still operate correctly
+ // for valid usage and, if applicable, return valid data.
+ //
+ // Commands that wait indefinitely for device execution (namely
+ // vkDeviceWaitIdle, vkQueueWaitIdle, vkWaitForFences with a maximum
+ // timeout, and vkGetQueryPoolResults with the VK_QUERY_RESULT_WAIT_BIT
+ // bit set in flags) must return in finite time even in the case
+ // of a lost device, and return either VK_SUCCESS or
+ // VK_ERROR_DEVICE_LOST. For any command that may return
+ // VK_ERROR_DEVICE_LOST, for the purpose of determining whether a
+ // command buffer is in the pending state, or whether resources are
+ // considered in-use by the device, a return value of
+ // VK_ERROR_DEVICE_LOST is equivalent to VK_SUCCESS.
+ return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL,
+ "VK_ERROR_DEVICE_LOST");
+ case VK_ERROR_MEMORY_MAP_FAILED:
+ // Mapping of a memory object has failed.
+ return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL,
+ "VK_ERROR_MEMORY_MAP_FAILED");
+ case VK_ERROR_LAYER_NOT_PRESENT:
+ // A requested layer is not present or could not be loaded.
+ return iree_make_status_with_location(
+ file, line, IREE_STATUS_UNIMPLEMENTED, "VK_ERROR_LAYER_NOT_PRESENT");
+ case VK_ERROR_EXTENSION_NOT_PRESENT:
+ // A requested extension is not supported.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_UNIMPLEMENTED,
+ "VK_ERROR_EXTENSION_NOT_PRESENT");
+ case VK_ERROR_FEATURE_NOT_PRESENT:
+ // A requested feature is not supported.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_UNIMPLEMENTED,
+ "VK_ERROR_FEATURE_NOT_PRESENT");
+ case VK_ERROR_INCOMPATIBLE_DRIVER:
+ // The requested version of Vulkan is not supported by the driver or is
+ // otherwise incompatible for implementation-specific reasons.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_FAILED_PRECONDITION,
+ "VK_ERROR_INCOMPATIBLE_DRIVER");
+ case VK_ERROR_TOO_MANY_OBJECTS:
+ // Too many objects of the type have already been created.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_RESOURCE_EXHAUSTED,
+ "VK_ERROR_TOO_MANY_OBJECTS");
+ case VK_ERROR_FORMAT_NOT_SUPPORTED:
+ // A requested format is not supported on this device.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_UNIMPLEMENTED,
+ "VK_ERROR_FORMAT_NOT_SUPPORTED");
+ case VK_ERROR_FRAGMENTED_POOL:
+ // A pool allocation has failed due to fragmentation of the pool’s
+ // memory. This must only be returned if no attempt to allocate host
+ // or device memory was made to accommodate the new allocation.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_RESOURCE_EXHAUSTED,
+ "VK_ERROR_FRAGMENTED_POOL");
+ case VK_ERROR_OUT_OF_POOL_MEMORY:
+ // A pool memory allocation has failed. This must only be returned if no
+ // attempt to allocate host or device memory was made to accommodate the
+ // new allocation. If the failure was definitely due to fragmentation of
+ // the pool, VK_ERROR_FRAGMENTED_POOL should be returned instead.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_RESOURCE_EXHAUSTED,
+ "VK_ERROR_OUT_OF_POOL_MEMORY");
+ case VK_ERROR_INVALID_EXTERNAL_HANDLE:
+ // An external handle is not a valid handle of the specified type.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_INVALID_ARGUMENT,
+ "VK_ERROR_INVALID_EXTERNAL_HANDLE");
+ case VK_ERROR_SURFACE_LOST_KHR:
+ // A surface is no longer available.
+ return iree_make_status_with_location(file, line, IREE_STATUS_UNAVAILABLE,
+ "VK_ERROR_SURFACE_LOST_KHR");
+ case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
+ // The requested window is already in use by Vulkan or another API in a
+ // manner which prevents it from being used again.
+ return iree_make_status_with_location(
+ file, line, IREE_STATUS_INVALID_ARGUMENT,
+ "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR");
+ case VK_ERROR_OUT_OF_DATE_KHR:
+ // A surface has changed in such a way that it is no longer compatible
+ // with the swapchain, and further presentation requests using the
+ // swapchain will fail. Applications must query the new surface properties
+ // and recreate their swapchain if they wish to continue presenting to the
+ // surface.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_FAILED_PRECONDITION,
+ "VK_ERROR_OUT_OF_DATE_KHR");
+ case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
+ // The display used by a swapchain does not use the same presentable image
+ // layout, or is incompatible in a way that prevents sharing an image.
+ return iree_make_status_with_location(
+ file, line, IREE_STATUS_INVALID_ARGUMENT,
+ "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR");
+ case VK_ERROR_VALIDATION_FAILED_EXT:
+ // Validation layer testing failed. It is not expected that an
+      // application would see this error code during normal use of the
+ // validation layers.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_INVALID_ARGUMENT,
+ "VK_ERROR_VALIDATION_FAILED_EXT");
+ case VK_ERROR_INVALID_SHADER_NV:
+ // One or more shaders failed to compile or link. More details are
+ // reported back to the application when the validation layer is enabled
+ // using the extension VK_EXT_debug_report.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_INVALID_ARGUMENT,
+ "VK_ERROR_INVALID_SHADER_NV");
+ case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
+ // When creating an image with
+ // VkImageDrmFormatModifierExplicitCreateInfoEXT, it is the application’s
+ // responsibility to satisfy all Valid Usage requirements. However, the
+ // implementation must validate that the provided pPlaneLayouts, when
+ // combined with the provided drmFormatModifier and other creation
+ // parameters in VkImageCreateInfo and its pNext chain, produce a valid
+ // image. (This validation is necessarily implementation-dependent and
+ // outside the scope of Vulkan, and therefore not described by Valid Usage
+ // requirements). If this validation fails, then vkCreateImage returns
+ // VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT.
+ return iree_make_status_with_location(
+ file, line, IREE_STATUS_INVALID_ARGUMENT,
+ "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT");
+ case VK_ERROR_FRAGMENTATION_EXT:
+ // A descriptor pool creation has failed due to fragmentation.
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_RESOURCE_EXHAUSTED,
+ "VK_ERROR_FRAGMENTATION_EXT");
+ case VK_ERROR_NOT_PERMITTED_EXT:
+ // When creating a queue, the caller does not have sufficient privileges
+ // to request to acquire a priority above the default priority
+ // (VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT).
+ return iree_make_status_with_location(file, line,
+ IREE_STATUS_PERMISSION_DENIED,
+ "VK_ERROR_NOT_PERMITTED_EXT");
+ case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
+ // A buffer creation failed because the requested address is not
+ // available.
+ return iree_make_status_with_location(
+ file, line, IREE_STATUS_OUT_OF_RANGE,
+ "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT");
+ case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
+ // An operation on a swapchain created with
+ // VK_FULL_SCREEN_EXCLUSIVE_APPLICATION_CONTROLLED_EXT failed as it did
+      // not have exclusive full-screen access. This may occur due to
+ // implementation-dependent reasons, outside of the application’s control.
+ return iree_make_status_with_location(
+ file, line, IREE_STATUS_UNAVAILABLE,
+ "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT");
+ default:
+ return iree_make_status_with_location(file, line, IREE_STATUS_UNKNOWN,
+ "VkResult=%u", (uint32_t)result);
+ }
+}
diff --git a/runtime/src/iree/hal/vulkan/status_util.h b/runtime/src/iree/hal/vulkan/status_util.h
new file mode 100644
index 0000000..3e22946
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/status_util.h
@@ -0,0 +1,93 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_STATUS_UTIL_H_
+#define IREE_HAL_VULKAN_STATUS_UTIL_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h"
+// clang-format on
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Converts a VkResult to an iree_status_t.
+//
+// Usage:
+// iree_status_t status = VK_RESULT_TO_STATUS(vkDoThing(...));
+#define VK_RESULT_TO_STATUS(expr, ...) \
+ iree_hal_vulkan_result_to_status((expr), __FILE__, __LINE__)
+
+// IREE_RETURN_IF_ERROR but implicitly converts the VkResult return value to
+// a Status.
+//
+// Usage:
+// VK_RETURN_IF_ERROR(vkDoThing(...), "message");
+#define VK_RETURN_IF_ERROR(expr, ...) \
+ IREE_RETURN_IF_ERROR( \
+ iree_hal_vulkan_result_to_status(expr, __FILE__, __LINE__), __VA_ARGS__)
+
+// IREE_CHECK_OK but implicitly converts the VkResult return value to a
+// Status and checks that it is OkStatus.
+//
+// Usage:
+// VK_CHECK_OK(vkDoThing(...));
+#define VK_CHECK_OK(expr) \
+ IREE_CHECK_OK(iree_hal_vulkan_result_to_status(expr, __FILE__, __LINE__))
+
+// Converts a VkResult to a Status object.
+//
+// Vulkan considers the following as "success codes" and users should ensure
+// they first check the result prior to converting:
+//
+// - VK_SUCCESS -> OkStatus()
+// - VK_NOT_READY -> OkStatus()
+// - VK_TIMEOUT -> OkStatus()
+// - VK_EVENT_SET -> OkStatus()
+// - VK_EVENT_RESET -> OkStatus()
+// - VK_INCOMPLETE -> OkStatus()
+// - VK_SUBOPTIMAL_KHR -> OkStatus()
+//
+// The rest are considered as "error codes":
+//
+// - VK_ERROR_OUT_OF_HOST_MEMORY -> ResourceExhaustedError("VK...")
+// - VK_ERROR_OUT_OF_DEVICE_MEMORY -> ResourceExhaustedError("VK...")
+// - VK_ERROR_INITIALIZATION_FAILED -> UnavailableError("VK...")
+// - VK_ERROR_DEVICE_LOST -> InternalError("VK...")
+// - VK_ERROR_MEMORY_MAP_FAILED -> InternalError("VK...")
+// - VK_ERROR_LAYER_NOT_PRESENT -> UnimplementedError("VK...")
+// - VK_ERROR_EXTENSION_NOT_PRESENT -> UnimplementedError("VK...")
+// - VK_ERROR_FEATURE_NOT_PRESENT -> UnimplementedError("VK...")
+// - VK_ERROR_INCOMPATIBLE_DRIVER -> FailedPreconditionError("VK...")
+// - VK_ERROR_TOO_MANY_OBJECTS -> ResourceExhaustedError("VK...")
+// - VK_ERROR_FORMAT_NOT_SUPPORTED -> UnimplementedError("VK...")
+// - VK_ERROR_FRAGMENTED_POOL -> ResourceExhaustedError("VK...")
+// - VK_ERROR_OUT_OF_POOL_MEMORY -> ResourceExhaustedError("VK...")
+// - VK_ERROR_INVALID_EXTERNAL_HANDLE -> InvalidArgumentError("VK...")
+// - VK_ERROR_SURFACE_LOST_KHR -> UnavailableError("VK...")
+// - VK_ERROR_NATIVE_WINDOW_IN_USE_KHR -> InvalidArgumentError("VK...")
+// - VK_ERROR_OUT_OF_DATE_KHR -> FailedPreconditionError("VK...")
+// - VK_ERROR_INCOMPATIBLE_DISPLAY_KHR -> InvalidArgumentError("VK...")
+// - VK_ERROR_VALIDATION_FAILED_EXT -> InvalidArgumentError("VK...")
+// - VK_ERROR_INVALID_SHADER_NV -> InvalidArgumentError("VK...")
+// - VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT -> InvalidArgumentError
+// - VK_ERROR_FRAGMENTATION_EXT -> ResourceExhaustedError("VK...")
+// - VK_ERROR_NOT_PERMITTED_EXT -> PermissionDeniedError("VK...")
+// - VK_ERROR_INVALID_DEVICE_ADDRESS_EXT -> OutOfRangeError("VK...")
+// - VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT -> UnavailableError("VK...")
+iree_status_t iree_hal_vulkan_result_to_status(VkResult result,
+ const char* file, uint32_t line);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_STATUS_UTIL_H_
diff --git a/runtime/src/iree/hal/vulkan/timepoint_util.cc b/runtime/src/iree/hal/vulkan/timepoint_util.cc
new file mode 100644
index 0000000..531d897
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/timepoint_util.cc
@@ -0,0 +1,262 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/timepoint_util.h"
+
+#include <memory>
+
+#include "iree/base/logging.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/status_util.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+namespace {
+
+class RaiiLocker {
+ public:
+ explicit RaiiLocker(iree_slim_mutex_t* mu)
+ IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis)
+ : mu_(mu) {
+ iree_slim_mutex_lock(mu_);
+ }
+ ~RaiiLocker() IREE_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis) {
+ iree_slim_mutex_unlock(mu_);
+ }
+
+ private:
+ iree_slim_mutex_t* mu_;
+};
+
+} // namespace
+
+// static
+void TimePointFence::Delete(TimePointFence* ptr) {
+ ptr->ResetStatus();
+ ptr->pool()->ReleaseResolved(ptr);
+}
+
+VkResult TimePointFence::GetStatus() {
+ RaiiLocker locker(&status_mutex_);
+ if (status_ == VK_NOT_READY) {
+ const auto& device = pool()->logical_device();
+ status_ = device->syms()->vkGetFenceStatus(*device, fence_);
+ }
+ return status_;
+}
+
+void TimePointFence::ResetStatus() {
+ RaiiLocker locker(&status_mutex_);
+ status_ = VK_NOT_READY;
+}
+
+// static
+iree_status_t TimePointFencePool::Create(VkDeviceHandle* logical_device,
+ TimePointFencePool** out_pool) {
+ IREE_TRACE_SCOPE0("TimePointFencePool::Create");
+ ref_ptr<TimePointFencePool> pool(new TimePointFencePool(logical_device));
+ iree_slim_mutex_initialize(&(pool->mutex_));
+ IREE_RETURN_IF_ERROR(pool->PreallocateFences());
+ *out_pool = pool.release();
+ return iree_ok_status();
+}
+
+TimePointFencePool::~TimePointFencePool() {
+ IREE_TRACE_SCOPE0("TimePointFencePool::dtor");
+
+ iree_slim_mutex_lock(&mutex_);
+
+ int free_count = 0;
+ for (auto* fence : free_fences_) {
+ syms()->vkDestroyFence(*logical_device_, fence->value(),
+ logical_device_->allocator());
+ ++free_count;
+ }
+ IREE_DCHECK_EQ(free_count, kMaxInFlightFenceCount);
+ free_fences_.clear();
+
+ iree_slim_mutex_unlock(&mutex_);
+ iree_slim_mutex_deinitialize(&mutex_);
+}
+
+iree_status_t TimePointFencePool::Acquire(ref_ptr<TimePointFence>* out_fence) {
+ IREE_TRACE_SCOPE0("TimePointFencePool::Acquire");
+
+ RaiiLocker locker(&mutex_);
+ if (free_fences_.empty()) {
+ return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+ "fence pool out of free fences");
+ }
+
+ // To acquire from the pool, we:
+ // 1) Pop from the front of the queue (reference count of 0);
+ // 2) Release the unique_ptr, since lifetime will be managed by ref counts;
+ // 3) Return as a raw RefObject with a reference count of 1;
+ // When the reference count goes back to 0, it will be returned to the pool,
+ // wrapped with unique_ptr.
+ // When the pool is destroyed, all free fences are freed by unique_ptr
+ // automatically.
+ std::unique_ptr<TimePointFence> fence =
+ free_fences_.take(free_fences_.front());
+ *out_fence = add_ref(fence.release());
+ return iree_ok_status();
+}
+
+void TimePointFencePool::ReleaseResolved(TimePointFence* fence) {
+ IREE_TRACE_SCOPE0("TimePointFencePool::ReleaseResolved");
+ VkFence f = fence->value();
+ syms()->vkResetFences(*logical_device_, 1, &f);
+ RaiiLocker locker(&mutex_);
+ free_fences_.push_back(std::unique_ptr<TimePointFence>(fence));
+}
+
+TimePointFencePool::TimePointFencePool(VkDeviceHandle* logical_device)
+ : logical_device_(logical_device) {}
+
+const ref_ptr<DynamicSymbols>& TimePointFencePool::syms() const {
+ return logical_device_->syms();
+}
+
+iree_status_t TimePointFencePool::PreallocateFences() {
+ IREE_TRACE_SCOPE0("TimePointFencePool::PreallocateFences");
+
+ VkFenceCreateInfo create_info;
+ create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+ create_info.pNext = nullptr;
+ create_info.flags = 0;
+
+ std::array<std::unique_ptr<TimePointFence>, kMaxInFlightFenceCount> fences;
+ {
+ RaiiLocker locker(&mutex_);
+ for (int i = 0; i < fences.size(); ++i) {
+ VkFence fence = VK_NULL_HANDLE;
+ VK_RETURN_IF_ERROR(
+ syms()->vkCreateFence(*logical_device_, &create_info,
+ logical_device_->allocator(), &fence),
+ "vkCreateFence");
+ fences[i] = std::make_unique<TimePointFence>(this, fence);
+ }
+ }
+
+ for (int i = 0; i < fences.size(); ++i) {
+    // The `TimePointFence`s were created with an initial ref-count of one.
+ // Decrease explicitly to zero so that later we can rely on the ref-count
+ // reaching zero to auto-release the `TimePointFence` back to the free
+ // list. As a nice side effect, this will also initialize the free list
+ // with all newly created fences.
+ // TODO: Might want to avoid acquiring and releasing the mutex for each
+ // fence.
+ fences[i].release()->ReleaseReference();
+ }
+
+ return iree_ok_status();
+}
+
+// static
+iree_status_t TimePointSemaphorePool::Create(
+ VkDeviceHandle* logical_device, TimePointSemaphorePool** out_pool) {
+ IREE_TRACE_SCOPE0("TimePointSemaphorePool::Create");
+ ref_ptr<TimePointSemaphorePool> pool(
+ new TimePointSemaphorePool(logical_device));
+ iree_slim_mutex_initialize(&(pool->mutex_));
+ IREE_RETURN_IF_ERROR(pool->PreallocateSemaphores());
+ *out_pool = pool.release();
+ return iree_ok_status();
+}
+
+TimePointSemaphorePool::~TimePointSemaphorePool() {
+ IREE_TRACE_SCOPE0("TimePointSemaphorePool::dtor");
+
+ iree_slim_mutex_lock(&mutex_);
+
+ IREE_DCHECK_EQ(free_semaphores_.size(), kMaxInFlightSemaphoreCount);
+ free_semaphores_.clear();
+
+ for (auto& semaphore : storage_) {
+ syms()->vkDestroySemaphore(*logical_device_, semaphore.semaphore,
+ logical_device_->allocator());
+ }
+
+ iree_slim_mutex_unlock(&mutex_);
+ iree_slim_mutex_deinitialize(&mutex_);
+}
+
+iree_status_t TimePointSemaphorePool::Acquire(
+ TimePointSemaphore** out_semaphore) {
+ IREE_TRACE_SCOPE0("TimePointSemaphorePool::Acquire");
+
+ RaiiLocker locker(&mutex_);
+ if (free_semaphores_.empty()) {
+ return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+ "semaphore pool out of free semaphores");
+ }
+
+ *out_semaphore = free_semaphores_.front();
+ free_semaphores_.pop_front();
+ return iree_ok_status();
+}
+
+void TimePointSemaphorePool::ReleaseResolved(
+ IntrusiveList<TimePointSemaphore>* semaphores) {
+ IREE_TRACE_SCOPE0("TimePointSemaphorePool::ReleaseResolved");
+
+ for (auto* semaphore : *semaphores) {
+ IREE_DCHECK(!semaphore->signal_fence && !semaphore->wait_fence);
+ semaphore->value = UINT64_MAX;
+ }
+
+ RaiiLocker locker(&mutex_);
+ free_semaphores_.merge_from(semaphores);
+}
+
+void TimePointSemaphorePool::ReleaseUnresolved(
+ IntrusiveList<TimePointSemaphore>* semaphores) {
+ IREE_TRACE_SCOPE0("TimePointSemaphorePool::ReleaseUnresolved");
+
+ for (auto* semaphore : *semaphores) {
+ semaphore->signal_fence = nullptr;
+ semaphore->wait_fence = nullptr;
+ semaphore->value = UINT64_MAX;
+ }
+
+ RaiiLocker locker(&mutex_);
+ free_semaphores_.merge_from(semaphores);
+}
+
+TimePointSemaphorePool::TimePointSemaphorePool(VkDeviceHandle* logical_device)
+ : logical_device_(logical_device) {}
+
+const ref_ptr<DynamicSymbols>& TimePointSemaphorePool::syms() const {
+ return logical_device_->syms();
+}
+
+iree_status_t TimePointSemaphorePool::PreallocateSemaphores() {
+ IREE_TRACE_SCOPE0("TimePointSemaphorePool::PreallocateSemaphores");
+
+ VkSemaphoreCreateInfo create_info;
+ create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+ create_info.pNext = nullptr;
+ create_info.flags = 0;
+
+ RaiiLocker locker(&mutex_);
+ for (int i = 0; i < kMaxInFlightSemaphoreCount; ++i) {
+ auto* semaphore = &storage_[i];
+ VK_RETURN_IF_ERROR(syms()->vkCreateSemaphore(*logical_device_, &create_info,
+ logical_device_->allocator(),
+ &semaphore->semaphore),
+ "vkCreateSemaphore");
+ free_semaphores_.push_back(semaphore);
+ }
+
+ return iree_ok_status();
+}
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/timepoint_util.h b/runtime/src/iree/hal/vulkan/timepoint_util.h
new file mode 100644
index 0000000..6eea90b
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/timepoint_util.h
@@ -0,0 +1,214 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_TIMEPOINT_UTIL_H_
+#define IREE_HAL_VULKAN_TIMEPOINT_UTIL_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h"
+// clang-format on
+
+#include <stdint.h>
+
+#include <array>
+#include <memory>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/status_cc.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/util/intrusive_list.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+namespace iree {
+namespace hal {
+namespace vulkan {
+
+class TimePointFencePool;
+class TimePointSemaphorePool;
+
+// A fence used for tracking progress of timeline semaphores.
+//
+// Each queue submission gets a new `VkFence` associated with it so that we can
+// later query the `VkFence` on CPU to know what time points were signaled for
+// timeline semaphores.
+//
+// Ref-counting allows the fence to be associated with multiple time points from
+// different timelines without worrying about ownership complexity.
+//
+// This is expected to be used together with `TimePointFencePool` and must be
+// externally synchronized via `TimePointFencePool`'s mutex.
+class TimePointFence final : public RefObject<TimePointFence>,
+ public IntrusiveLinkBase<void> {
+ public:
+ TimePointFence(TimePointFencePool* pool, VkFence fence)
+ : pool_(pool), fence_(fence) {
+ iree_slim_mutex_initialize(&status_mutex_);
+ }
+
+ ~TimePointFence() { iree_slim_mutex_deinitialize(&status_mutex_); }
+
+ TimePointFence(TimePointFence&& that) = delete;
+ TimePointFence& operator=(TimePointFence&&) = delete;
+
+ TimePointFence(const TimePointFence&) = delete;
+ TimePointFence& operator=(const TimePointFence&) = delete;
+
+ // Returns this fence to the pool on destruction.
+ static void Delete(TimePointFence* ptr);
+
+ VkFence value() const noexcept { return fence_; }
+ operator VkFence() const noexcept { return fence_; }
+
+  // Gets the status of this fence object. This might issue a Vulkan API call
+ // under the hood.
+ VkResult GetStatus();
+
+ // Resets the status to unsignaled (VK_NOT_READY).
+ void ResetStatus();
+
+ // Returns the pool from which this fence comes.
+ TimePointFencePool* pool() const { return pool_; }
+
+ private:
+ // The pool from which this fence comes.
+ TimePointFencePool* pool_;
+
+ // Allocated fence that associated with a bunch of time point(s) of
+ // timeline(s). This is passed to queue submission so that we can track the
+ // timeline(s) progress on CPU and schedule work.
+ VkFence fence_;
+
+ // The fence's status.
+ iree_slim_mutex_t status_mutex_;
+ VkResult status_ IREE_GUARDED_BY(status_mutex_) = VK_NOT_READY;
+};
+
+// A semaphore used for emulating a specific time point of timeline semaphores.
+//
+// Each signaled time point in a timeline semaphore is emulated with a new
+// binary `VkSemaphore` associated with queue submission. These time point
+// semaphores are stored in `EmulatedTimelineSemaphore` to quickly scan and
+// process signaled values.
+//
+// This is expected to be used together with `TimePointSemaphorePool` and
+// `EmulatedTimelineSemaphore` and must be externally synchronized via their
+// mutexes.
+struct TimePointSemaphore final : public IntrusiveLinkBase<void> {
+ // Allocated binary semaphore that represents a time point in the timeline.
+ // This is passed to queue submission.
+ VkSemaphore semaphore = VK_NULL_HANDLE;
+
+ // Value of the timeline should be at when the binary semaphore is signaled.
+ uint64_t value = UINT64_MAX;
+
+ // The fence associated with the queue submission signaling this semaphore.
+ // nullptr means this binary semaphore has not been submitted to GPU.
+ ref_ptr<TimePointFence> signal_fence = nullptr;
+
+ // The fence associated with the queue submission waiting this semaphore.
+ // nullptr means this binary semaphore has not been waited by any queue
+ // submission.
+ ref_ptr<TimePointFence> wait_fence = nullptr;
+};
+
+// A pool of `VkFence`s that can be used by `EmulatedTimelineSemaphore` to track
+// timeline progress on CPU. Each `VkFence` can be used to query the status of
+// all the semaphores in the same submission to a `VkQueue`.
+class TimePointFencePool final : public RefObject<TimePointFencePool> {
+ public:
+ static constexpr int kMaxInFlightFenceCount = 64;
+
+ // Creates a new pool and pre-allocates `kMaxInFlightFenceCount` fences.
+ static iree_status_t Create(VkDeviceHandle* logical_device,
+ TimePointFencePool** out_pool);
+
+ ~TimePointFencePool();
+
+ // Acquires a fence from the pool for use by the caller. The fence is
+ // guaranteed to be in unsignaled state and not in-flight on GPU.
+ //
+ // Returns RESOURCE_EXHAUSTED if the pool has no more available fences.
+ // Callers are expected to handle this by waiting on previous fences or for
+ // complete device idle. Yes, that's as bad as it sounds, and if we start
+ // seeing that we should bump up the max count.
+ iree_status_t Acquire(ref_ptr<TimePointFence>* out_fence);
+
+ // Releases one fence back to the pool. The fence must either be signaled or
+ // not be in flight on GPU.
+ void ReleaseResolved(TimePointFence* fence);
+
+ VkDeviceHandle* logical_device() const { return logical_device_; }
+
+ private:
+ explicit TimePointFencePool(VkDeviceHandle* logical_device);
+
+ const ref_ptr<DynamicSymbols>& syms() const;
+
+ iree_status_t PreallocateFences();
+
+ VkDeviceHandle* logical_device_;
+
+ iree_slim_mutex_t mutex_;
+
+ // Track via unique_ptr, since IntrusiveList doesn't manage memory itself.
+ IntrusiveList<std::unique_ptr<TimePointFence>> free_fences_
+ IREE_GUARDED_BY(mutex_);
+};
+
+// A pool of `VkSemaphore`s that can be used by `EmulatedTimelineSemaphore` to
+// simulate individual timeline value signaling.
+class TimePointSemaphorePool final : public RefObject<TimePointSemaphorePool> {
+ public:
+ static constexpr int kMaxInFlightSemaphoreCount = 64;
+
+ // Creates a new pool and pre-allocates `kMaxInFlightSemaphoreCount` binary
+ // semaphores.
+ static iree_status_t Create(VkDeviceHandle* logical_device,
+ TimePointSemaphorePool** out_pool);
+
+ ~TimePointSemaphorePool();
+
+ // Acquires a binary semaphore from the pool for use by the caller. The
+ // semaphore is guaranteed to be in unsignaled state and not in-flight on GPU.
+ //
+ // Returns RESOURCE_EXHAUSTED if the pool has no more available semaphores.
+ // Callers are expected to handle this by waiting on previous fences or for
+ // complete device idle. Yes, that's as bad as it sounds, and if we start
+ // seeing that we should bump up the max count.
+ iree_status_t Acquire(TimePointSemaphore** out_semaphore);
+
+ // Releases one or more semaphores back to the pool. The binary semaphore must
+ // be unsignaled and not in flight on GPU.
+ void ReleaseResolved(IntrusiveList<TimePointSemaphore>* semaphores);
+
+ // Releases one or more semaphores back to the pool. These may be in any state
+ // and will be assumed as untouchable; the pool will unconditionally recycle
+ // them.
+ void ReleaseUnresolved(IntrusiveList<TimePointSemaphore>* semaphores);
+
+ private:
+ explicit TimePointSemaphorePool(VkDeviceHandle* logical_device);
+
+ const ref_ptr<DynamicSymbols>& syms() const;
+
+ iree_status_t PreallocateSemaphores();
+
+ VkDeviceHandle* logical_device_;
+
+ iree_slim_mutex_t mutex_;
+
+ std::array<TimePointSemaphore, kMaxInFlightSemaphoreCount> storage_
+ IREE_GUARDED_BY(mutex_);
+ IntrusiveList<TimePointSemaphore> free_semaphores_ IREE_GUARDED_BY(mutex_);
+};
+
+} // namespace vulkan
+} // namespace hal
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_TIMEPOINT_UTIL_H_
diff --git a/runtime/src/iree/hal/vulkan/tracing.cc b/runtime/src/iree/hal/vulkan/tracing.cc
new file mode 100644
index 0000000..ead88e5
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/tracing.cc
@@ -0,0 +1,667 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/tracing.h"
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "third_party/tracy/Tracy.hpp"
+#include "third_party/tracy/client/TracyProfiler.hpp"
+#include "third_party/tracy/common/TracyAlloc.hpp"
+
+// Total number of queries the per-queue query pool will contain. This
+// translates to the maximum number of outstanding queries before collection is
+// required.
+#define IREE_HAL_VULKAN_TRACING_DEFAULT_QUERY_CAPACITY (32 * 1024)
+
+// Total number of queries that can be read back from the API in a single
+// collection.
+#define IREE_HAL_VULKAN_TRACING_READBACK_QUERY_CAPACITY (8 * 1024)
+
+// Number of times we will query the max_deviation from calibrated timestamps.
+// The more we do the better confidence we have in a lower-bound.
+#define IREE_HAL_VULKAN_TRACING_MAX_DEVIATION_PROBE_COUNT 32
+
+// Layout of one readback entry as written by vkGetQueryPoolResults when
+// VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is used:
+// the timestamp value followed by its availability word.
+typedef struct iree_hal_vulkan_timestamp_query_t {
+ uint64_t timestamp; // raw GPU timestamp value
+ uint64_t availability; // non-zero if available
+} iree_hal_vulkan_timestamp_query_t;
+
+// Per-queue tracing context state. Thread-compatible; see tracing.h.
+struct iree_hal_vulkan_tracing_context_t {
+ // Device and queue the context represents.
+ iree::hal::vulkan::VkDeviceHandle* logical_device;
+ VkQueue queue;
+ iree_allocator_t host_allocator;
+
+ // Maintenance queue that supports dispatch commands and can be used to reset
+ // queries.
+ VkQueue maintenance_dispatch_queue;
+ // Command pool that serves command buffers compatible with the
+ // |maintenance_dispatch_queue|.
+ iree::hal::vulkan::VkCommandPoolHandle* maintenance_command_pool;
+
+ // A unique GPU zone ID allocated from Tracy.
+ // There is a global limit of 255 GPU zones (ID 255 is special).
+ uint8_t id;
+
+ // Defines how the timestamps are interpreted (device-specific, posix, QPC).
+ // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkTimeDomainEXT.html
+ VkTimeDomainEXT time_domain;
+
+ // Maximum expected deviation between CPU and GPU timestamps based on an
+ // average computed at startup. Calibration events that exceed this value are
+ // discarded.
+ uint64_t max_expected_deviation;
+
+ // Vulkan-reported CPU timestamp of the last calibration.
+ // Used to detect when drift occurs and we need to notify tracy.
+ uint64_t previous_cpu_time;
+
+ // Pool of query instances that we treat as a backing store for a ringbuffer.
+ VkQueryPool query_pool;
+
+ // Indices into |query_pool| defining a ringbuffer.
+ uint32_t query_head;
+ uint32_t query_tail;
+ uint32_t query_capacity;
+
+ // Readback storage; large enough to get a decent chunk of queries back from
+ // the API in one shot.
+ //
+ // Data is stored as [[timestamp, availability], ...].
+ // Availability will be non-zero if the timestamp is valid. Since we put all
+ // timestamps in order once we reach an unavailable timestamp we can bail
+ // and leave that for future collections.
+ iree_hal_vulkan_timestamp_query_t
+ readback_buffer[IREE_HAL_VULKAN_TRACING_READBACK_QUERY_CAPACITY];
+};
+
+// Allocates and begins a command buffer and returns its handle.
+// The command buffer comes from |maintenance_command_pool| and is begun as
+// one-time-submit; pair with iree_hal_vulkan_tracing_submit_command_buffer.
+// Returns VK_NULL_HANDLE if allocation fails.
+static VkCommandBuffer iree_hal_vulkan_tracing_begin_command_buffer(
+ iree_hal_vulkan_tracing_context_t* context) {
+ const auto& syms = context->logical_device->syms();
+
+ VkCommandBufferAllocateInfo command_buffer_info;
+ memset(&command_buffer_info, 0, sizeof(command_buffer_info));
+ command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+ command_buffer_info.commandPool = *context->maintenance_command_pool;
+ command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+ command_buffer_info.commandBufferCount = 1;
+ VkCommandBuffer command_buffer = VK_NULL_HANDLE;
+ // Allocation failure is tolerated: tracing is best-effort so we only check
+ // the returned handle below.
+ IREE_IGNORE_ERROR(context->maintenance_command_pool->Allocate(
+ &command_buffer_info, &command_buffer));
+ if (!command_buffer) return VK_NULL_HANDLE;
+
+ VkCommandBufferBeginInfo begin_info;
+ memset(&begin_info, 0, sizeof(begin_info));
+ begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+ begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+ // NOTE: the vkBeginCommandBuffer result is not checked.
+ syms->vkBeginCommandBuffer(command_buffer, &begin_info);
+
+ return command_buffer;
+}
+
+// Ends and submits |command_buffer| and waits for it to complete.
+// Synchronous: blocks on vkQueueWaitIdle for the maintenance queue and then
+// returns the command buffer to the pool.
+static void iree_hal_vulkan_tracing_submit_command_buffer(
+ iree_hal_vulkan_tracing_context_t* context,
+ VkCommandBuffer command_buffer) {
+ const auto& syms = context->logical_device->syms();
+
+ syms->vkEndCommandBuffer(command_buffer);
+
+ VkSubmitInfo submit_info;
+ memset(&submit_info, 0, sizeof(submit_info));
+ submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ submit_info.commandBufferCount = 1;
+ submit_info.pCommandBuffers = &command_buffer;
+ syms->vkQueueSubmit(context->maintenance_dispatch_queue, 1, &submit_info,
+ VK_NULL_HANDLE);
+ syms->vkQueueWaitIdle(context->maintenance_dispatch_queue);
+
+ context->maintenance_command_pool->Free(command_buffer);
+}
+
+// Synchronously resets a range of queries in a query pool.
+// This may submit commands to the queue.
+static void iree_hal_vulkan_tracing_reset_query_pool(
+ iree_hal_vulkan_tracing_context_t* context, uint32_t query_index,
+ uint32_t query_count) {
+ const auto& syms = context->logical_device->syms();
+
+ // Fast-path for when host-side vkResetQueryPool is available.
+ // This is core in Vulkan 1.2.
+ if (context->logical_device->enabled_extensions().host_query_reset) {
+ // Prefer the core symbol; fall back to the EXT alias. If neither resolved
+ // we drop through to the command-buffer path below.
+ PFN_vkResetQueryPool vkResetQueryPool_fn = syms->vkResetQueryPool
+ ? syms->vkResetQueryPool
+ : syms->vkResetQueryPoolEXT;
+ if (vkResetQueryPool_fn != NULL) {
+ vkResetQueryPool_fn(*context->logical_device, context->query_pool,
+ query_index, query_count);
+ return;
+ }
+ }
+
+ // Slow-path submitting a command buffer to reset the query pool. It's obvious
+ // why vkResetQueryPool was added :)
+ VkCommandBuffer command_buffer =
+ iree_hal_vulkan_tracing_begin_command_buffer(context);
+ if (command_buffer != VK_NULL_HANDLE) {
+ syms->vkCmdResetQueryPool(command_buffer, context->query_pool, query_index,
+ query_count);
+ iree_hal_vulkan_tracing_submit_command_buffer(context, command_buffer);
+ }
+}
+
+// Attempts to get a timestamp from both the CPU and GPU that are correlated
+// with each other. Only valid when calibration is supported.
+// On return |out_gpu_time| is in the device domain and |out_cpu_time| is in
+// |context->time_domain| (converted to nanoseconds for the QPC domain).
+static void iree_hal_vulkan_tracing_query_calibration_timestamps(
+ iree_hal_vulkan_tracing_context_t* context, uint64_t* out_cpu_time,
+ uint64_t* out_gpu_time) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ *out_cpu_time = 0;
+ *out_gpu_time = 0;
+
+ VkCalibratedTimestampInfoEXT timestamp_infos[2];
+ timestamp_infos[0].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
+ timestamp_infos[0].pNext = NULL;
+ timestamp_infos[0].timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
+ timestamp_infos[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
+ timestamp_infos[1].pNext = NULL;
+ timestamp_infos[1].timeDomain = context->time_domain;
+ uint64_t timestamps[2] = {0, 0};
+ uint64_t max_deviation = 0;
+ // Retry until the driver reports a deviation within the bound established
+ // during initial calibration.
+ // NOTE(review): there is no iteration cap here; if the driver never reports
+ // a deviation <= max_expected_deviation this loops indefinitely — confirm
+ // that is acceptable.
+ do {
+ context->logical_device->syms()->vkGetCalibratedTimestampsEXT(
+ *context->logical_device, IREE_ARRAYSIZE(timestamps), timestamp_infos,
+ timestamps, &max_deviation);
+ } while (max_deviation > context->max_expected_deviation);
+
+ *out_gpu_time = timestamps[0];
+ *out_cpu_time = timestamps[1];
+ switch (context->time_domain) {
+#if defined(IREE_PLATFORM_WINDOWS)
+ case VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT:
+ // Convert QPC ticks to nanoseconds using the QPC frequency from Tracy.
+ *out_cpu_time *= (uint64_t)(1000000000.0 / tracy::GetFrequencyQpc());
+ break;
+#else
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+ // TODO(benvanik): posix calibrated timestamps - ignored for now.
+ break;
+#endif // IREE_PLATFORM_WINDOWS
+ default:
+ break;
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Populates |out_cpu_time| and |out_gpu_time| with calibrated timestamps.
+// Depending on whether VK_EXT_calibrated_timestamps is available this may be
+// a guess done by ourselves (with lots of slop) or done by the driver (with
+// less slop).
+// Side effects: consumes/resets query 0 of |context->query_pool| on the
+// fallback path and seeds |context->max_expected_deviation| and
+// |context->previous_cpu_time| on the calibrated path.
+static void iree_hal_vulkan_tracing_perform_initial_calibration(
+ iree_hal_vulkan_tracing_context_t* context, uint64_t* out_cpu_time,
+ uint64_t* out_gpu_time) {
+ const auto& syms = context->logical_device->syms();
+ *out_cpu_time = 0;
+ *out_gpu_time = 0;
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(z0,
+ context->time_domain == VK_TIME_DOMAIN_DEVICE_EXT
+ ? "VK_TIME_DOMAIN_DEVICE_EXT"
+ : "VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT");
+
+ // Attempt to get a timestamp from both the device and the host at roughly the
+ // same time. There's a gap between when we get control returned to use after
+ // submitting and waiting for idle and that will be the slop we have in the
+ // timings in the tracy UI.
+ if (context->time_domain == VK_TIME_DOMAIN_DEVICE_EXT) {
+ // Submit a device timestamp.
+ VkCommandBuffer command_buffer =
+ iree_hal_vulkan_tracing_begin_command_buffer(context);
+ if (command_buffer != VK_NULL_HANDLE) {
+ syms->vkCmdWriteTimestamp(command_buffer,
+ VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ context->query_pool, 0);
+ iree_hal_vulkan_tracing_submit_command_buffer(context, command_buffer);
+ }
+
+ // Query the timestamp from the host and the device.
+ *out_cpu_time = tracy::Profiler::GetTime();
+ syms->vkGetQueryPoolResults(
+ *context->logical_device, context->query_pool, 0, 1,
+ sizeof(*out_gpu_time), out_gpu_time, sizeof(*out_gpu_time),
+ VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+
+ // Reset the query used.
+ iree_hal_vulkan_tracing_reset_query_pool(context, 0, 1);
+ IREE_TRACE_ZONE_END(z0);
+ return;
+ }
+
+ // From the spec:
+ // The maximum deviation may vary between calls to
+ // vkGetCalibratedTimestampsEXT even for the same set of time domains due to
+ // implementation and platform specific reasons. It is the application’s
+ // responsibility to assess whether the returned maximum deviation makes the
+ // timestamp values suitable for any particular purpose and can choose to
+ // re-issue the timestamp calibration call pursuing a lower deviation value.
+ // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/vkGetCalibratedTimestampsEXT.html
+ //
+ // We perform a small number of queries here and find the minimum deviation
+ // across all of them to get an average lower bound on the maximum deviation
+ // from any particular query. We then use that as our baseline (plus some
+ // slop) to see if calibration events in the future are reasonable.
+ VkCalibratedTimestampInfoEXT timestamp_infos[2];
+ timestamp_infos[0].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
+ timestamp_infos[0].pNext = NULL;
+ timestamp_infos[0].timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
+ timestamp_infos[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
+ timestamp_infos[1].pNext = NULL;
+ timestamp_infos[1].timeDomain = context->time_domain;
+ uint64_t max_deviations[IREE_HAL_VULKAN_TRACING_MAX_DEVIATION_PROBE_COUNT];
+ IREE_TRACE_ZONE_BEGIN_NAMED(z1, "vkGetCalibratedTimestampsEXT");
+ for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(max_deviations); ++i) {
+ uint64_t timestamps[2] = {0, 0};
+ syms->vkGetCalibratedTimestampsEXT(
+ *context->logical_device, IREE_ARRAYSIZE(timestamps), timestamp_infos,
+ timestamps, &max_deviations[i]);
+ }
+ IREE_TRACE_ZONE_END(z1);
+ uint64_t min_deviation = max_deviations[0];
+ for (iree_host_size_t i = 1; i < IREE_ARRAYSIZE(max_deviations); ++i) {
+ min_deviation = iree_min(min_deviation, max_deviations[i]);
+ }
+ // Allow 1.5x the observed best-case deviation before discarding events.
+ context->max_expected_deviation = min_deviation * 3 / 2;
+
+ iree_hal_vulkan_tracing_query_calibration_timestamps(
+ context, &context->previous_cpu_time, out_gpu_time);
+ *out_cpu_time = tracy::Profiler::GetTime();
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Performs a periodic calibration (if supported) and sends the data to tracy.
+// Over time the host and device clocks may drift (especially with power events)
+// and by frequently performing this we ensure that the samples we are sending
+// to tracy are able to be correlated.
+void iree_hal_vulkan_tracing_perform_calibration(
+ iree_hal_vulkan_tracing_context_t* context) {
+ // Device-only time domain means calibrated timestamps are unavailable.
+ if (context->time_domain == VK_TIME_DOMAIN_DEVICE_EXT) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ uint64_t cpu_time = 0;
+ uint64_t gpu_time = 0;
+ iree_hal_vulkan_tracing_query_calibration_timestamps(context, &cpu_time,
+ &gpu_time);
+
+ uint64_t tracy_time = tracy::Profiler::GetTime();
+ // Only emit an event when the calibrated CPU clock actually advanced.
+ if (cpu_time > context->previous_cpu_time) {
+ uint64_t cpu_delta = cpu_time - context->previous_cpu_time;
+ context->previous_cpu_time = cpu_time;
+ // Tracy calibration event: Tracy's own CPU time plus the delta observed
+ // on the Vulkan-calibrated clock.
+ auto* item = tracy::Profiler::QueueSerial();
+ tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuCalibration);
+ tracy::MemWrite(&item->gpuCalibration.gpuTime, gpu_time);
+ tracy::MemWrite(&item->gpuCalibration.cpuTime, tracy_time);
+ tracy::MemWrite(&item->gpuCalibration.cpuDelta, cpu_delta);
+ tracy::MemWrite(&item->gpuCalibration.context, context->id);
+ tracy::Profiler::QueueSerialFinish();
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Prepares the VkQueryPool backing storage for our query ringbuffer.
+// Sets |context->query_pool| and |context->query_capacity| and performs the
+// required initial reset of all queries.
+static void iree_hal_vulkan_tracing_prepare_query_pool(
+ iree_hal_vulkan_tracing_context_t* context) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Create a query pool with the largest query capacity it can provide.
+ // NOTE(review): the halving retry loop assumes creation eventually succeeds;
+ // if queryCount reached 0 this would spin — confirm drivers always accept a
+ // small pool.
+ VkQueryPoolCreateInfo pool_info;
+ memset(&pool_info, 0, sizeof(pool_info));
+ pool_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
+ pool_info.queryCount = IREE_HAL_VULKAN_TRACING_DEFAULT_QUERY_CAPACITY;
+ pool_info.queryType = VK_QUERY_TYPE_TIMESTAMP;
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, pool_info.queryCount);
+ while (context->logical_device->syms()->vkCreateQueryPool(
+ *context->logical_device, &pool_info,
+ context->logical_device->allocator(),
+ &context->query_pool) != VK_SUCCESS) {
+ pool_info.queryCount /= 2;
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, pool_info.queryCount);
+ }
+ context->query_capacity = pool_info.queryCount;
+
+ // Perform initial reset of the query pool. All queries must be reset upon
+ // creation before first use.
+ iree_hal_vulkan_tracing_reset_query_pool(context, 0, context->query_capacity);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Prepares the Tracy-related GPU context that events are fed into. Each context
+// will appear as a unique plot in the tracy UI with the given |queue_name|.
+// Allocates |context->id|, performs initial calibration, and emits the
+// GpuNewContext/GpuContextName events to Tracy's serial queue.
+static void iree_hal_vulkan_tracing_prepare_gpu_context(
+ iree_hal_vulkan_tracing_context_t* context,
+ VkPhysicalDevice physical_device, iree_string_view_t queue_name) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Allocate the process-unique GPU context ID. There's a max of 255 available;
+ // if we are recreating devices a lot we may exceed that. Don't do that, or
+ // wrap around and get weird (but probably still usable) numbers.
+ context->id =
+ tracy::GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed);
+ if (context->id >= 255) {
+ context->id %= 255;
+ }
+
+ // The number of nanoseconds required for a timestamp query to be incremented
+ // by 1.
+ VkPhysicalDeviceProperties device_properties;
+ context->logical_device->syms()->vkGetPhysicalDeviceProperties(
+ physical_device, &device_properties);
+ float timestamp_period = device_properties.limits.timestampPeriod;
+
+ // Perform initial calibration for tracy to be able to correlate timestamps
+ // between CPU and GPU.
+ uint64_t cpu_time = 0;
+ uint64_t gpu_time = 0;
+ iree_hal_vulkan_tracing_perform_initial_calibration(context, &cpu_time,
+ &gpu_time);
+
+ uint8_t context_flags = 0;
+ if (context->time_domain != VK_TIME_DOMAIN_DEVICE_EXT) {
+ // Tell tracy we'll be passing calibrated timestamps and not to mess with
+ // the times. We'll periodically send GpuCalibration events in case the
+ // times drift.
+ context_flags |= tracy::GpuContextCalibration;
+ }
+ {
+ auto* item = tracy::Profiler::QueueSerial();
+ tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuNewContext);
+ tracy::MemWrite(&item->gpuNewContext.cpuTime, cpu_time);
+ tracy::MemWrite(&item->gpuNewContext.gpuTime, gpu_time);
+ memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
+ tracy::MemWrite(&item->gpuNewContext.period, timestamp_period);
+ tracy::MemWrite(&item->gpuNewContext.context, context->id);
+ tracy::MemWrite(&item->gpuNewContext.flags, context_flags);
+ tracy::MemWrite(&item->gpuNewContext.type, tracy::GpuContextType::Vulkan);
+ tracy::Profiler::QueueSerialFinish();
+ }
+
+ // Send the name of the context along.
+ // NOTE: Tracy will unconditionally free the name so we must clone it here.
+ // Since internally Tracy will use its own rpmalloc implementation we must
+ // make sure we allocate from the same source.
+ char* cloned_name = (char*)tracy::tracy_malloc(queue_name.size);
+ memcpy(cloned_name, queue_name.data, queue_name.size);
+ {
+ auto* item = tracy::Profiler::QueueSerial();
+ tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuContextName);
+ tracy::MemWrite(&item->gpuContextNameFat.context, context->id);
+ tracy::MemWrite(&item->gpuContextNameFat.ptr, (uint64_t)cloned_name);
+ tracy::MemWrite(&item->gpuContextNameFat.size, queue_name.size);
+ tracy::Profiler::QueueSerialFinish();
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the best possible platform-supported time domain, falling back to
+// VK_TIME_DOMAIN_DEVICE_EXT. By default it is one that is only usable for
+// device-relative calculations and that we need to perform our own hacky
+// calibration on.
+static VkTimeDomainEXT iree_hal_vulkan_tracing_query_time_domain(
+ VkPhysicalDevice physical_device,
+ iree::hal::vulkan::VkDeviceHandle* logical_device) {
+ if (!logical_device->enabled_extensions().calibrated_timestamps) {
+ // Calibrated timestamps extension is not available; we'll only have the
+ // device domain.
+ return VK_TIME_DOMAIN_DEVICE_EXT;
+ }
+
+ // Standard two-call enumeration: query the count, then the domains.
+ // Any failure falls back to the device domain.
+ uint32_t time_domain_count = 0;
+ if (logical_device->syms()->vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(
+ physical_device, &time_domain_count, NULL) != VK_SUCCESS) {
+ return VK_TIME_DOMAIN_DEVICE_EXT;
+ }
+ VkTimeDomainEXT* time_domains = (VkTimeDomainEXT*)iree_alloca(
+ time_domain_count * sizeof(VkTimeDomainEXT));
+ if (logical_device->syms()->vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(
+ physical_device, &time_domain_count, time_domains) != VK_SUCCESS) {
+ return VK_TIME_DOMAIN_DEVICE_EXT;
+ }
+
+ for (uint32_t i = 0; i < time_domain_count; i++) {
+ switch (time_domains[i]) {
+#if defined(IREE_PLATFORM_WINDOWS)
+ case VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT:
+ return time_domains[i];
+#else
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+ // TODO(benvanik): support posix clock domains with some kind of math.
+ // return time_domains[i]; -- ignored
+ // (intentional fallthrough to default: these domains are skipped)
+#endif // IREE_PLATFORM_WINDOWS
+ default:
+ continue;
+ }
+ }
+ return VK_TIME_DOMAIN_DEVICE_EXT;
+}
+
+// Allocates and initializes a per-queue tracing context: picks the time
+// domain, creates the query pool, and registers the Tracy GPU context.
+// On failure no context is returned and all partial state is freed.
+iree_status_t iree_hal_vulkan_tracing_context_allocate(
+ VkPhysicalDevice physical_device,
+ iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
+ iree_string_view_t queue_name, VkQueue maintenance_dispatch_queue,
+ iree::hal::vulkan::VkCommandPoolHandle* maintenance_command_pool,
+ iree_allocator_t host_allocator,
+ iree_hal_vulkan_tracing_context_t** out_context) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_ASSERT_ARGUMENT(logical_device);
+ IREE_ASSERT_ARGUMENT(out_context);
+ *out_context = NULL;
+
+ iree_hal_vulkan_tracing_context_t* context = NULL;
+ iree_status_t status =
+ iree_allocator_malloc(host_allocator, sizeof(*context), (void**)&context);
+ if (iree_status_is_ok(status)) {
+ context->logical_device = logical_device;
+ context->queue = queue;
+ context->host_allocator = host_allocator;
+ context->time_domain = iree_hal_vulkan_tracing_query_time_domain(
+ physical_device, logical_device);
+ context->maintenance_dispatch_queue = maintenance_dispatch_queue;
+ context->maintenance_command_pool = maintenance_command_pool;
+
+ // Prepare the query pool and perform the initial calibration.
+ // NOTE: these helpers return void; any internal failures are not reflected
+ // in |status|.
+ iree_hal_vulkan_tracing_prepare_query_pool(context);
+
+ // Prepare the Tracy GPU context.
+ iree_hal_vulkan_tracing_prepare_gpu_context(context, physical_device,
+ queue_name);
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_context = context;
+ } else {
+ iree_hal_vulkan_tracing_context_free(context);
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Frees |context| and its Vulkan query pool; no-op when |context| is NULL.
+// Performs a final synchronous collection so pending timestamps reach Tracy.
+void iree_hal_vulkan_tracing_context_free(
+ iree_hal_vulkan_tracing_context_t* context) {
+ if (!context) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ if (context->query_pool != VK_NULL_HANDLE) {
+ // Always perform a collection on shutdown.
+ iree_hal_vulkan_tracing_context_collect(context, VK_NULL_HANDLE);
+
+ auto* logical_device = context->logical_device;
+ logical_device->syms()->vkDestroyQueryPool(
+ *logical_device, context->query_pool, logical_device->allocator());
+ }
+
+ // Capture the allocator before freeing the struct that holds it.
+ iree_allocator_t host_allocator = context->host_allocator;
+ iree_allocator_free(host_allocator, context);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Reserves the next slot in the query ringbuffer and returns its index.
+// Overflow (head catching up to tail) is only detected via assert in debug
+// builds; callers must collect frequently enough to avoid it.
+uint32_t iree_hal_vulkan_tracing_context_acquire_query_id(
+ iree_hal_vulkan_tracing_context_t* context) {
+ uint32_t id = context->query_head;
+ context->query_head = (context->query_head + 1) % context->query_capacity;
+ assert(context->query_head != context->query_tail);
+ return id;
+}
+
+// Drains available timestamp queries from the ringbuffer into Tracy and
+// resets the consumed queries. See the declaration in tracing.h for the
+// |command_buffer| contract (VK_NULL_HANDLE => synchronous reset).
+void iree_hal_vulkan_tracing_context_collect(
+ iree_hal_vulkan_tracing_context_t* context,
+ VkCommandBuffer command_buffer) {
+ if (!context) return;
+ if (context->query_tail == context->query_head) {
+ // No outstanding queries.
+ return;
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+ const auto& syms = context->logical_device->syms();
+
+ while (context->query_tail != context->query_head) {
+ // Compute the contiguous range of queries ready to be read.
+ // If the ringbuffer wraps around we'll handle that in the next loop.
+ uint32_t try_query_count =
+ context->query_head < context->query_tail
+ ? context->query_capacity - context->query_tail
+ : context->query_head - context->query_tail;
+ // Clamp to the fixed-size readback buffer.
+ try_query_count = iree_min(try_query_count,
+ IREE_HAL_VULKAN_TRACING_READBACK_QUERY_CAPACITY);
+
+ // Read back all of the queries. Note that we also are reading back the
+ // availability such that we can handle partial readiness of the outstanding
+ // range of queries.
+ // Any non-success result (e.g. VK_NOT_READY) ends collection for now;
+ // remaining queries are left for a future call.
+ uint32_t query_base = context->query_tail;
+ if (syms->vkGetQueryPoolResults(
+ *context->logical_device, context->query_pool, query_base,
+ try_query_count, sizeof(context->readback_buffer),
+ context->readback_buffer, sizeof(iree_hal_vulkan_timestamp_query_t),
+ VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) !=
+ VK_SUCCESS) {
+ break;
+ }
+
+ // Scan and feed the times to tracy, stopping when we hit the first
+ // unavailable query.
+ uint32_t read_query_count = 0;
+ for (uint32_t i = 0; i < try_query_count; ++i) {
+ if (context->readback_buffer[i].availability == 0) break;
+ read_query_count = i + 1;
+ auto* item = tracy::Profiler::QueueSerial();
+ tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuTime);
+ tracy::MemWrite(&item->gpuTime.gpuTime,
+ context->readback_buffer[i].timestamp);
+ tracy::MemWrite(&item->gpuTime.queryId, (uint16_t)(query_base + i));
+ tracy::MemWrite(&item->gpuTime.context, context->id);
+ tracy::Profiler::QueueSerialFinish();
+ }
+
+ // Reset the range of queries read back.
+ if (command_buffer != VK_NULL_HANDLE) {
+ // Piggyback on the caller's command buffer; executes on submission.
+ syms->vkCmdResetQueryPool(command_buffer, context->query_pool, query_base,
+ read_query_count);
+ } else {
+ iree_hal_vulkan_tracing_reset_query_pool(context, query_base,
+ read_query_count);
+ }
+
+ // Advance the tail, wrapping the ringbuffer at capacity.
+ context->query_tail += read_query_count;
+ if (context->query_tail >= context->query_capacity) {
+ context->query_tail = 0;
+ }
+ }
+
+ // Run calibration - we could do this less frequently in cases where collect
+ // is called every submission, however it's relatively cheap compared to all
+ // this other tracing overhead.
+ iree_hal_vulkan_tracing_perform_calibration(context);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_hal_vulkan_tracing_zone_begin_impl(
+ iree_hal_vulkan_tracing_context_t* context, VkCommandBuffer command_buffer,
+ const iree_tracing_location_t* src_loc) {
+ if (!context) return;
+
+ uint32_t query_id = iree_hal_vulkan_tracing_context_acquire_query_id(context);
+ context->logical_device->syms()->vkCmdWriteTimestamp(
+ command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, context->query_pool,
+ query_id);
+
+ auto* item = tracy::Profiler::QueueSerial();
+ tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuZoneBeginSerial);
+ tracy::MemWrite(&item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime());
+ tracy::MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)src_loc);
+ tracy::MemWrite(&item->gpuZoneBegin.thread, tracy::GetThreadHandle());
+ tracy::MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)query_id);
+ tracy::MemWrite(&item->gpuZoneBegin.context, context->id);
+ tracy::Profiler::QueueSerialFinish();
+}
+
+void iree_hal_vulkan_tracing_zone_begin_external_impl(
+ iree_hal_vulkan_tracing_context_t* context, VkCommandBuffer command_buffer,
+ const char* file_name, size_t file_name_length, uint32_t line,
+ const char* function_name, size_t function_name_length, const char* name,
+ size_t name_length) {
+ if (!context) return;
+
+ uint32_t query_id = iree_hal_vulkan_tracing_context_acquire_query_id(context);
+ context->logical_device->syms()->vkCmdWriteTimestamp(
+ command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, context->query_pool,
+ query_id);
+
+ const auto src_loc = tracy::Profiler::AllocSourceLocation(
+ line, file_name, file_name_length, function_name, function_name_length,
+ name, name_length);
+ auto* item = tracy::Profiler::QueueSerial();
+ tracy::MemWrite(&item->hdr.type,
+ tracy::QueueType::GpuZoneBeginAllocSrcLocSerial);
+ tracy::MemWrite(&item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime());
+ tracy::MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)src_loc);
+ tracy::MemWrite(&item->gpuZoneBegin.thread, tracy::GetThreadHandle());
+ tracy::MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)query_id);
+ tracy::MemWrite(&item->gpuZoneBegin.context, context->id);
+ tracy::Profiler::QueueSerialFinish();
+}
+
+void iree_hal_vulkan_tracing_zone_end_impl(
+ iree_hal_vulkan_tracing_context_t* context,
+ VkCommandBuffer command_buffer) {
+ if (!context) return;
+
+ uint32_t query_id = iree_hal_vulkan_tracing_context_acquire_query_id(context);
+ context->logical_device->syms()->vkCmdWriteTimestamp(
+ command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, context->query_pool,
+ query_id);
+
+ auto* item = tracy::Profiler::QueueSerial();
+ tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuZoneEndSerial);
+ tracy::MemWrite(&item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime());
+ tracy::MemWrite(&item->gpuZoneEnd.thread, tracy::GetThreadHandle());
+ tracy::MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)query_id);
+ tracy::MemWrite(&item->gpuZoneEnd.context, context->id);
+ tracy::Profiler::QueueSerialFinish();
+}
+
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
diff --git a/runtime/src/iree/hal/vulkan/tracing.h b/runtime/src/iree/hal/vulkan/tracing.h
new file mode 100644
index 0000000..f43f80e
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/tracing.h
@@ -0,0 +1,174 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_TRACING_H_
+#define IREE_HAL_VULKAN_TRACING_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/vulkan/vulkan_headers.h"
+// clang-format on
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Per-queue Vulkan tracing context.
+// No-op if IREE tracing is not enabled.
+//
+// Use the IREE_VULKAN_TRACE_* macros to trace a contiguous set of command
+// buffer operations. Unlike the normal tracy macros there are no zone IDs and
+// instead each queue gets an ID allocated once and passed to all tracing
+// macros.
+//
+// Usage:
+//  IREE_VULKAN_TRACE_ZONE_BEGIN(queue->tracing_context, command_buffer);
+//  vkCmdDispatch(command_buffer, ...);
+//  IREE_VULKAN_TRACE_ZONE_END(queue->tracing_context, command_buffer);
+//  ...
+//  iree_hal_vulkan_tracing_context_collect(queue->tracing_context,
+//                                          command_buffer);
+//  vkQueueSubmit(...command_buffer...);
+//
+// NOTE: timestamps have non-trivial side-effecting behavior on the device:
+// inserting a timestamp is in the worst (and average) case just as bad as
+// inserting a full global execution barrier. If two command buffer operations
+// that could overlap (no barrier between them) have tracing zones placed around
+// them they will execute sequentially.
+//
+// TODO(benvanik):
+// Each queue needs a context and maintains its own query pool. In the future
+// this should be changed to have a single query pool per device to reduce
+// bookkeeping overhead.
+//
+// TODO(benvanik):
+// Both a zone begin and zone end always insert timestamps leading to N*2
+// total queries, however within command buffers the end of one zone and the
+// begin of another share the same point in time. By inserting the timestamps
+// at barriers in the command buffer the query count can be reduced to N+1.
+//
+// TODO(benvanik):
+// vkCmdCopyQueryPoolResults is really what we should be using to do this -
+// that inserts a device-side transfer to a buffer (conceptually) that is
+// in-stream with all submissions to a queue. This changes things to a push
+// model vs. the pull one in _collect and allows us to pipeline the readbacks.
+// Instead of being limited to the query pool slots we'd only be limited by
+// the size of the buffer the copy targets allowing us to perform collection
+// much more infrequently.
+//
+// Thread-compatible: external synchronization is required if using from
+// multiple threads (same as with VkQueue itself).
+typedef struct iree_hal_vulkan_tracing_context_t
+ iree_hal_vulkan_tracing_context_t;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+// Allocates a tracing context for the given Vulkan queue.
+// Each context must only be used with the queue it was created with.
+//
+// |maintenance_dispatch_queue| may be used to perform query pool maintenance
+// tasks and must support graphics or compute commands.
+iree_status_t iree_hal_vulkan_tracing_context_allocate(
+ VkPhysicalDevice physical_device,
+ iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
+ iree_string_view_t queue_name, VkQueue maintenance_dispatch_queue,
+ iree::hal::vulkan::VkCommandPoolHandle* maintenance_command_pool,
+ iree_allocator_t host_allocator,
+ iree_hal_vulkan_tracing_context_t** out_context);
+
+// Frees a tracing context and all associated Vulkan resources.
+// All submissions using the resources must be completed prior to calling.
+void iree_hal_vulkan_tracing_context_free(
+ iree_hal_vulkan_tracing_context_t* context);
+
+// Collects in-flight timestamp queries from the queue and feeds them to tracy.
+// Must be called frequently (every submission, etc) to drain the backlog;
+// tracing may start failing if the internal ringbuffer is exceeded.
+//
+// The provided |command_buffer| may receive additional bookkeeping commands
+// that should have no impact on correctness or behavior. If VK_NULL_HANDLE is
+// provided then collection will occur synchronously.
+void iree_hal_vulkan_tracing_context_collect(
+ iree_hal_vulkan_tracing_context_t* context, VkCommandBuffer command_buffer);
+
+// Begins a normal zone derived on the calling |src_loc|.
+// Must be perfectly nested and paired with a corresponding zone end.
+void iree_hal_vulkan_tracing_zone_begin_impl(
+ iree_hal_vulkan_tracing_context_t* context, VkCommandBuffer command_buffer,
+ const iree_tracing_location_t* src_loc);
+
+// Begins an external zone using the given source information.
+// The provided strings will be copied into the tracy buffer.
+void iree_hal_vulkan_tracing_zone_begin_external_impl(
+ iree_hal_vulkan_tracing_context_t* context, VkCommandBuffer command_buffer,
+ const char* file_name, size_t file_name_length, uint32_t line,
+ const char* function_name, size_t function_name_length, const char* name,
+ size_t name_length);
+
+// Ends the most recently begun zone on |context|.
+// Must be perfectly nested with a corresponding zone begin on the same
+// context and command buffer.
+void iree_hal_vulkan_tracing_zone_end_impl(
+ iree_hal_vulkan_tracing_context_t* context, VkCommandBuffer command_buffer);
+
+// Begins a new zone with the parent function name.
+// The zone has no explicit name (NULL) so Tracy displays the capturing
+// __FUNCTION__ instead; use _BEGIN_EXTERNAL to supply a custom name.
+#define IREE_VULKAN_TRACE_ZONE_BEGIN(context, command_buffer) \
+ static const iree_tracing_location_t TracyConcat( \
+ __tracy_source_location, __LINE__) = {NULL, __FUNCTION__, \
+ __FILE__, (uint32_t)__LINE__, 0}; \
+ iree_hal_vulkan_tracing_zone_begin_impl( \
+ context, command_buffer, \
+ &TracyConcat(__tracy_source_location, __LINE__));
+
+// Begins an externally defined zone with a dynamic source location.
+// The |file_name|, |function_name|, and optional |name| strings will be copied
+// into the trace buffer and do not need to persist.
+#define IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL( \
+ context, command_buffer, file_name, file_name_length, line, function_name, \
+ function_name_length, name, name_length) \
+ iree_hal_vulkan_tracing_zone_begin_external_impl( \
+ context, command_buffer, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length)
+
+// Ends the current zone. Must be paired with a prior _BEGIN or
+// _BEGIN_EXTERNAL on the same |context| and |command_buffer|.
+#define IREE_VULKAN_TRACE_ZONE_END(context, command_buffer) \
+ iree_hal_vulkan_tracing_zone_end_impl(context, command_buffer)
+
+#else
+
+// Tracing disabled: inline no-op stubs and empty macros so call sites compile
+// away without #ifdefs.
+
+// No-op: reports success and returns no context.
+inline iree_status_t iree_hal_vulkan_tracing_context_allocate(
+ VkPhysicalDevice physical_device,
+ iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
+ iree_string_view_t queue_name, VkQueue maintenance_dispatch_queue,
+ iree::hal::vulkan::VkCommandPoolHandle* maintenance_command_pool,
+ iree_allocator_t host_allocator,
+ iree_hal_vulkan_tracing_context_t** out_context) {
+ *out_context = NULL;
+ return iree_ok_status();
+}
+
+// No-op.
+inline void iree_hal_vulkan_tracing_context_free(
+ iree_hal_vulkan_tracing_context_t* context) {}
+
+// No-op.
+inline void iree_hal_vulkan_tracing_context_collect(
+ iree_hal_vulkan_tracing_context_t* context,
+ VkCommandBuffer command_buffer) {}
+
+#define IREE_VULKAN_TRACE_ZONE_BEGIN(context, command_buffer)
+#define IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL( \
+ context, command_buffer, file_name, file_name_length, line, function_name, \
+ function_name_length, name, name_length)
+#define IREE_VULKAN_TRACE_ZONE_END(context, command_buffer)
+
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_TRACING_H_
diff --git a/runtime/src/iree/hal/vulkan/util/BUILD b/runtime/src/iree/hal/vulkan/util/BUILD
new file mode 100644
index 0000000..c57de01
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/BUILD
@@ -0,0 +1,78 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+# Block-based arena allocator used by the Vulkan HAL (see arena.h).
+iree_runtime_cc_library(
+ name = "arena",
+ srcs = ["arena.cc"],
+ hdrs = ["arena.h"],
+ deps = [
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:logging",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "arena_test",
+ srcs = ["arena_test.cc"],
+ deps = [
+ ":arena",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+# Header-only doubly-linked intrusive list (see intrusive_list.h).
+iree_runtime_cc_library(
+ name = "intrusive_list",
+ hdrs = [
+ "intrusive_list.h",
+ "intrusive_list_unique_ptr.inc",
+ ],
+ deps = [
+ "//runtime/src/iree/base:logging",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "intrusive_list_test",
+ srcs = [
+ "intrusive_list_test.cc",
+ "intrusive_list_unique_ptr_test.cc",
+ ],
+ deps = [
+ ":intrusive_list",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+# ref_ptr.h: presumably an intrusively reference-counted smart pointer —
+# see the header for the authoritative contract.
+iree_runtime_cc_library(
+ name = "ref_ptr",
+ hdrs = ["ref_ptr.h"],
+ deps = [
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:logging",
+ "//runtime/src/iree/base/internal",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "ref_ptr_test",
+ size = "small",
+ srcs = ["ref_ptr_test.cc"],
+ deps = [
+ ":ref_ptr",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
diff --git a/runtime/src/iree/hal/vulkan/util/CMakeLists.txt b/runtime/src/iree/hal/vulkan/util/CMakeLists.txt
new file mode 100644
index 0000000..1f7c7c4
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/CMakeLists.txt
@@ -0,0 +1,83 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/hal/vulkan/util/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ arena
+ HDRS
+ "arena.h"
+ SRCS
+ "arena.cc"
+ DEPS
+ iree::base::core_headers
+ iree::base::logging
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ arena_test
+ SRCS
+ "arena_test.cc"
+ DEPS
+ ::arena
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ intrusive_list
+ HDRS
+ "intrusive_list.h"
+ "intrusive_list_unique_ptr.inc"
+ DEPS
+ iree::base::logging
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ intrusive_list_test
+ SRCS
+ "intrusive_list_test.cc"
+ "intrusive_list_unique_ptr_test.cc"
+ DEPS
+ ::intrusive_list
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ ref_ptr
+ HDRS
+ "ref_ptr.h"
+ DEPS
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::logging
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ ref_ptr_test
+ SRCS
+ "ref_ptr_test.cc"
+ DEPS
+ ::ref_ptr
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/hal/vulkan/util/arena.cc b/runtime/src/iree/hal/vulkan/util/arena.cc
new file mode 100644
index 0000000..187d234
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/arena.cc
@@ -0,0 +1,117 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/util/arena.h"
+
+#include <cstdlib>
+
+#include "iree/base/attributes.h"
+#include "iree/base/logging.h"
+
+namespace iree {
+
+namespace {
+
+// Rounds up to the next alignment value, if it is not already aligned.
+// Rounds up to the next alignment value, if it is not already aligned.
+// Uses divide/multiply so it is correct for any positive |alignment|, not
+// just powers of two.
+template <typename T>
+IREE_ATTRIBUTE_ALWAYS_INLINE constexpr T RoundToAlignment(
+ T value, T alignment) noexcept {
+ return ((value + alignment - 1) / alignment) * alignment;
+}
+
+} // namespace
+
+// Blocks are allocated lazily on first use; construction itself allocates
+// nothing.
+Arena::Arena(size_t block_size) : block_size_(block_size) {}
+
+// Returns every block (active and cached-unused) to the system.
+Arena::~Arena() { Clear(); }
+
+// Frees both the in-use block list and the cached unused list and resets all
+// byte counters to zero.
+void Arena::Clear() {
+ // Deallocate all memory.
+ auto block_header = block_list_head_;
+ while (block_header) {
+ auto next_block = block_header->next_block;
+ std::free(block_header);
+ block_header = next_block;
+ }
+ block_list_head_ = nullptr;
+ // Also drop blocks that Reset() previously parked on the unused list.
+ block_header = unused_block_list_head_;
+ while (block_header) {
+ auto next_block = block_header->next_block;
+ std::free(block_header);
+ block_header = next_block;
+ }
+ unused_block_list_head_ = nullptr;
+
+ bytes_allocated_ = 0;
+ block_bytes_allocated_ = 0;
+}
+
+void Arena::Reset() {
+ // Move all blocks to the unused list and reset allocation count only.
+ // block_bytes_allocated_ is intentionally left untouched: the blocks are
+ // still owned by the arena and will be reused by AllocateBytes.
+ auto block_header = block_list_head_;
+ while (block_header) {
+ auto next_block = block_header->next_block;
+ block_header->bytes_allocated = 0;
+ block_header->next_block = unused_block_list_head_;
+ unused_block_list_head_ = block_header;
+ block_header = next_block;
+ }
+ block_list_head_ = nullptr;
+
+ bytes_allocated_ = 0;
+}
+
+// Returns a machine-word-aligned pointer to |length| bytes valid until the
+// next Clear()/Reset(). Requests larger than block_size() abort via
+// IREE_CHECK.
+uint8_t* Arena::AllocateBytes(size_t length) {
+ if (!length) {
+ // Guarantee zero-length allocations return nullptr.
+ return nullptr;
+ }
+
+ // Pad length allocated so we are machine word aligned.
+ // This ensures the next allocation starts at the right boundary.
+ size_t aligned_length = RoundToAlignment(length, sizeof(uintptr_t));
+
+ if (aligned_length > block_size_) {
+ // This allocation is larger than an entire block. That's bad.
+ // We could allocate this with malloc (and then keep track of those to free
+ // things), but for now let's just die.
+ IREE_CHECK(false);
+ return nullptr;
+ }
+
+ if (!block_list_head_ ||
+ block_list_head_->bytes_allocated + aligned_length > block_size_) {
+ // Check to see if we have an existing unused block we can use.
+ if (unused_block_list_head_) {
+ // Move block from unused list to main list.
+ auto block_header = unused_block_list_head_;
+ unused_block_list_head_ = block_header->next_block;
+ block_header->next_block = block_list_head_;
+ block_header->bytes_allocated = 0;
+ block_list_head_ = block_header;
+ } else {
+ // Allocate a new block.
+ // NOTE(review): the std::malloc result is dereferenced without a null
+ // check; an OOM here is undefined behavior rather than a clean failure.
+ auto block_ptr = reinterpret_cast<uint8_t*>(
+ std::malloc(sizeof(BlockHeader) + block_size_));
+ auto block_header = reinterpret_cast<BlockHeader*>(block_ptr);
+ block_header->next_block = block_list_head_;
+ block_header->bytes_allocated = 0;
+ block_list_head_ = block_header;
+ block_bytes_allocated_ += sizeof(BlockHeader) + block_size_;
+ }
+ }
+
+ BlockHeader* target_block = block_list_head_;
+ auto data_ptr = reinterpret_cast<uint8_t*>(target_block) +
+ sizeof(BlockHeader) + target_block->bytes_allocated;
+ // The block consumes the padded size, but bytes_allocated_ reports only the
+ // bytes the caller actually requested.
+ target_block->bytes_allocated += aligned_length;
+
+ bytes_allocated_ += length;
+
+ return data_ptr;
+}
+
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/util/arena.h b/runtime/src/iree/hal/vulkan/util/arena.h
new file mode 100644
index 0000000..b891c2d
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/arena.h
@@ -0,0 +1,129 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_UTIL_ARENA_H_
+#define IREE_HAL_VULKAN_UTIL_ARENA_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <utility>
+
+namespace iree {
+
+// Minimal non-owning view of a contiguous array (a tiny stand-in for
+// std::span): just a pointer plus an element count. The caller must keep the
+// underlying storage alive for the lifetime of the Span.
+template <typename T>
+class Span {
+ public:
+ Span(T* data, size_t size) noexcept : data_(data), size_(size) {}
+
+ T* data() const noexcept { return data_; }
+ size_t size() const noexcept { return size_; }
+ bool empty() const noexcept { return size() == 0; }
+
+ // Unchecked element access; |i| must be < size().
+ T& operator[](size_t i) noexcept { return *(data() + i); }
+
+ private:
+ T* data_;
+ size_t size_;
+};
+
+// Arena allocator.
+// Allocates memory from a cached block list grown at specified intervals.
+// Individual allocations cannot be freed.
+// Default constructors will be called when allocating but no destructors will
+// ever be called.
+//
+// This should be used in places where extreme dynamic memory growth is required
+// to ensure that the allocations stay close to each other in memory, are easy
+// to account for, and can be released together. For example, proto or file
+// parsing, per-batch write-once/read-once data buffers, etc.
+//
+// Usage:
+//   Arena arena;
+//   auto t0 = arena.Allocate<MyType>();
+class Arena {
+ public:
+ static constexpr size_t kDefaultBlockSize = 32 * 1024;
+ // Per-block bookkeeping size; must match sizeof(BlockHeader) (static_assert
+ // below).
+ static constexpr size_t kBlockOverhead = sizeof(void*) + sizeof(size_t);
+
+ Arena() : Arena(kDefaultBlockSize) {}
+ explicit Arena(size_t block_size);
+ ~Arena();
+
+ // Clears all data in the arena and deallocates blocks.
+ // Use Reset to avoid reallocation.
+ void Clear();
+
+ // Resets data in the arena but does not deallocate blocks.
+ // Use Clear to reclaim memory.
+ void Reset();
+
+ // Block size, excluding the block header.
+ // This is the largest size of any allocation that can be made of the arena.
+ size_t block_size() const { return block_size_; }
+
+ // Total number of bytes that have been allocated, excluding wasted space.
+ size_t bytes_allocated() const { return bytes_allocated_; }
+ // Total number of bytes as blocks allocated, including wasted space.
+ // If this number is much higher than bytes_allocated the block size requires
+ // tuning.
+ size_t block_bytes_allocated() const { return block_bytes_allocated_; }
+
+ // Allocates an instance of the given type and calls its constructor.
+ template <typename T>
+ T* Allocate() {
+ void* storage = AllocateBytes(sizeof(T));
+ return new (storage) T();
+ }
+
+ // Allocates an instance of the given type and calls its constructor with
+ // arguments.
+ template <typename T, typename... Args>
+ T* Allocate(Args&&... args) {
+ void* storage = AllocateBytes(sizeof(T));
+ return new (storage) T(std::forward<Args>(args)...);
+ }
+
+ // Allocates an array of items and returns a span pointing to them.
+ // Note: element constructors are NOT invoked; the contents are
+ // uninitialized raw storage.
+ template <typename T>
+ Span<T> AllocateSpan(size_t count) {
+ void* storage = AllocateBytes(count * sizeof(T));
+ return Span<T>(reinterpret_cast<T*>(storage), count);
+ }
+
+ // Allocates a block of raw bytes from the arena.
+ // Zero-byte allocations will return nullptr.
+ uint8_t* AllocateBytes(size_t length);
+
+ private:
+ // Block size contains the BlockHeader, so a 1024b block size will result in
+ // 1024-sizeof(BlockHeader) usable bytes.
+ size_t block_size_ = kDefaultBlockSize;
+ size_t bytes_allocated_ = 0;
+ size_t block_bytes_allocated_ = 0;
+
+ // Each block in the arena contains a prefixed header that lets us link the
+ // blocks together (to make freeing easier) as well as tracking current byte
+ // count to let us fill gaps.
+ // Immediately following the header is the actual arena data, up until the
+ // block size is reached.
+ struct BlockHeader {
+ BlockHeader* next_block;
+ size_t bytes_allocated;
+ };
+ static_assert(sizeof(BlockHeader) == kBlockOverhead, "Block header mismatch");
+
+ // Singly-linked list of allocated blocks in reverse allocation order (so
+ // the most recently allocated block is first).
+ BlockHeader* block_list_head_ = nullptr;
+
+ // Allocated but unused blocks.
+ BlockHeader* unused_block_list_head_ = nullptr;
+};
+
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_UTIL_ARENA_H_
diff --git a/runtime/src/iree/hal/vulkan/util/arena_test.cc b/runtime/src/iree/hal/vulkan/util/arena_test.cc
new file mode 100644
index 0000000..0baedf9
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/arena_test.cc
@@ -0,0 +1,139 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/util/arena.h"
+
+#include "iree/testing/gtest.h"
+
+namespace iree {
+namespace {
+
+// Tests basic block allocations.
+TEST(ArenaTest, BasicAllocation) {
+ Arena arena(64);
+ EXPECT_EQ(64, arena.block_size());
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+
+ // Zero byte allocations should return nullptr and not allocate bytes.
+ auto zero_ptr = reinterpret_cast<uintptr_t>(arena.AllocateBytes(0));
+ EXPECT_EQ(0, zero_ptr);
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+
+ // Clearing an empty arena is a no-op.
+ arena.Clear();
+
+ // Allocations must be machine word aligned.
+ auto one_ptr = reinterpret_cast<uintptr_t>(arena.AllocateBytes(1));
+ EXPECT_NE(0, one_ptr);
+ EXPECT_EQ(0, one_ptr % sizeof(uintptr_t));
+ one_ptr = reinterpret_cast<uintptr_t>(arena.AllocateBytes(1));
+ EXPECT_NE(0, one_ptr);
+ EXPECT_EQ(0, one_ptr % sizeof(uintptr_t));
+ // bytes_allocated counts requested bytes; block bytes include padding and
+ // the header overhead.
+ EXPECT_EQ(2, arena.bytes_allocated());
+ EXPECT_LT(2, arena.block_bytes_allocated());
+
+ arena.Clear();
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+}
+
+// Tests typed allocations.
+TEST(ArenaTest, TypedAllocations) {
+ Arena arena(64);
+
+ EXPECT_NE(nullptr, arena.Allocate<int>());
+ EXPECT_EQ(4, arena.bytes_allocated());
+ EXPECT_EQ(64 + Arena::kBlockOverhead, arena.block_bytes_allocated());
+ arena.Clear();
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+
+ struct MyType {
+ MyType() {}
+ explicit MyType(int initial_value) : value(initial_value) {}
+
+ int value = 5;
+ };
+ // Allocate<T>() runs the default constructor.
+ auto my_type_ptr = arena.Allocate<MyType>();
+ EXPECT_NE(nullptr, my_type_ptr);
+ EXPECT_EQ(sizeof(MyType), arena.bytes_allocated());
+ EXPECT_EQ(5, my_type_ptr->value); // Default ctor must be called.
+ arena.Clear();
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+
+ // Allocate<T>(args...) forwards arguments to the matching constructor.
+ my_type_ptr = arena.Allocate<MyType>(10);
+ EXPECT_NE(nullptr, my_type_ptr);
+ EXPECT_EQ(sizeof(MyType), arena.bytes_allocated());
+ EXPECT_EQ(10, my_type_ptr->value); // Ctor should have been called.
+ arena.Clear();
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+}
+
+// Tests multiple blocks.
+TEST(ArenaTest, MultipleBlocks) {
+ Arena arena(16);
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+
+ // Allocate one entire block.
+ EXPECT_NE(nullptr, arena.AllocateBytes(16));
+ EXPECT_EQ(16, arena.bytes_allocated());
+ EXPECT_EQ(16 + Arena::kBlockOverhead, arena.block_bytes_allocated());
+
+ // Allocate into the next block.
+ EXPECT_NE(nullptr, arena.AllocateBytes(16));
+ EXPECT_EQ(32, arena.bytes_allocated());
+ EXPECT_EQ(32 + 2 * Arena::kBlockOverhead, arena.block_bytes_allocated());
+
+ // Clear.
+ arena.Clear();
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+
+ // Allocate again; Clear freed the blocks so they are re-malloc'ed.
+ EXPECT_NE(nullptr, arena.AllocateBytes(16));
+ EXPECT_EQ(16, arena.bytes_allocated());
+ EXPECT_EQ(16 + Arena::kBlockOverhead, arena.block_bytes_allocated());
+ EXPECT_NE(nullptr, arena.AllocateBytes(16));
+ EXPECT_EQ(32, arena.bytes_allocated());
+ EXPECT_EQ(32 + 2 * Arena::kBlockOverhead, arena.block_bytes_allocated());
+}
+
+// Tests fast reset.
+TEST(ArenaTest, FastReset) {
+ Arena arena(16);
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(0, arena.block_bytes_allocated());
+
+ // Allocate one entire block.
+ EXPECT_NE(nullptr, arena.AllocateBytes(16));
+ EXPECT_EQ(16, arena.bytes_allocated());
+ EXPECT_EQ(16 + Arena::kBlockOverhead, arena.block_bytes_allocated());
+
+ // Allocate into the next block.
+ EXPECT_NE(nullptr, arena.AllocateBytes(16));
+ EXPECT_EQ(32, arena.bytes_allocated());
+ EXPECT_EQ(32 + 2 * Arena::kBlockOverhead, arena.block_bytes_allocated());
+
+ // Reset (without deallocating): block bytes stay constant since the blocks
+ // are retained on the unused list.
+ arena.Reset();
+ EXPECT_EQ(0, arena.bytes_allocated());
+ EXPECT_EQ(32 + 2 * Arena::kBlockOverhead, arena.block_bytes_allocated());
+
+ // Allocate again; the cached blocks are reused, so no new block bytes.
+ EXPECT_NE(nullptr, arena.AllocateBytes(16));
+ EXPECT_EQ(16, arena.bytes_allocated());
+ EXPECT_EQ(32 + 2 * Arena::kBlockOverhead, arena.block_bytes_allocated());
+ EXPECT_NE(nullptr, arena.AllocateBytes(16));
+ EXPECT_EQ(32, arena.bytes_allocated());
+ EXPECT_EQ(32 + 2 * Arena::kBlockOverhead, arena.block_bytes_allocated());
+}
+
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/util/intrusive_list.h b/runtime/src/iree/hal/vulkan/util/intrusive_list.h
new file mode 100644
index 0000000..ff5d5fe
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/intrusive_list.h
@@ -0,0 +1,750 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Doubly linked list using element interior storage.
+// This has the performance of std::list (that means O(1) on insert and remove)
+// but performs no allocations and has better caching behavior.
+//
+// Elements are maintained in lists by way of IntrusiveListLinks, with each link
+// allowing the element to exist in one list simultaneously. In the most simple
+// case subclassing IntrusiveLinkBase will let the type be added to a list with
+// little boilerplate. If an element must be in more than one list
+// simultaneously IntrusiveListLinks can be added as members.
+//
+// Usage (simple):
+// class MySimpleElement : public IntrusiveLinkBase {};
+// IntrusiveList<MySimpleElement> list;
+// list.push_back(new MySimpleElement());
+// for (auto element : list) { ... }
+//
+// Usage (multiple lists):
+// class MultiElement {
+// public:
+// IntrusiveListLink list_link_a;
+// IntrusiveListLink list_link_b;
+// };
+// IntrusiveList<MultiElement, offsetof(MultiElement, list_link_a)> list_a;
+// IntrusiveList<MultiElement, offsetof(MultiElement, list_link_b)> list_b;
+//
+// By default elements in the list are not retained and must be kept alive
+// externally. For automatic memory management there are specializations for
+// std::unique_ptr.
+//
+// Usage (unique_ptr):
+// IntrusiveList<std::unique_ptr<MyElement>> list;
+// list.push_back(std::make_unique<MyElement>());
+// std::unique_ptr<MyElement> elm = list.take(list.front());
+//
+// This type is thread-unsafe.
+
+#ifndef IREE_HAL_VULKAN_UTIL_INTRUSIVE_LIST_H_
+#define IREE_HAL_VULKAN_UTIL_INTRUSIVE_LIST_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <utility>
+
+#include "iree/base/logging.h"
+
+namespace iree {
+
+// Define to enable extensive checks after each mutation of the intrusive list.
+// #define IREE_PARANOID_INTRUSIVE_LIST
+
+// Storage for the doubly-linked list.
+// This is embedded within all elements in an intrusive list.
+struct IntrusiveListLink {
+ IntrusiveListLink* prev = nullptr;
+ IntrusiveListLink* next = nullptr;
+
+ IntrusiveListLink() = default;
+
+ // Prevent copies.
+ IntrusiveListLink(const IntrusiveListLink&) = delete;
+ IntrusiveListLink& operator=(const IntrusiveListLink&) = delete;
+};
+
+// Convenience base that embeds a single link member named |link| — the member
+// name the default IntrusiveList<T> specialization looks up via offsetof.
+// Use IntrusiveLinkBase<void> when there is no other base class to wrap.
+template <class T>
+struct IntrusiveLinkBase : public T {
+ public:
+ IntrusiveListLink link;
+};
+
+template <>
+struct IntrusiveLinkBase<void> {
+ public:
+ IntrusiveListLink link;
+};
+
+// Base type for intrusive lists.
+// This is either used directly when the list is on naked pointers or
+// specialized to std::unique_ptr.
+//
+// |kOffset| is the byte offset of the embedded IntrusiveListLink within T;
+// it is used by the impl::LinkToT/TToLink pointer math.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+class IntrusiveListBase {
+ public:
+ using self_type = IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>;
+
+ IntrusiveListBase() = default;
+ virtual ~IntrusiveListBase() { clear(); }
+
+ // Prevent copies.
+ IntrusiveListBase(const IntrusiveListBase&) = delete;
+ IntrusiveListBase& operator=(const IntrusiveListBase&) = delete;
+
+ // Returns true if the list is empty.
+ // Performance: O(1)
+ constexpr bool empty() const { return head_ == nullptr; }
+
+ // Returns the total number of items in the list.
+ // Performance: O(1)
+ constexpr size_t size() const { return count_; }
+
+ // Returns true if the given item is contained within the list.
+ // Performance: O(n)
+ bool contains(T* value) const;
+
+ // Appends the contents of the given list to this one.
+ // The |other_list| is cleared.
+ // Performance: O(1)
+ void merge_from(self_type* other_list);
+
+ // Removes all items from the list.
+ // Performance: O(n)
+ void clear();
+
+ IteratorT begin() const { return IteratorT(head_); }
+ IteratorT end() const { return IteratorT(nullptr); }
+ ReverseIteratorT rbegin() const { return ReverseIteratorT(tail_); }
+ ReverseIteratorT rend() const { return ReverseIteratorT(nullptr); }
+
+ // Returns the next item in the list relative to the given item.
+ // |value| must exist in the list.
+ // Performance: O(1)
+ T* next(T* value) const;
+
+ // Returns the previous item in the list relative to the given item.
+ // |value| must exist in the list.
+ // Performance: O(1)
+ T* previous(T* value) const;
+
+ // Returns the item at the front of the list, if any.
+ // Performance: O(1)
+ T* front() const;
+
+ // Inserts an item at the front of the list.
+ // Performance: O(1)
+ void push_front(T* value);
+
+ // Removes the item at the front of the list.
+ // Performance: O(1)
+ void pop_front();
+
+ // Returns the item at the back of the list, if any.
+ // Performance: O(1)
+ T* back() const;
+
+ // Inserts an item at the back of the list.
+ // Performance: O(1)
+ void push_back(T* value);
+
+ // Removes the item at the back of the list.
+ // Performance: O(1)
+ void pop_back();
+
+ // Inserts an item into the list before the given iterator.
+ // Performance: O(1)
+ void insert(const IteratorT& it, T* value) { return insert(*it, value); }
+ void insert(T* position, T* value);
+
+ // Erases the given item from the list.
+ // Returns the item following the erased item, if any.
+ // Performance: O(1)
+ T* erase(T* value);
+
+ // Erases the item from the list at the given iterator.
+ // Performance: O(1)
+ IteratorT erase(const IteratorT& it);
+ ReverseIteratorT erase(const ReverseIteratorT& it);
+
+ // Replaces the item with a new item at the same position.
+ // |new_value| must not be contained in any list.
+ // Performance: O(1)
+ void replace(T* old_value, T* new_value);
+
+ // Sorts the list with the given comparison function.
+ // The sort function is the same as used by std::sort.
+ //
+ // Uses merge sort O(N log N) using the algorithm described here:
+ // http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html
+ void sort(bool (*compare_fn)(T* a, T* b));
+
+ protected:
+ // Called when an item is added to the list.
+ virtual void OnAdd(T* value) {}
+ // Called when an item is removed from the list.
+ virtual void OnRemove(T* value) {}
+ // Called when an item is removed and deallocated.
+ virtual void OnDeallocate(T* value) {}
+
+ // Performs expensive correctness checks on the list structure. It's too slow
+ // to use in normal builds (even dbg), so it should only be used when there's
+ // a suspected issue with an intrusive list. Define
+ // IREE_PARANOID_INTRUSIVE_LIST to enable.
+ void CheckCorrectness() const;
+
+ IntrusiveListLink* head_ = nullptr;
+ IntrusiveListLink* tail_ = nullptr;
+ size_t count_ = 0;
+};
+
+// Basic iterator for an IntrusiveList.
+// |kForward| selects the traversal direction so the same template serves as
+// both the forward and the reverse iterator.
+// NOTE(review): std::iterator is deprecated since C++17; consider declaring
+// the iterator member typedefs directly instead.
+template <typename T, size_t kOffset, bool kForward>
+class IntrusiveListIterator
+ : public std::iterator<std::input_iterator_tag, int> {
+ public:
+ using self_type = IntrusiveListIterator<T, kOffset, kForward>;
+
+ explicit IntrusiveListIterator(IntrusiveListLink* current)
+ : current_(current) {}
+ IntrusiveListIterator& operator++();
+ self_type operator++(int);
+ self_type& operator--();
+ self_type operator--(int);
+ bool operator==(const self_type& rhs) const;
+ bool operator!=(const self_type& rhs) const;
+ T* operator*() const;
+
+ protected:
+ // Link the iterator currently points at; nullptr means end()/rend().
+ IntrusiveListLink* current_;
+};
+
+// Specialized IntrusiveListBase used for unreferenced naked pointers.
+// This very thinly wraps the base type and does no special memory management.
+template <typename T, size_t kOffset>
+class IntrusiveListUnrefBase
+ : public IntrusiveListBase<T, IntrusiveListIterator<T, kOffset, true>,
+ IntrusiveListIterator<T, kOffset, false>,
+ kOffset> {
+ public:
+ using IteratorT = IntrusiveListIterator<T, kOffset, true>;
+ using ReverseIteratorT = IntrusiveListIterator<T, kOffset, false>;
+ using base_list = IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>;
+
+ using base_list::clear;
+
+ // Removes all items from the list and calls the given deleter function for
+ // each of them. The built-in OnDeallocate will not be used.
+ // Performance: O(n)
+ void clear(const std::function<void(T*)>& deleter);
+
+ private:
+ using base_list::count_;
+ using base_list::head_;
+ using base_list::tail_;
+};
+
+// Sentinel offset meaning "use the member named |link|" (resolved by the
+// partial specialization below via offsetof(T, link)).
+constexpr size_t kUseDefaultLinkOffset = std::numeric_limits<size_t>::max();
+
+// IntrusiveList for raw pointers with a specified offset.
+// Use this if there are multiple links within a type.
+//
+// Usage:
+//   struct MyType {
+//     IntrusiveListLink link_a;
+//     IntrusiveListLink link_b;
+//   };
+//   IntrusiveList<MyType, offsetof(MyType, link_a)> list_a;
+//   IntrusiveList<MyType, offsetof(MyType, link_b)> list_b;
+template <typename T, size_t kOffset = kUseDefaultLinkOffset>
+class IntrusiveList : public IntrusiveListUnrefBase<T, kOffset> {};
+
+// IntrusiveList for raw pointers.
+// Items added to the list will not be owned by the list and must be freed by
+// the caller. T must have an IntrusiveListLink member named |link| (e.g. by
+// deriving from IntrusiveLinkBase).
+//
+// Usage:
+//   struct MyType : public IntrusiveListBase<void> {};
+//   IntrusiveList<MyType> list;
+//   auto* p = new MyType();
+//   list.push_back(p);  // p is not retained and won't be freed!
+//   delete p;
+template <typename T>
+class IntrusiveList<T, kUseDefaultLinkOffset>
+ : public IntrusiveListUnrefBase<T, offsetof(T, link)> {};
+
+// -- implementation --
+
+namespace impl {
+
+// Maps an IntrusiveListLink to its containing type T.
+// Subtracts |kOffset| bytes from the link address to recover the element;
+// null links map to nullptr.
+template <typename T, size_t kOffset>
+static inline T* LinkToT(IntrusiveListLink* link) {
+ if (link) {
+ return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(link) - kOffset);
+ } else {
+ return nullptr;
+ }
+}
+
+// Maps a containing type T to its IntrusiveListLink.
+// Inverse of LinkToT: adds |kOffset| bytes to the element address.
+template <typename T, size_t kOffset>
+static inline IntrusiveListLink* TToLink(T* value) {
+ if (value) {
+ return reinterpret_cast<IntrusiveListLink*>(
+ reinterpret_cast<uintptr_t>(value) + kOffset);
+ } else {
+ return nullptr;
+ }
+}
+
+} // namespace impl
+
+// Pre-increment: step one link in the iteration direction (next for forward
+// iterators, prev for reverse). Incrementing end() is a no-op.
+template <typename T, size_t kOffset, bool kForward>
+IntrusiveListIterator<T, kOffset, kForward>&
+IntrusiveListIterator<T, kOffset, kForward>::operator++() {
+ if (current_) {
+ current_ = kForward ? current_->next : current_->prev;
+ }
+ return *this;
+}
+
+// Post-increment: returns a copy of the pre-step position.
+template <typename T, size_t kOffset, bool kForward>
+IntrusiveListIterator<T, kOffset, kForward>
+IntrusiveListIterator<T, kOffset, kForward>::operator++(int) {
+ self_type tmp(current_);
+ operator++();
+ return tmp;
+}
+
+// Pre-decrement: step opposite to the iteration direction.
+template <typename T, size_t kOffset, bool kForward>
+IntrusiveListIterator<T, kOffset, kForward>&
+IntrusiveListIterator<T, kOffset, kForward>::operator--() {
+ if (current_) {
+ current_ = kForward ? current_->prev : current_->next;
+ }
+ return *this;
+}
+
+template <typename T, size_t kOffset, bool kForward>
+IntrusiveListIterator<T, kOffset, kForward>
+IntrusiveListIterator<T, kOffset, kForward>::operator--(int) {
+ self_type tmp(current_);
+ operator--();
+ return tmp;
+}
+
+// Two iterators are equal when they point at the same link.
+template <typename T, size_t kOffset, bool kForward>
+bool IntrusiveListIterator<T, kOffset, kForward>::operator==(
+ const self_type& rhs) const {
+ return rhs.current_ == current_;
+}
+
+template <typename T, size_t kOffset, bool kForward>
+bool IntrusiveListIterator<T, kOffset, kForward>::operator!=(
+ const self_type& rhs) const {
+ return !operator==(rhs);
+}
+
+// Dereference: converts the current link back to the containing element
+// (nullptr at end()).
+template <typename T, size_t kOffset, bool kForward>
+T* IntrusiveListIterator<T, kOffset, kForward>::operator*() const {
+ return impl::LinkToT<T, kOffset>(current_);
+}
+
+// Unlinks every element and invokes |deleter| on each; the base class's
+// OnDeallocate hook is bypassed entirely.
+template <typename T, size_t kOffset>
+void IntrusiveListUnrefBase<T, kOffset>::clear(
+ const std::function<void(T*)>& deleter) {
+ auto* link = head_;
+ while (link) {
+ auto* next = link->next;
+ // Null the links before deleting so the element can be re-listed if the
+ // deleter recycles it.
+ link->prev = link->next = nullptr;
+ deleter(impl::LinkToT<T, kOffset>(link));
+ link = next;
+ }
+ head_ = tail_ = nullptr;
+ count_ = 0;
+}
+
+// Walks the whole list validating head/tail invariants, prev/next symmetry,
+// and the cached count. Compiles to nothing unless
+// IREE_PARANOID_INTRUSIVE_LIST is defined.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT,
+ kOffset>::CheckCorrectness() const {
+#if defined(IREE_PARANOID_INTRUSIVE_LIST)
+ auto* link = head_;
+ IntrusiveListLink* previous = nullptr;
+ size_t actual_count = 0;
+ while (link) {
+ ++actual_count;
+ if (!link->prev) {
+ IREE_DCHECK_EQ(link, head_);
+ }
+ if (!link->next) {
+ IREE_DCHECK_EQ(link, tail_);
+ }
+ IREE_DCHECK_EQ(link->prev, previous);
+ previous = link;
+ link = link->next;
+ }
+ IREE_DCHECK_EQ(actual_count, count_);
+#endif // IREE_PARANOID_INTRUSIVE_LIST
+}
+
+// Linear scan comparing link addresses; null values are never contained.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+bool IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::contains(
+ T* value) const {
+ if (!value) return false;
+ // TODO(benvanik): faster way of checking? requires list ptr in link?
+ auto* needle = impl::TToLink<T, kOffset>(value);
+ auto* link = head_;
+ while (link) {
+ if (link == needle) {
+ return true;
+ }
+ link = link->next;
+ }
+ return false;
+}
+
+// Appends all items from |other_list| onto the tail of this list and leaves
+// |other_list| empty. O(1): only the boundary links are spliced.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::merge_from(
+ self_type* other_list) {
+ if (!other_list->head_) {
+ // Nothing to merge. Returning early also keeps our tail_ intact: falling
+ // through would assign other_list's null tail_ over our own and corrupt a
+ // non-empty list.
+ return;
+ }
+ if (tail_) {
+ tail_->next = other_list->head_;
+ }
+ // other_list->head_ is non-null here (guarded above).
+ other_list->head_->prev = tail_;
+ if (!head_) {
+ head_ = other_list->head_;
+ }
+ tail_ = other_list->tail_;
+
+ other_list->head_ = nullptr;
+ other_list->tail_ = nullptr;
+
+ count_ += other_list->count_;
+ other_list->count_ = 0;
+}
+
+// Unlinks every element and notifies OnDeallocate for each (subclasses use
+// that hook to release ownership); the elements themselves are not freed here.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::clear() {
+ auto* link = head_;
+ while (link) {
+ auto* next = link->next;
+ // Null links first so the element is immediately re-listable.
+ link->prev = link->next = nullptr;
+ OnDeallocate(impl::LinkToT<T, kOffset>(link));
+ link = next;
+ }
+ head_ = tail_ = nullptr;
+ count_ = 0;
+}
+
+// Returns the element after |value|, or nullptr at the tail / for null input.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+inline T* IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::next(
+ T* value) const {
+ if (!value) {
+ return nullptr;
+ }
+ auto* link = impl::TToLink<T, kOffset>(value);
+ return impl::LinkToT<T, kOffset>(link->next);
+}
+
+// Returns the element before |value|, or nullptr at the head / for null input.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+inline T* IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::previous(
+ T* value) const {
+ if (!value) {
+ return nullptr;
+ }
+ auto* link = impl::TToLink<T, kOffset>(value);
+ return impl::LinkToT<T, kOffset>(link->prev);
+}
+
+// Returns the head element or nullptr when empty.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+inline T* IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::front()
+ const {
+ return impl::LinkToT<T, kOffset>(head_);
+}
+
+// Prepends |value|, which must not already be in a list (both links must be
+// null, DCHECKed below).
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::push_front(
+ T* value) {
+ IREE_DCHECK(value);
+ auto* link = impl::TToLink<T, kOffset>(value);
+ IREE_DCHECK(!link->next);
+ IREE_DCHECK(!link->prev);
+ link->next = head_;
+ link->prev = nullptr;
+ head_ = link;
+ if (link->next) {
+ link->next->prev = link;
+ }
+ if (!tail_) {
+ tail_ = link;
+ }
+ ++count_;
+ OnAdd(value);
+ CheckCorrectness();
+}
+
+// Unlinks the head element and notifies OnDeallocate with it.
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::pop_front() {
+ IREE_DCHECK(head_);
+ auto* link = head_;
+ if (link) {
+ head_ = head_->next;
+ link->next = link->prev = nullptr;
+ if (head_) {
+ head_->prev = nullptr;
+ }
+ if (link == tail_) {
+ // Popped the only element; the list is now empty.
+ tail_ = nullptr;
+ }
+ --count_;
+ OnDeallocate(impl::LinkToT<T, kOffset>(link));
+ }
+ CheckCorrectness();
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+inline T* IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::back()
+ const {
+ return impl::LinkToT<T, kOffset>(tail_);
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::push_back(
+ T* value) {
+ IREE_DCHECK(value);
+ auto* link = impl::TToLink<T, kOffset>(value);
+ IREE_DCHECK(!link->next);
+ IREE_DCHECK(!link->prev);
+ link->prev = tail_;
+ link->next = nullptr;
+ tail_ = link;
+ if (link->prev) {
+ link->prev->next = link;
+ }
+ if (!head_) {
+ head_ = link;
+ }
+ ++count_;
+ OnAdd(value);
+ CheckCorrectness();
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::pop_back() {
+ IREE_DCHECK(tail_);
+ auto* link = tail_;
+ if (link) {
+ tail_ = tail_->prev;
+ link->next = link->prev = nullptr;
+ if (tail_) {
+ tail_->next = nullptr;
+ }
+ if (link == head_) {
+ head_ = nullptr;
+ }
+ --count_;
+ OnDeallocate(impl::LinkToT<T, kOffset>(link));
+ }
+ CheckCorrectness();
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::insert(
+ T* position, T* value) {
+ IREE_DCHECK(value);
+ auto* link = impl::TToLink<T, kOffset>(value);
+ auto* position_link = impl::TToLink<T, kOffset>(position);
+ IREE_DCHECK(!link->next);
+ IREE_DCHECK(!link->prev);
+
+ if (position_link == head_) {
+ push_front(value);
+ } else if (position_link == nullptr) {
+ push_back(value);
+ } else {
+ link->next = position_link;
+ link->prev = position_link->prev;
+ position_link->prev->next = link;
+ position_link->prev = link;
+ ++count_;
+ OnAdd(value);
+ }
+ CheckCorrectness();
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+T* IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::erase(T* value) {
+ if (!value) {
+ return nullptr;
+ }
+ auto* link = impl::TToLink<T, kOffset>(value);
+ if (link->prev) {
+ IREE_DCHECK_NE(link, head_);
+ link->prev->next = link->next;
+ } else {
+ IREE_DCHECK_EQ(link, head_);
+ head_ = link->next;
+ }
+ if (link->next) {
+ IREE_DCHECK_NE(link, tail_);
+ link->next->prev = link->prev;
+ } else {
+ IREE_DCHECK_EQ(link, tail_);
+ tail_ = link->prev;
+ }
+ auto* next = link->next;
+ link->next = link->prev = nullptr;
+ --count_;
+ OnDeallocate(value);
+ CheckCorrectness();
+ return impl::LinkToT<T, kOffset>(next);
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+IteratorT IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::erase(
+ const IteratorT& it) {
+ return IteratorT(impl::TToLink<T, kOffset>(erase(*it)));
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+ReverseIteratorT IntrusiveListBase<T, IteratorT, ReverseIteratorT,
+ kOffset>::erase(const ReverseIteratorT& it) {
+ return ReverseIteratorT(impl::TToLink<T, kOffset>(erase(*it)));
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::replace(
+ T* old_value, T* new_value) {
+ IREE_DCHECK(old_value);
+ IREE_DCHECK(new_value);
+ IREE_DCHECK_NE(old_value, new_value);
+ auto* old_link = impl::TToLink<T, kOffset>(old_value);
+ auto* new_link = impl::TToLink<T, kOffset>(new_value);
+ new_link->next = old_link->next;
+ new_link->prev = old_link->prev;
+ if (new_link->prev) {
+ new_link->prev->next = new_link;
+ } else {
+ head_ = new_link;
+ }
+ if (new_link->next) {
+ new_link->next->prev = new_link;
+ } else {
+ tail_ = new_link;
+ }
+ old_link->next = old_link->prev = nullptr;
+ OnAdd(new_value);
+ OnDeallocate(old_value);
+ CheckCorrectness();
+}
+
+template <typename T, typename IteratorT, typename ReverseIteratorT,
+ size_t kOffset>
+void IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>::sort(
+ bool (*compare_fn)(T* a, T* b)) {
+ if (empty()) {
+ // Empty list no-op.
+ return;
+ }
+ // Repeatedly run until the list is sorted.
+ int in_size = 1;
+ while (true) {
+ IntrusiveListLink* p = head_;
+ IntrusiveListLink* q = nullptr;
+ IntrusiveListLink* e = nullptr;
+ IntrusiveListLink* tail = nullptr;
+ head_ = nullptr;
+ tail_ = nullptr;
+ // Repeatedly merge sublists.
+ int merge_count = 0;
+ do {
+ ++merge_count;
+ q = p;
+ // Determine the size of the first part and find the second.
+ int p_size = 0;
+ for (int i = 0; i < in_size; ++i) {
+ ++p_size;
+ q = q->next;
+ if (!q) {
+ break;
+ }
+ }
+ // Merge the two lists (if we have two).
+ int q_size = in_size;
+ while (p_size > 0 || (q_size > 0 && q)) {
+ if (p_size == 0) {
+ // p is empty; e must come from q.
+ e = q;
+ q = q->next;
+ --q_size;
+ } else if (q_size == 0 || !q) {
+ // q is empty; e must come from p.
+ e = p;
+ p = p->next;
+ --p_size;
+ } else if (compare_fn(impl::LinkToT<T, kOffset>(p),
+ impl::LinkToT<T, kOffset>(q))) {
+ // p <= q; e must come from p.
+ e = p;
+ p = p->next;
+ --p_size;
+ } else {
+ // q < p; e must come from q.
+ e = q;
+ q = q->next;
+ --q_size;
+ }
+ // Append e to the merged list.
+ if (tail) {
+ tail->next = e;
+ } else {
+ head_ = e;
+ }
+ e->prev = tail;
+ tail = e;
+ }
+ p = q;
+ } while (p);
+ tail->next = nullptr;
+ if (merge_count <= 1) {
+ // List is now sorted; stash and return.
+ tail_ = tail;
+ CheckCorrectness();
+ return;
+ }
+ // Run merge again with larger lists.
+ in_size *= 2;
+ }
+}
+
+} // namespace iree
+
+// Specializations:
+#include "iree/hal/vulkan/util/intrusive_list_unique_ptr.inc"
+
+#endif // IREE_HAL_VULKAN_UTIL_INTRUSIVE_LIST_H_
diff --git a/runtime/src/iree/hal/vulkan/util/intrusive_list_test.cc b/runtime/src/iree/hal/vulkan/util/intrusive_list_test.cc
new file mode 100644
index 0000000..ad5dee5
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/intrusive_list_test.cc
@@ -0,0 +1,537 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/util/intrusive_list.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "iree/testing/gtest.h"
+
+namespace iree {
+namespace {
+
+using ::testing::ElementsAre;
+
+struct Item {
+ size_t some_data_0;
+ IntrusiveListLink list_a;
+ size_t some_data_1;
+ IntrusiveListLink list_b;
+ size_t some_data_2;
+ int value;
+
+ static constexpr size_t kToken = 0xDEADBEEF;
+ explicit Item(int value)
+ : some_data_0(kToken),
+ some_data_1(kToken),
+ some_data_2(kToken),
+ value(value) {}
+ bool is_valid() {
+ return some_data_0 == kToken && some_data_1 == kToken &&
+ some_data_2 == kToken;
+ }
+};
+
+template <typename T, size_t V>
+std::vector<T*> ExtractItems(const IntrusiveList<T, V>& list) {
+ std::vector<T*> items;
+ for (auto* item : list) {
+ items.push_back(item);
+ }
+ return items;
+}
+
+template <typename T, size_t V>
+std::vector<int> ExtractValues(const IntrusiveList<T, V>& list) {
+ std::vector<int> values;
+ for (auto* item : list) {
+ values.push_back(item->value);
+ }
+ return values;
+}
+
+template <typename T, size_t V>
+std::vector<int> ExtractValuesMutable(const IntrusiveList<T, V>& list) {
+ std::vector<int> values;
+ for (auto* item : list) {
+ values.push_back(item->value);
+ }
+ return values;
+}
+
+TEST(IntrusiveListTest, PushPopItems) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+ EXPECT_TRUE(items.empty());
+ EXPECT_EQ(items.size(), 0u);
+ EXPECT_EQ(items.front(), nullptr);
+ EXPECT_EQ(items.back(), nullptr);
+ EXPECT_TRUE(items.begin() == items.end());
+ items.push_front(&item1);
+ EXPECT_FALSE(items.empty());
+ EXPECT_EQ(items.size(), 1u);
+ EXPECT_EQ(items.front(), &item1);
+ EXPECT_EQ(items.back(), &item1);
+ EXPECT_FALSE(items.begin() == items.end());
+ items.push_front(&item2);
+ EXPECT_EQ(items.size(), 2u);
+ EXPECT_EQ(items.front(), &item2);
+ EXPECT_EQ(items.back(), &item1);
+ items.push_front(&item3);
+ EXPECT_EQ(items.size(), 3u);
+ EXPECT_EQ(items.front(), &item3);
+ EXPECT_EQ(items.back(), &item1);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(3, 2, 1));
+
+ items.push_back(&item4);
+ EXPECT_EQ(items.size(), 4u);
+ EXPECT_EQ(items.front(), &item3);
+ EXPECT_EQ(items.back(), &item4);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(3, 2, 1, 4));
+
+ items.pop_front();
+ EXPECT_EQ(items.size(), 3u);
+ EXPECT_EQ(items.front(), &item2);
+ EXPECT_EQ(items.back(), &item4);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(2, 1, 4));
+
+ items.pop_back();
+ EXPECT_EQ(items.size(), 2u);
+ EXPECT_EQ(items.front(), &item2);
+ EXPECT_EQ(items.back(), &item1);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(2, 1));
+
+ items.pop_back();
+ items.pop_front();
+ EXPECT_TRUE(items.empty());
+ EXPECT_EQ(items.size(), 0u);
+ EXPECT_EQ(items.front(), nullptr);
+ EXPECT_EQ(items.back(), nullptr);
+ EXPECT_TRUE(items.begin() == items.end());
+
+ EXPECT_TRUE(item1.is_valid());
+ EXPECT_TRUE(item2.is_valid());
+ EXPECT_TRUE(item3.is_valid());
+ EXPECT_TRUE(item4.is_valid());
+}
+
+TEST(IntrusiveListTest, Contains) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+ items.push_back(&item1);
+ items.push_back(&item2);
+ items.push_back(&item3);
+ // item4 omitted.
+
+ EXPECT_TRUE(items.contains(&item1));
+ EXPECT_TRUE(items.contains(&item2));
+ EXPECT_TRUE(items.contains(&item3));
+ EXPECT_FALSE(items.contains(&item4));
+
+ EXPECT_FALSE(items.contains(nullptr));
+}
+
+TEST(IntrusiveListTest, MergeFrom) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items0;
+ items0.push_back(&item1);
+ items0.push_back(&item2);
+ items0.push_back(&item3);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items1;
+ items1.push_back(&item4);
+
+ items0.merge_from(&items1);
+ EXPECT_THAT(ExtractValues(items0), ElementsAre(1, 2, 3, 4));
+ EXPECT_TRUE(items1.empty());
+}
+
+TEST(IntrusiveListTest, MergeFromEmpty) {
+ IntrusiveList<Item, offsetof(Item, list_a)> items0;
+ IntrusiveList<Item, offsetof(Item, list_a)> items1;
+ items0.merge_from(&items1);
+}
+
+TEST(IntrusiveListTest, MergeFromAll) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+ IntrusiveList<Item, offsetof(Item, list_a)> items0;
+ items0.push_back(&item1);
+ items0.push_back(&item2);
+ items0.push_back(&item3);
+ items0.push_back(&item4);
+ IntrusiveList<Item, offsetof(Item, list_a)> items1;
+
+ // Merge all items from items1 into items0. Shouldn't change anything.
+ items0.merge_from(&items1);
+ EXPECT_THAT(ExtractValues(items0), ElementsAre(1, 2, 3, 4));
+ EXPECT_TRUE(items1.empty());
+
+ // Merge all items from items0 into items1. Should move everything.
+ items1.merge_from(&items0);
+ EXPECT_TRUE(items0.empty());
+ EXPECT_THAT(ExtractValues(items1), ElementsAre(1, 2, 3, 4));
+}
+
+TEST(IntrusiveListTest, Erase) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+ items.push_back(&item1);
+ items.push_back(&item2);
+ items.push_back(&item3);
+ items.push_back(&item4);
+
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1, 2, 3, 4));
+ items.erase(&item3);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1, 2, 4));
+ items.erase(&item1);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(2, 4));
+ items.erase(&item4);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(2));
+ items.erase(&item2);
+ EXPECT_TRUE(items.empty());
+
+ items.push_back(&item1);
+ items.push_back(&item2);
+ items.push_back(&item3);
+ items.push_back(&item4);
+
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1, 2, 3, 4));
+ auto it = items.begin();
+ items.erase(it);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(2, 3, 4));
+ it = items.end();
+ items.erase(it);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(2, 3, 4));
+ it = items.begin();
+ ++it;
+ items.erase(it);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(2, 4));
+
+ it = items.begin();
+ it = items.erase(it);
+ EXPECT_EQ(4, (*it)->value);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(4));
+ it = items.erase(it);
+ EXPECT_TRUE(items.empty());
+ EXPECT_EQ(items.end(), it);
+}
+
+TEST(IntrusiveListTest, MultipleLists) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items_a;
+ IntrusiveList<Item, offsetof(Item, list_b)> items_b;
+ items_a.push_back(&item1);
+ items_a.push_back(&item2);
+ items_a.push_back(&item3);
+ items_a.push_back(&item4);
+ items_b.push_front(&item1);
+ items_b.push_front(&item2);
+ items_b.push_front(&item3);
+ items_b.push_front(&item4);
+ EXPECT_THAT(ExtractValues(items_a), ElementsAre(1, 2, 3, 4));
+ EXPECT_THAT(ExtractValues(items_b), ElementsAre(4, 3, 2, 1));
+ items_b.erase(&item3);
+ EXPECT_THAT(ExtractValues(items_a), ElementsAre(1, 2, 3, 4));
+ EXPECT_THAT(ExtractValues(items_b), ElementsAre(4, 2, 1));
+ items_a.pop_back();
+ EXPECT_THAT(ExtractValues(items_a), ElementsAre(1, 2, 3));
+ EXPECT_THAT(ExtractValues(items_b), ElementsAre(4, 2, 1));
+}
+
+TEST(IntrusiveListTest, MutableIterator) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+ items.push_back(&item4);
+ items.push_front(&item1);
+ items.push_front(&item2);
+ items.push_front(&item3);
+
+ EXPECT_THAT(ExtractValuesMutable(items), ElementsAre(3, 2, 1, 4));
+}
+
+struct BaseType {
+ explicit BaseType(int value) : value(value) {}
+ int value;
+ IntrusiveListLink base_link;
+};
+struct SubType : public BaseType {
+ explicit SubType(int value) : BaseType(value) {}
+ IntrusiveListLink sub_link;
+};
+TEST(IntrusiveListTest, SimpleType) {
+ SubType item1(1);
+ SubType item2(2);
+ SubType item3(3);
+ SubType item4(4);
+
+ IntrusiveList<BaseType, offsetof(BaseType, base_link)> items_a;
+ items_a.push_front(&item1);
+ items_a.push_front(&item2);
+ items_a.push_front(&item3);
+ items_a.push_front(&item4);
+ EXPECT_THAT(ExtractValues(items_a), ElementsAre(4, 3, 2, 1));
+
+ IntrusiveList<SubType, offsetof(SubType, sub_link)> items_b;
+ items_b.push_back(&item1);
+ items_b.push_back(&item2);
+ items_b.push_back(&item3);
+ items_b.push_back(&item4);
+ EXPECT_THAT(ExtractValues(items_b), ElementsAre(1, 2, 3, 4));
+}
+
+struct AbstractType {
+ explicit AbstractType(int value) : value(value) {}
+ virtual ~AbstractType() = default;
+ virtual int DoSomething() = 0;
+ int value;
+ IntrusiveListLink base_link;
+};
+struct ImplType : public AbstractType {
+ explicit ImplType(int value) : AbstractType(value) {}
+ int DoSomething() override { return value; }
+ IntrusiveListLink sub_link;
+};
+
+TEST(IntrusiveListTest, ComplexType) {
+ ImplType item1(1);
+ ImplType item2(2);
+ ImplType item3(3);
+ ImplType item4(4);
+
+ IntrusiveList<AbstractType, offsetof(AbstractType, base_link)> items_a;
+ items_a.push_front(&item1);
+ items_a.push_front(&item2);
+ items_a.push_front(&item3);
+ items_a.push_front(&item4);
+ EXPECT_THAT(ExtractValues(items_a), ElementsAre(4, 3, 2, 1));
+
+ IntrusiveList<ImplType, offsetof(ImplType, sub_link)> items_b;
+ items_b.push_back(&item1);
+ items_b.push_back(&item2);
+ items_b.push_back(&item3);
+ items_b.push_back(&item4);
+ EXPECT_THAT(ExtractValues(items_b), ElementsAre(1, 2, 3, 4));
+}
+
+bool Comparison(Item* a, Item* b) { return a->value < b->value; }
+
+TEST(IntrusiveListTest, Inserting) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+ items.insert(items.end(), &item3);
+ items.insert(items.begin(), &item1);
+ items.insert(items.end(), &item4);
+
+ auto pos = std::upper_bound(items.begin(), items.end(), &item2, Comparison);
+ items.insert(pos, &item2);
+
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1, 2, 3, 4));
+}
+
+TEST(IntrusiveListTest, Iteration) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+ items.push_back(&item1);
+ items.push_back(&item2);
+ items.push_back(&item3);
+ items.push_back(&item4);
+
+ std::vector<int> regular;
+ for (auto it = items.begin(); it != items.end(); ++it) {
+ regular.push_back((*it)->value);
+ }
+ EXPECT_THAT(regular, ElementsAre(1, 2, 3, 4));
+
+ std::vector<int> reverse;
+ for (auto rit = items.rbegin(); rit != items.rend(); ++rit) {
+ reverse.push_back((*rit)->value);
+ }
+ EXPECT_THAT(reverse, ElementsAre(4, 3, 2, 1));
+}
+
+TEST(IntrusiveListTest, NextPrevious) {
+ Item item1(1);
+ Item item2(2);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+ EXPECT_EQ(nullptr, items.previous(nullptr));
+ EXPECT_EQ(nullptr, items.next(nullptr));
+
+ items.push_back(&item1);
+ EXPECT_EQ(nullptr, items.previous(&item1));
+ EXPECT_EQ(nullptr, items.next(&item1));
+
+ items.push_back(&item2);
+ EXPECT_EQ(nullptr, items.previous(&item1));
+ EXPECT_EQ(&item2, items.next(&item1));
+ EXPECT_EQ(&item1, items.previous(&item2));
+ EXPECT_EQ(nullptr, items.next(&item2));
+}
+
+TEST(IntrusiveListTest, Clear) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+
+ // Empty clear.
+ items.clear();
+ EXPECT_TRUE(items.empty());
+
+ // 1 item clear.
+ items.push_back(&item1);
+ items.clear();
+ EXPECT_TRUE(items.empty());
+
+ // Multi-item clear.
+ items.push_back(&item1);
+ items.push_back(&item2);
+ items.push_back(&item3);
+ items.push_back(&item4);
+ items.clear();
+ EXPECT_TRUE(items.empty());
+}
+
+TEST(IntrusiveListTest, ClearDeleter) {
+ Item item1(1);
+ Item item2(2);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+
+ // No-op first.
+ int delete_count = 0;
+ items.clear([&](Item* item) { ++delete_count; });
+ EXPECT_EQ(0, delete_count);
+
+ // Now with items.
+ items.push_back(&item1);
+ items.push_back(&item2);
+ items.clear([&](Item* item) { ++delete_count; });
+ EXPECT_EQ(2, delete_count);
+ EXPECT_TRUE(items.empty());
+}
+
+TEST(IntrusiveListTest, Replace) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+ items.push_back(&item1);
+ items.push_back(&item2);
+
+ items.replace(&item1, &item3);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(3, 2));
+ EXPECT_FALSE(items.contains(&item1));
+ items.replace(&item2, &item1);
+ EXPECT_THAT(ExtractValues(items), ElementsAre(3, 1));
+ EXPECT_FALSE(items.contains(&item2));
+}
+
+TEST(IntrusiveListTest, Sort) {
+ Item item1(1);
+ Item item2(2);
+ Item item3(3);
+ Item item4(4);
+
+ IntrusiveList<Item, offsetof(Item, list_a)> items;
+
+ // Empty sort.
+ items.sort([](Item* a, Item* b) { return a->value < b->value; });
+
+ // Single item sort.
+ items.clear();
+ items.push_back(&item1);
+ items.sort([](Item* a, Item* b) { return a->value < b->value; });
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1));
+
+ // Already sorted.
+ items.clear();
+ items.push_back(&item1);
+ items.push_back(&item2);
+ items.push_back(&item3);
+ items.push_back(&item4);
+ items.sort([](Item* a, Item* b) { return a->value < b->value; });
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1, 2, 3, 4));
+
+ // Reverse.
+ items.clear();
+ items.push_back(&item4);
+ items.push_back(&item3);
+ items.push_back(&item2);
+ items.push_back(&item1);
+ items.sort([](Item* a, Item* b) { return a->value < b->value; });
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1, 2, 3, 4));
+
+ // Random.
+ items.clear();
+ items.push_back(&item2);
+ items.push_back(&item4);
+ items.push_back(&item1);
+ items.push_back(&item3);
+ items.sort([](Item* a, Item* b) { return a->value < b->value; });
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1, 2, 3, 4));
+
+ // Stability.
+ Item item1a(1);
+ Item item2a(2);
+ items.clear();
+ items.push_back(&item2);
+ items.push_back(&item4);
+ items.push_back(&item1);
+ items.push_back(&item3);
+ items.push_back(&item1a);
+ items.push_back(&item2a);
+ items.sort([](Item* a, Item* b) { return a->value <= b->value; });
+ EXPECT_THAT(ExtractValues(items), ElementsAre(1, 1, 2, 2, 3, 4));
+ auto items_vector = ExtractItems(items);
+ EXPECT_EQ(&item1, items_vector[0]);
+ EXPECT_EQ(&item1a, items_vector[1]);
+ EXPECT_EQ(&item2, items_vector[2]);
+ EXPECT_EQ(&item2a, items_vector[3]);
+ items.clear();
+}
+
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/util/intrusive_list_unique_ptr.inc b/runtime/src/iree/hal/vulkan/util/intrusive_list_unique_ptr.inc
new file mode 100644
index 0000000..c0011fa
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/intrusive_list_unique_ptr.inc
@@ -0,0 +1,137 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// IWYU pragma: private, include "iree/hal/vulkan/util/intrusive_list.h"
+
+#ifndef IREE_HAL_VULKAN_UTIL_INTRUSIVE_LIST_UNIQUE_PTR_H_
+#define IREE_HAL_VULKAN_UTIL_INTRUSIVE_LIST_UNIQUE_PTR_H_
+
+#include <cstddef>
+#include <memory>
+
+#include "iree/base/logging.h"
+#include "iree/hal/vulkan/util/intrusive_list.h"
+
+namespace iree {
+
+// Specialized IntrusiveListBase for std::unique_ptr types.
+// This makes the list methods accept std::unique_ptrs and contains a special
+// take() method that takes ownership of a list item.
+template <typename T, size_t kOffset>
+class IntrusiveListUniquePtrBase
+ : private IntrusiveListBase<T, IntrusiveListIterator<T, kOffset, true>,
+ IntrusiveListIterator<T, kOffset, false>,
+ kOffset> {
+ public:
+ using IteratorT = IntrusiveListIterator<T, kOffset, true>;
+ using ReverseIteratorT = IntrusiveListIterator<T, kOffset, false>;
+ using base_list = IntrusiveListBase<T, IteratorT, ReverseIteratorT, kOffset>;
+ using self_type = IntrusiveListUniquePtrBase<T, kOffset>;
+
+ IntrusiveListUniquePtrBase() = default;
+
+ using base_list::empty;
+ using base_list::size;
+
+ using base_list::contains;
+
+ inline void merge_from(self_type* other_list) {
+ return base_list::merge_from(static_cast<base_list*>(other_list));
+ }
+
+ using base_list::clear;
+
+ using base_list::begin;
+ using base_list::end;
+ using base_list::rbegin;
+ using base_list::rend;
+
+ using base_list::next;
+
+ using base_list::previous;
+
+ using base_list::front;
+
+ void push_front(std::unique_ptr<T> value) {
+ base_list::push_front(value.release());
+ }
+
+ using base_list::pop_front;
+
+ using base_list::back;
+
+ void push_back(std::unique_ptr<T> value) {
+ base_list::push_back(value.release());
+ }
+
+ using base_list::pop_back;
+
+ void insert(const IteratorT& it, std::unique_ptr<T> value) {
+ base_list::insert(it, value.release());
+ }
+
+ using base_list::erase;
+
+ // Removes an item from the list at the given iterator and transfers ownership
+ // to the caller.
+ // Performance: O(1)
+ std::unique_ptr<T> take(IteratorT& it) { // NOLINT(runtime/references)
+ return take(*it);
+ }
+
+ // Removes an item from the list and transfers ownership to the caller.
+ // Performance: O(1)
+ std::unique_ptr<T> take(T* value) {
+ if (!value) {
+ return {nullptr};
+ }
+ auto* link = impl::TToLink<T, kOffset>(value);
+ if (link->prev) {
+ IREE_DCHECK_NE(link, head_);
+ link->prev->next = link->next;
+ } else {
+ IREE_DCHECK_EQ(link, head_);
+ head_ = link->next;
+ }
+ if (link->next) {
+ IREE_DCHECK_NE(link, tail_);
+ link->next->prev = link->prev;
+ } else {
+ IREE_DCHECK_EQ(link, tail_);
+ tail_ = link->prev;
+ }
+ link->next = link->prev = nullptr;
+ --count_;
+ base_list::OnRemove(value);
+ base_list::CheckCorrectness();
+ return std::unique_ptr<T>(value);
+ }
+
+ void replace(T* old_value, std::unique_ptr<T> new_value) {
+ base_list::replace(old_value, new_value.release());
+ }
+
+ using base_list::sort;
+
+ private:
+ void OnDeallocate(T* value) override { delete value; }
+
+ using base_list::count_;
+ using base_list::head_;
+ using base_list::tail_;
+};
+
+template <typename U, size_t kOffset>
+class IntrusiveList<std::unique_ptr<U>, kOffset>
+ : public IntrusiveListUniquePtrBase<U, kOffset> {};
+
+template <typename U>
+class IntrusiveList<std::unique_ptr<U>, kUseDefaultLinkOffset>
+ : public IntrusiveListUniquePtrBase<U, offsetof(U, link)> {};
+
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_UTIL_INTRUSIVE_LIST_UNIQUE_PTR_H_
diff --git a/runtime/src/iree/hal/vulkan/util/intrusive_list_unique_ptr_test.cc b/runtime/src/iree/hal/vulkan/util/intrusive_list_unique_ptr_test.cc
new file mode 100644
index 0000000..9596368
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/intrusive_list_unique_ptr_test.cc
@@ -0,0 +1,77 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <memory>
+
+#include "iree/hal/vulkan/util/intrusive_list.h"
+#include "iree/testing/gtest.h"
+
+namespace iree {
+namespace {
+
+struct AllocatedType : public IntrusiveLinkBase<void> {
+ AllocatedType() { ++alloc_count; }
+ ~AllocatedType() { --alloc_count; }
+ static int alloc_count;
+};
+int AllocatedType::alloc_count = 0;
+
+TEST(IntrusiveListUniquePtrTest, UniquePtr) {
+ AllocatedType::alloc_count = 0;
+
+ // Push/clear.
+ IntrusiveList<std::unique_ptr<AllocatedType>> list;
+ EXPECT_EQ(0, AllocatedType::alloc_count);
+ list.push_back(std::make_unique<AllocatedType>());
+ EXPECT_EQ(1, AllocatedType::alloc_count);
+ EXPECT_NE(nullptr, list.front());
+ list.clear();
+ EXPECT_EQ(0, AllocatedType::alloc_count);
+
+ // Push/pop.
+ list.push_back(std::make_unique<AllocatedType>());
+ EXPECT_EQ(1, AllocatedType::alloc_count);
+ EXPECT_NE(nullptr, list.front());
+ for (auto item : list) {
+ EXPECT_EQ(item, list.front());
+ }
+ list.pop_back();
+ EXPECT_EQ(0, AllocatedType::alloc_count);
+
+ // Push/take.
+ list.push_back(std::make_unique<AllocatedType>());
+ EXPECT_EQ(1, AllocatedType::alloc_count);
+ EXPECT_NE(nullptr, list.front());
+ auto item = list.take(list.front());
+ EXPECT_TRUE(list.empty());
+ EXPECT_NE(nullptr, item.get());
+ EXPECT_EQ(1, AllocatedType::alloc_count);
+ item.reset();
+ EXPECT_EQ(0, AllocatedType::alloc_count);
+
+ // Push/replace.
+ list.push_back(std::make_unique<AllocatedType>());
+ EXPECT_EQ(1, AllocatedType::alloc_count);
+ list.replace(list.front(), std::make_unique<AllocatedType>());
+ EXPECT_EQ(1, AllocatedType::alloc_count);
+ list.clear();
+ EXPECT_EQ(0, AllocatedType::alloc_count);
+
+ // Iteration.
+ list.push_back(std::make_unique<AllocatedType>());
+ list.push_back(std::make_unique<AllocatedType>());
+ list.push_back(std::make_unique<AllocatedType>());
+ EXPECT_EQ(3, AllocatedType::alloc_count);
+ for (auto item : list) {
+ AllocatedType* item_ptr = item;
+ EXPECT_NE(nullptr, item_ptr);
+ }
+ list.clear();
+ EXPECT_EQ(0, AllocatedType::alloc_count);
+}
+
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/util/ref_ptr.h b/runtime/src/iree/hal/vulkan/util/ref_ptr.h
new file mode 100644
index 0000000..5bde1c9
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/ref_ptr.h
@@ -0,0 +1,383 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_UTIL_REF_PTR_H_
+#define IREE_HAL_VULKAN_UTIL_REF_PTR_H_
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <utility>
+
+#include "iree/base/attributes.h"
+#include "iree/base/logging.h"
+
+namespace iree {
+
+// Use this to get really verbose refptr logging:
+// #define IREE_VERBOSE_REF_PTR
+
+template <class T>
+class ref_ptr;
+
+// Allocates a new ref_ptr type.
+// Like make_unique, but for ref_ptr.
+//
+// Usage:
+// ref_ptr<MyType> p = make_ref<MyType>(1, 2, 3);
+template <typename T, typename... Args>
+ref_ptr<T> make_ref(Args&&... args) {
+ // Constructs T directly; ref_ptr's explicit T* ctor adopts the pointer
+ // without bumping the count (new RefObject-derived objects start at 1).
+ return ref_ptr<T>(new T(std::forward<Args>(args)...));
+}
+
+// Assigns a raw pointer to a ref_ptr without adding a reference.
+//
+// Usage:
+// ref_ptr<MyType> p = assign_ref(new MyType());
+template <typename T>
+inline ref_ptr<T> assign_ref(T* value) {
+ // Adopts the caller's existing reference; no count change.
+ return ref_ptr<T>(value);
+}
+
+// Adds a reference to the given raw pointer.
+//
+// Usage:
+// MyType* raw_ptr = AcquirePointerFromSomewhere();
+// ref_ptr<MyType> p = add_ref(raw_ptr);
+template <typename T>
+inline ref_ptr<T> add_ref(T* value) {
+ // nullptr is tolerated: no ref added and the resulting ref_ptr is empty.
+ if (value) ref_ptr_add_ref(value);
+ return ref_ptr<T>(value);
+}
+
+// Adds a reference to the given ref_ptr.
+//
+// Usage:
+// ref_ptr<MyType> a = make_ref<MyType>();
+// ref_ptr<MyType> p = add_ref(a);
+template <typename T>
+inline ref_ptr<T> add_ref(const ref_ptr<T>& value) {
+ // Both the source and the returned ref_ptr hold a reference afterwards.
+ if (value.get()) ref_ptr_add_ref(value.get());
+ return ref_ptr<T>(value.get());
+}
+
+// Reference counted pointer container.
+// This is modeled on boost::intrusive_ptr in that it requires no
+// extra storage over the pointer type and should compile to almost
+// no additional code. It also allows us to round-trip object pointers
+// through regular pointers, which is critical when having to round-trip
+// them through JNI/etc where we can't use things like unique_ptr/shared_ptr.
+//
+// ref_ptr<Foo> p1(new Foo()); // ref count 1
+// ref_ptr<Foo> p2(p1); // ref count 2
+// p1.reset(); // ref count 1
+// p2.reset(); // ref count 0, deleted
+//
+// When round-tripping the pointer through external APIs, use release():
+// ref_ptr<Foo> p1(new Foo()); // ref count 1
+// Foo* raw_p = p1.release(); // ref count 1
+// // pass to API
+// ref_ptr<Foo> p2(raw_p); // ref count 1 (don't add ref)
+// p2.reset(); // ref count 0, deleted
+//
+// See the boost intrusive_ptr docs for details of behavior:
+// http://www.boost.org/doc/libs/1_55_0/libs/smart_ptr/intrusive_ptr.html
+//
+// ref_ptr manages the target objects in a thread-safe way, though you'll want
+// to take care with objects that may have pinned threads for deallocation. If
+// you release the last reference to an object on a thread other than what it
+// was expecting you're gonna have a bad time.
+//
+// Compatible only with types that subclass RefObject or implement the following
+// methods:
+// ref_ptr_add_ref
+// ref_ptr_release_ref
+template <class T>
+class ref_ptr {
+ private:
+ typedef ref_ptr this_type;
+ // Pointer-to-member type used for the safe-bool conversion below; avoids
+ // accidental implicit conversion to integral types.
+ typedef T* this_type::*unspecified_bool_type;
+
+ public:
+ // Initializes with nullptr.
+ IREE_ATTRIBUTE_ALWAYS_INLINE ref_ptr() noexcept = default;
+
+ // Initializes with nullptr so that there is no way to create an
+ // uninitialized ref_ptr.
+ IREE_ATTRIBUTE_ALWAYS_INLINE ref_ptr(std::nullptr_t) noexcept {} // NOLINT
+
+ // Initializes the pointer to the given value.
+ // The value will not have its reference count incremented (as it is with
+ // unique_ptr). Use Retain to add to the reference count.
+ IREE_ATTRIBUTE_ALWAYS_INLINE explicit ref_ptr(T* p) noexcept : px_(p) {}
+
+ // Decrements the reference count of the owned pointer.
+ IREE_ATTRIBUTE_ALWAYS_INLINE ~ref_ptr() noexcept {
+ if (px_) ref_ptr_release_ref(px_);
+ }
+
+ // No implicit ref_ptr copying allowed; use add_ref instead.
+ ref_ptr(const ref_ptr&) noexcept = delete;
+ ref_ptr& operator=(const ref_ptr&) noexcept = delete;
+
+ // Move support to transfer ownership from one ref_ptr to another.
+ ref_ptr(ref_ptr&& rhs) noexcept : px_(rhs.release()) {}
+ ref_ptr& operator=(ref_ptr&& rhs) noexcept {
+ // Same-pointer guard: prevents releasing px_ when moving onto itself.
+ if (px_ != rhs.px_) {
+ if (px_) ref_ptr_release_ref(px_);
+ px_ = rhs.release();
+ }
+ return *this;
+ }
+
+ // Move support from another compatible type.
+ template <typename U>
+ ref_ptr(ref_ptr<U>&& rhs) noexcept : px_(rhs.release()) {} // NOLINT
+ template <typename U>
+ ref_ptr& operator=(ref_ptr<U>&& rhs) noexcept {
+ if (px_ != rhs.get()) {
+ if (px_) ref_ptr_release_ref(px_);
+ px_ = rhs.release();
+ }
+ return *this;
+ }
+
+ // Resets the object to nullptr and decrements the reference count, possibly
+ // deleting it.
+ void reset() noexcept {
+ if (px_) {
+ ref_ptr_release_ref(px_);
+ px_ = nullptr;
+ }
+ }
+
+ // Releases a pointer.
+ // Returns the current pointer held by this object without having
+ // its reference count decremented and resets the ref_ptr to empty.
+ // Returns nullptr if the ref_ptr holds no value.
+ // To re-wrap in a ref_ptr use either ref_ptr<T>(value) or assign().
+ IREE_ATTRIBUTE_ALWAYS_INLINE T* release() noexcept {
+ T* p = px_;
+ px_ = nullptr;
+ return p;
+ }
+
+ // Assigns a pointer.
+ // The pointer will be accepted by the ref_ptr and its reference count will
+ // not be incremented.
+ IREE_ATTRIBUTE_ALWAYS_INLINE void assign(T* value) noexcept {
+ // reset() first drops any currently-held reference before adopting value.
+ reset();
+ px_ = value;
+ }
+
+ // Gets the pointer referenced by this instance.
+ // operator* and operator-> will assert() if there is no current object.
+ constexpr T* get() const noexcept { return px_; }
+ constexpr T& operator*() const noexcept { return *px_; }
+ constexpr T* operator->() const noexcept { return px_; }
+
+ // Support boolean expression evaluation ala unique_ptr/shared_ptr:
+ // https://en.cppreference.com/w/cpp/memory/shared_ptr/operator_bool
+ constexpr operator unspecified_bool_type() const noexcept {
+ return px_ ? &this_type::px_ : nullptr;
+ }
+ // Supports unary expression evaluation.
+ constexpr bool operator!() const noexcept { return !px_; }
+
+ // Swap support.
+ void swap(ref_ptr& rhs) { std::swap(px_, rhs.px_); }
+
+ private:
+ T* px_ = nullptr;
+};
+
+// Base class for reference counted objects.
+// Reference counted objects should be used with the ref_ptr pointer type.
+// As reference counting can be tricky always prefer to use unique_ptr and
+// avoid this type. Only use this when unique_ptr is not possible, such as
+// when round-tripping objects through marshaling boundaries (v8/Java) or
+// any objects that may have their lifetime tied to a garbage collected
+// object.
+//
+// Subclasses should protect their dtor so that reference counting must
+// be used.
+//
+// This is designed to avoid the need for extra vtable space or for adding
+// methods to the vtable of subclasses. This differs from the boost Pointable
+// version of this object.
+// Inspiration for this comes from Peter Weinert's Dr. Dobb's article:
+// http://www.drdobbs.com/cpp/a-base-class-for-intrusively-reference-c/229218807
+//
+// RefObjects are thread safe and may be used with ref_ptrs from multiple
+// threads.
+//
+// Subclasses may implement a custom Delete operator to handle their
+// deallocation. It should be thread safe as it may be called from any thread.
+//
+// Usage:
+// class MyRefObject : public RefObject<MyRefObject> {
+// public:
+// MyRefObject() = default;
+// // Optional; can be used to return to pool/etc - must be public:
+// static void Delete(MyRefObject* ptr) {
+// ::operator delete(ptr);
+// }
+// };
+template <class T>
+class RefObject {
+ static_assert(!std::is_array<T>::value, "T must not be an array");
+
+ // value is true if a static Delete(T*) function is present.
+ // SFINAE probe: the first overload is viable only when C::Delete(nullptr)
+ // is a well-formed expression.
+ struct has_custom_deleter {
+ template <typename C>
+ static auto Test(C* p) -> decltype(C::Delete(nullptr), std::true_type());
+ template <typename>
+ static std::false_type Test(...);
+ static constexpr bool value =
+ std::is_same<std::true_type, decltype(Test<T>(nullptr))>::value;
+ };
+
+ // Primary thunk: used when T provides a static Delete(T*); dispatches the
+ // final destruction through that function instead of operator delete.
+ template <typename V, bool has_custom_deleter>
+ struct delete_thunk {
+ static void Delete(V* p) {
+ auto ref_obj = static_cast<RefObject<V>*>(p);
+ // fetch_sub returns the value prior to the decrement, so reaching zero
+ // is observed as previous_count == 1.
+ int previous_count = ref_obj->counter_.fetch_sub(1);
+#ifdef IREE_VERBOSE_REF_PTR
+ IREE_LOG(INFO) << "ro-- " << typeid(V).name() << " " << p << " now "
+ << previous_count - 1
+ << (previous_count == 1 ? " DEAD (CUSTOM)" : "");
+#endif // IREE_VERBOSE_REF_PTR
+ if (previous_count == 1) {
+ // We delete type T pointer here to avoid the need for a virtual dtor.
+ V::Delete(p);
+ }
+ }
+ static void Destroy(V* p) { V::Delete(p); }
+ };
+
+ // Specialization for types without a custom Delete: falls back to the
+ // regular delete expression.
+ template <typename V>
+ struct delete_thunk<V, false> {
+ static void Delete(V* p) {
+ auto ref_obj = static_cast<RefObject<V>*>(p);
+ int previous_count = ref_obj->counter_.fetch_sub(1);
+#ifdef IREE_VERBOSE_REF_PTR
+ IREE_LOG(INFO) << "ro-- " << typeid(V).name() << " " << p << " now "
+ << previous_count - 1
+ << (previous_count == 1 ? " DEAD" : "");
+#endif // IREE_VERBOSE_REF_PTR
+ if (previous_count == 1) {
+ // We delete type T pointer here to avoid the need for a virtual dtor.
+ delete p;
+ }
+ }
+ static void Destroy(V* p) { delete p; }
+ };
+
+ public:
+ // Adds a reference; used by ref_ptr.
+ friend void ref_ptr_add_ref(T* p) {
+ auto ref_obj = static_cast<RefObject*>(p);
+ ++ref_obj->counter_;
+
+#ifdef IREE_VERBOSE_REF_PTR
+ IREE_LOG(INFO) << "ro++ " << typeid(T).name() << " " << p << " now "
+ << ref_obj->counter_;
+#endif // IREE_VERBOSE_REF_PTR
+ }
+
+ // Releases a reference, potentially deleting the object; used by ref_ptr.
+ friend void ref_ptr_release_ref(T* p) {
+ delete_thunk<T, has_custom_deleter::value>::Delete(p);
+ }
+
+ // Deletes the object (precondition: ref count is zero).
+ friend void ref_ptr_destroy_ref(T* p) {
+ delete_thunk<T, has_custom_deleter::value>::Destroy(p);
+ }
+
+ // Deletes the object (precondition: ref count is zero).
+ static void DirectDestroy(void* p) {
+ ref_ptr_destroy_ref(reinterpret_cast<T*>(p));
+ }
+
+ // Adds a reference.
+ // ref_ptr should be used instead of this in most cases. This is required
+ // for when interoperating with marshaling APIs.
+ void AddReference() { ref_ptr_add_ref(static_cast<T*>(this)); }
+
+ // Releases a reference, potentially deleting the object.
+ // ref_ptr should be used instead of this in most cases. This is required
+ // for when interoperating with marshaling APIs.
+ void ReleaseReference() { ref_ptr_release_ref(static_cast<T*>(this)); }
+
+ // Returns the offset of the reference counter field from the start of the
+ // type T.
+ //
+ // This is generally unsafe to use and is here for support of the
+ // iree_vm_ref_t glue that allows RefObject-derived types to be round-tripped
+ // through the VM.
+ //
+ // For simple POD types or non-virtual classes we expect this to return 0.
+ // If the type has virtual methods (dtors/etc) then it should be 4 or 8
+ // (depending on pointer width). It may be other things, and instead of too
+ // much crazy magic we just rely on offsetof doing the right thing here.
+ static constexpr size_t offsetof_counter() { return offsetof(T, counter_); }
+
+ protected:
+ // The constructor immediately adds a reference, so a freshly constructed
+ // object has count 1 even though counter_ is initialized to 0 below.
+ RefObject() { ref_ptr_add_ref(static_cast<T*>(this)); }
+ // NOTE: copy construction/assignment do not copy counter_ — each object
+ // keeps its own reference count.
+ RefObject(const RefObject&) = default;
+ RefObject& operator=(const RefObject&) { return *this; }
+
+ std::atomic<int32_t> counter_{0};
+};
+
+// Various comparison operator overloads.
+
+// All comparisons below compare the underlying raw pointers (identity), not
+// the pointed-to objects.
+template <class T, class U>
+inline bool operator==(ref_ptr<T> const& a, ref_ptr<U> const& b) {
+ return a.get() == b.get();
+}
+
+template <class T, class U>
+inline bool operator!=(ref_ptr<T> const& a, ref_ptr<U> const& b) {
+ return a.get() != b.get();
+}
+
+template <class T, class U>
+inline bool operator==(ref_ptr<T> const& a, U* b) {
+ return a.get() == b;
+}
+
+template <class T, class U>
+inline bool operator!=(ref_ptr<T> const& a, U* b) {
+ return a.get() != b;
+}
+
+template <class T, class U>
+inline bool operator==(T* a, ref_ptr<U> const& b) {
+ return a == b.get();
+}
+
+template <class T, class U>
+inline bool operator!=(T* a, ref_ptr<U> const& b) {
+ return a != b.get();
+}
+
+// Pointer ordering; allows ref_ptr to be used as a key in ordered containers.
+template <class T>
+inline bool operator<(ref_ptr<T> const& a, ref_ptr<T> const& b) {
+ return a.get() < b.get();
+}
+
+// Swaps the pointers of two ref_ptrs.
+template <class T>
+void swap(ref_ptr<T>& lhs, ref_ptr<T>& rhs) {
+ lhs.swap(rhs);
+}
+
+} // namespace iree
+
+#endif // IREE_HAL_VULKAN_UTIL_REF_PTR_H_
diff --git a/runtime/src/iree/hal/vulkan/util/ref_ptr_test.cc b/runtime/src/iree/hal/vulkan/util/ref_ptr_test.cc
new file mode 100644
index 0000000..532931c
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/util/ref_ptr_test.cc
@@ -0,0 +1,324 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/util/ref_ptr.h"
+
+#include "iree/testing/gtest.h"
+
+namespace iree {
+namespace {
+
+// Minimal RefObject subclass used by the tests below; exposes the protected
+// reference counter so assertions can inspect it directly.
+class MyType : public RefObject<MyType> {
+ public:
+ int x = 5;
+
+ using RefObject<MyType>::counter_; // Expose for testing.
+};
+
+// Covers the three acquisition paths: adopting constructor, assign_ref
+// (no count change), and add_ref (count +1).
+TEST(RefPtrTest, Construction) {
+ // Empty.
+ ref_ptr<MyType> n1;
+ EXPECT_EQ(nullptr, n1.get());
+ ref_ptr<MyType> n2(nullptr);
+ EXPECT_EQ(nullptr, n2.get());
+
+ // Assign a new ptr and add ref.
+ MyType* a_ptr = new MyType();
+ EXPECT_EQ(1, a_ptr->counter_);
+ ref_ptr<MyType> a(a_ptr);
+ EXPECT_EQ(1, a->counter_);
+
+ // Assign existing ptr without adding a ref.
+ ref_ptr<MyType> b(a_ptr);
+ EXPECT_EQ(1, b->counter_);
+
+ // Add a new ref.
+ ref_ptr<MyType> c = add_ref(b);
+ EXPECT_EQ(2, c->counter_);
+
+ // b adopted a_ptr without a ref of its own; release so the count stays
+ // balanced when a and c go out of scope.
+ b.release();
+}
+
+TEST(RefPtrTest, Assign) {
+ // Ok to assign nothing.
+ ref_ptr<MyType> n1 = assign_ref<MyType>(nullptr);
+ EXPECT_EQ(nullptr, n1.get());
+
+ ref_ptr<MyType> mt = make_ref<MyType>();
+ EXPECT_EQ(1, mt->counter_);
+ ref_ptr<MyType> n2 = assign_ref(mt.get());
+ EXPECT_EQ(1, mt->counter_);
+ mt.release(); // must release, as we assigned to n2.
+ EXPECT_EQ(1, n2->counter_);
+ n2.reset();
+}
+
+TEST(RefPtrTest, Retain) {
+ // Ok to retain nothing.
+ ref_ptr<MyType> n1 = add_ref<MyType>(nullptr);
+ EXPECT_EQ(nullptr, n1.get());
+
+ ref_ptr<MyType> mt = make_ref<MyType>();
+ EXPECT_EQ(1, mt->counter_);
+ ref_ptr<MyType> n2 = add_ref(mt.get());
+ EXPECT_EQ(2, mt->counter_);
+ mt.reset();
+ EXPECT_EQ(1, n2->counter_);
+ n2.reset();
+}
+
+// reset()/operator=(nullptr) drop one reference; no-ops on empty ptrs.
+TEST(RefPtrTest, Reset) {
+ ref_ptr<MyType> a(new MyType());
+ ref_ptr<MyType> b(new MyType());
+
+ // Reset to drop reference.
+ ref_ptr<MyType> a_copy = add_ref(a);
+ EXPECT_EQ(2, a_copy->counter_);
+ a.reset();
+ EXPECT_EQ(1, a_copy->counter_);
+
+ // Reset via = operator.
+ a = nullptr;
+ EXPECT_EQ(1, a_copy->counter_);
+ a = add_ref(a_copy);
+ EXPECT_EQ(2, a_copy->counter_);
+
+ // No-op on empty ptrs.
+ ref_ptr<MyType> n;
+ n.reset();
+ n.assign(nullptr);
+}
+
+// release() hands the raw pointer back without decrementing; assign()
+// re-adopts it without incrementing.
+TEST(RefPtrTest, ReleaseAssign) {
+ ref_ptr<MyType> a(new MyType());
+
+ // Release a's pointer.
+ MyType* a_raw_ptr = a.get();
+ MyType* a_ptr = a.release();
+ EXPECT_EQ(a_raw_ptr, a_ptr);
+ EXPECT_EQ(nullptr, a.get());
+ EXPECT_EQ(1, a_ptr->counter_);
+
+ // Re-wrap in a ref_ptr.
+ a.assign(a_ptr);
+ EXPECT_EQ(1, a->counter_);
+
+ // No-op on empty ptrs.
+ ref_ptr<MyType> n;
+ EXPECT_EQ(nullptr, n.release());
+}
+
+// operator-> and operator* expose the underlying object for read and write.
+TEST(RefPtrTest, Accessors) {
+ ref_ptr<MyType> a(new MyType());
+ EXPECT_EQ(5, a->x);
+ a->x = 100;
+ EXPECT_EQ(100, a->x);
+
+ MyType& ra = *a;
+ ra.x = 200;
+ EXPECT_EQ(200, ra.x);
+
+ const MyType& cra = *a;
+ EXPECT_EQ(200, cra.x);
+}
+
+// The safe-bool conversion and operator! track emptiness of the ref_ptr.
+TEST(RefPtrTest, BooleanExpressions) {
+ ref_ptr<MyType> a(new MyType());
+ ref_ptr<MyType> n;
+
+ EXPECT_NE(nullptr, a.get());
+ EXPECT_TRUE(a);
+ EXPECT_FALSE(!a);
+ EXPECT_EQ(true, static_cast<bool>(a));
+
+ EXPECT_EQ(nullptr, n.get());
+ EXPECT_FALSE(n);
+ EXPECT_TRUE(!n);
+ EXPECT_EQ(false, static_cast<bool>(n));
+}
+
+// All operator overload permutations (ref_ptr vs ref_ptr, ref_ptr vs raw
+// pointer in both argument orders) compare pointer identity.
+TEST(RefPtrTest, Comparisons) {
+ ref_ptr<MyType> a(new MyType());
+ ref_ptr<MyType> b(new MyType());
+ ref_ptr<MyType> n;
+
+ EXPECT_TRUE(a == a);
+ EXPECT_TRUE(a == a.get());
+ EXPECT_TRUE(a.get() == a);
+ EXPECT_FALSE(a != a);
+ EXPECT_FALSE(a != a.get());
+ EXPECT_FALSE(a.get() != a);
+
+ EXPECT_FALSE(a == b);
+ EXPECT_FALSE(a == b.get());
+ EXPECT_FALSE(a.get() == b);
+ EXPECT_TRUE(a != b);
+ EXPECT_TRUE(a != b.get());
+ EXPECT_TRUE(a.get() != b);
+
+ EXPECT_TRUE(n == n);
+ EXPECT_TRUE(n == n.get());
+ EXPECT_TRUE(n.get() == n);
+ EXPECT_FALSE(n != n);
+ EXPECT_FALSE(n != n.get());
+ EXPECT_FALSE(n.get() != n);
+
+ EXPECT_FALSE(a < a);
+ EXPECT_TRUE(n < a);
+}
+
+// swap() exchanges pointers without touching reference counts; self-swap and
+// swapping with an empty ref_ptr are valid.
+TEST(RefPtrTest, Swap) {
+ ref_ptr<MyType> a(new MyType());
+ ref_ptr<MyType> b(new MyType());
+ MyType* a_ptr = a.get();
+ MyType* b_ptr = b.get();
+
+ swap(a, a);
+ EXPECT_EQ(a_ptr, a);
+
+ swap(a, b);
+ EXPECT_EQ(a_ptr, b.get());
+ EXPECT_EQ(b_ptr, a.get());
+
+ swap(a, b);
+ EXPECT_EQ(a_ptr, a.get());
+ EXPECT_EQ(b_ptr, b.get());
+
+ ref_ptr<MyType> c;
+ swap(a, c);
+ EXPECT_EQ(a_ptr, c.get());
+ EXPECT_EQ(nullptr, a.get());
+}
+
+// Move assignment transfers ownership and leaves the source empty.
+TEST(RefPtrTest, Move) {
+ auto a = make_ref<MyType>();
+ auto b = make_ref<MyType>();
+ ref_ptr<MyType> c;
+ EXPECT_EQ(nullptr, c.get());
+
+ c = std::move(a);
+ EXPECT_NE(nullptr, c.get());
+
+ b = std::move(c);
+ EXPECT_NE(nullptr, b.get());
+}
+
+// Derived-to-base conversions through the templated move ctor/assignment and
+// add_ref must preserve the shared reference count.
+TEST(RefPtrTest, MoveCompatible) {
+ struct MyBaseType : public RefObject<MyBaseType> {
+ int x = 5;
+ using RefObject<MyBaseType>::counter_; // Expose for testing.
+
+ virtual ~MyBaseType() = default;
+ };
+ struct MyTypeA : public MyBaseType {
+ int a = 6;
+ };
+ struct MyTypeB : public MyBaseType {
+ int b = 7;
+ };
+
+ ref_ptr<MyTypeA> a = make_ref<MyTypeA>();
+ EXPECT_EQ(1, a->counter_);
+ ref_ptr<MyBaseType> base = add_ref(a);
+ EXPECT_EQ(a.get(), base.get());
+ EXPECT_EQ(2, a->counter_);
+
+ // Reassigning base drops its ref on the MyTypeA instance.
+ base = make_ref<MyTypeB>();
+ EXPECT_EQ(1, a->counter_);
+ EXPECT_EQ(1, base->counter_);
+}
+
+// RefObject-derived types can still live on the stack; the dtor runs at scope
+// exit regardless of the reference count.
+TEST(RefPtrTest, StackAllocation) {
+ static int alloc_count = 0;
+ class StackAllocationType : public RefObject<StackAllocationType> {
+ public:
+ StackAllocationType() { ++alloc_count; }
+ ~StackAllocationType() { --alloc_count; }
+ };
+ {
+ StackAllocationType a;
+ EXPECT_EQ(1, alloc_count);
+ }
+ EXPECT_EQ(0, alloc_count);
+}
+
+// Without a static Delete() the delete_thunk falls back to operator delete.
+TEST(RefPtrTest, DefaultDeleter) {
+ static int alloc_count = 0;
+ class DefaultDeleterType : public RefObject<DefaultDeleterType> {
+ public:
+ DefaultDeleterType() { ++alloc_count; }
+ ~DefaultDeleterType() { --alloc_count; }
+ };
+
+ // Empty is ok.
+ ref_ptr<DefaultDeleterType> n;
+ n.reset();
+
+ // Lifecycle.
+ EXPECT_EQ(0, alloc_count);
+ ref_ptr<DefaultDeleterType> a = make_ref<DefaultDeleterType>();
+ EXPECT_EQ(1, alloc_count);
+ a.reset();
+ EXPECT_EQ(0, alloc_count);
+}
+
+// A static Delete(T*) is detected via has_custom_deleter and used instead of
+// operator delete when the count reaches zero.
+TEST(RefPtrTest, InlineDeallocator) {
+ static int alloc_count = 0;
+ class CustomDeleterType : public RefObject<CustomDeleterType> {
+ public:
+ CustomDeleterType() { ++alloc_count; }
+ static void Delete(CustomDeleterType* ptr) {
+ --alloc_count;
+ ::operator delete(ptr);
+ }
+ };
+
+ // Empty is ok.
+ ref_ptr<CustomDeleterType> n;
+ n.reset();
+
+ // Lifecycle.
+ EXPECT_EQ(0, alloc_count);
+ auto a = make_ref<CustomDeleterType>();
+ EXPECT_EQ(1, alloc_count);
+ a.reset();
+ EXPECT_EQ(0, alloc_count);
+}
+
+// Types with virtual destructors used to verify that deleting through a base
+// ref_ptr runs the derived destructor as well.
+class VirtualDtorTypeA : public RefObject<VirtualDtorTypeA> {
+ public:
+ VirtualDtorTypeA() { ++alloc_count_a; }
+ virtual ~VirtualDtorTypeA() { --alloc_count_a; }
+ static int alloc_count_a;
+};
+int VirtualDtorTypeA::alloc_count_a = 0;
+
+class VirtualDtorTypeB : public VirtualDtorTypeA {
+ public:
+ VirtualDtorTypeB() { ++alloc_count_b; }
+ ~VirtualDtorTypeB() override { --alloc_count_b; }
+ static int alloc_count_b;
+};
+int VirtualDtorTypeB::alloc_count_b = 0;
+
+TEST(RefPtrTest, VirtualDestructor) {
+ // Empty is ok.
+ ref_ptr<VirtualDtorTypeB> n;
+ n.reset();
+
+ // Lifecycle.
+ EXPECT_EQ(0, VirtualDtorTypeA::alloc_count_a);
+ EXPECT_EQ(0, VirtualDtorTypeB::alloc_count_b);
+ // Held as base, allocated as derived: both dtors must run on release.
+ ref_ptr<VirtualDtorTypeA> a = make_ref<VirtualDtorTypeB>();
+ EXPECT_EQ(1, VirtualDtorTypeA::alloc_count_a);
+ EXPECT_EQ(1, VirtualDtorTypeB::alloc_count_b);
+ a.reset();
+ EXPECT_EQ(0, VirtualDtorTypeA::alloc_count_a);
+ EXPECT_EQ(0, VirtualDtorTypeB::alloc_count_b);
+}
+
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/hal/vulkan/vma_allocator.cc b/runtime/src/iree/hal/vulkan/vma_allocator.cc
new file mode 100644
index 0000000..9cc167d
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vma_allocator.cc
@@ -0,0 +1,406 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/vma_allocator.h"
+
+#include <cstddef>
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+#include "iree/hal/vulkan/vma_buffer.h"
+
+using namespace iree::hal::vulkan;
+
+// HAL allocator backed by the Vulkan Memory Allocator (VMA) library.
+typedef struct iree_hal_vulkan_vma_allocator_t {
+ iree_hal_resource_t resource;
+ iree_hal_device_t* device; // unretained to avoid cycles
+ iree_allocator_t host_allocator;
+ VmaAllocator vma;
+
+ // Only present when IREE_STATISTICS_ENABLE is set: cached device memory
+ // properties and running allocation statistics.
+ IREE_STATISTICS(VkPhysicalDeviceMemoryProperties memory_props;)
+ IREE_STATISTICS(iree_hal_allocator_statistics_t statistics;)
+} iree_hal_vulkan_vma_allocator_t;
+
+namespace {
+extern const iree_hal_allocator_vtable_t iree_hal_vulkan_vma_allocator_vtable;
+} // namespace
+
+// Downcasts a base allocator to the VMA implementation after checking (in
+// debug builds) that the vtable matches.
+static iree_hal_vulkan_vma_allocator_t* iree_hal_vulkan_vma_allocator_cast(
+ iree_hal_allocator_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_vma_allocator_vtable);
+ return (iree_hal_vulkan_vma_allocator_t*)base_value;
+}
+
+#if IREE_STATISTICS_ENABLE
+
+// Maps a Vulkan memory type ordinal to the coarse HAL memory type used for
+// statistics bucketing (device-local vs host-local only).
+static iree_hal_memory_type_t iree_hal_vulkan_vma_allocator_lookup_memory_type(
+ iree_hal_vulkan_vma_allocator_t* allocator, uint32_t memory_type_ordinal) {
+ // We could better map the types however today we only use the
+ // device/host-local bits.
+ VkMemoryPropertyFlags flags =
+ allocator->memory_props.memoryTypes[memory_type_ordinal].propertyFlags;
+ if (iree_all_bits_set(flags, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
+ return IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL;
+ } else {
+ return IREE_HAL_MEMORY_TYPE_HOST_LOCAL;
+ }
+}
+
+// Callback function called before vkAllocateMemory.
+// pUserData is the iree_hal_vulkan_vma_allocator_t registered at creation.
+static void VKAPI_PTR iree_hal_vulkan_vma_allocate_callback(
+ VmaAllocator VMA_NOT_NULL vma, uint32_t memoryType,
+ VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, VkDeviceSize size,
+ void* VMA_NULLABLE pUserData) {
+ iree_hal_vulkan_vma_allocator_t* allocator =
+ (iree_hal_vulkan_vma_allocator_t*)pUserData;
+ iree_hal_allocator_statistics_record_alloc(
+ &allocator->statistics,
+ iree_hal_vulkan_vma_allocator_lookup_memory_type(allocator, memoryType),
+ (iree_device_size_t)size);
+}
+
+// Callback function called before vkFreeMemory.
+// Mirrors the allocate callback above to keep the statistics balanced.
+static void VKAPI_PTR iree_hal_vulkan_vma_free_callback(
+ VmaAllocator VMA_NOT_NULL vma, uint32_t memoryType,
+ VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, VkDeviceSize size,
+ void* VMA_NULLABLE pUserData) {
+ iree_hal_vulkan_vma_allocator_t* allocator =
+ (iree_hal_vulkan_vma_allocator_t*)pUserData;
+ iree_hal_allocator_statistics_record_free(
+ &allocator->statistics,
+ iree_hal_vulkan_vma_allocator_lookup_memory_type(allocator, memoryType),
+ (iree_device_size_t)size);
+}
+
+#endif // IREE_STATISTICS_ENABLE
+
+// Creates a VMA-backed HAL allocator for |device|, wiring the Vulkan entry
+// points from |logical_device|'s dynamic symbol table into VMA.
+// On success ownership of the new allocator is returned in |out_allocator|.
+iree_status_t iree_hal_vulkan_vma_allocator_create(
+ VkInstance instance, VkPhysicalDevice physical_device,
+ VkDeviceHandle* logical_device, iree_hal_device_t* device,
+ iree_hal_allocator_t** out_allocator) {
+ IREE_ASSERT_ARGUMENT(instance);
+ IREE_ASSERT_ARGUMENT(physical_device);
+ IREE_ASSERT_ARGUMENT(logical_device);
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(out_allocator);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_t host_allocator = logical_device->host_allocator();
+ iree_hal_vulkan_vma_allocator_t* allocator = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc(host_allocator, sizeof(*allocator),
+ (void**)&allocator));
+ iree_hal_resource_initialize(&iree_hal_vulkan_vma_allocator_vtable,
+ &allocator->resource);
+ allocator->host_allocator = host_allocator;
+ // Unretained; see the struct comment (avoids a reference cycle).
+ allocator->device = device;
+
+ // VMA calls Vulkan through this function table rather than linking the
+ // loader directly; populate it from the dynamically resolved symbols.
+ const auto& syms = logical_device->syms();
+ VmaVulkanFunctions vulkan_fns;
+ memset(&vulkan_fns, 0, sizeof(vulkan_fns));
+ vulkan_fns.vkGetPhysicalDeviceProperties =
+ syms->vkGetPhysicalDeviceProperties;
+ vulkan_fns.vkGetPhysicalDeviceMemoryProperties =
+ syms->vkGetPhysicalDeviceMemoryProperties;
+ vulkan_fns.vkAllocateMemory = syms->vkAllocateMemory;
+ vulkan_fns.vkFreeMemory = syms->vkFreeMemory;
+ vulkan_fns.vkMapMemory = syms->vkMapMemory;
+ vulkan_fns.vkUnmapMemory = syms->vkUnmapMemory;
+ vulkan_fns.vkFlushMappedMemoryRanges = syms->vkFlushMappedMemoryRanges;
+ vulkan_fns.vkInvalidateMappedMemoryRanges =
+ syms->vkInvalidateMappedMemoryRanges;
+ vulkan_fns.vkBindBufferMemory = syms->vkBindBufferMemory;
+ vulkan_fns.vkBindImageMemory = syms->vkBindImageMemory;
+ vulkan_fns.vkGetBufferMemoryRequirements =
+ syms->vkGetBufferMemoryRequirements;
+ vulkan_fns.vkGetImageMemoryRequirements = syms->vkGetImageMemoryRequirements;
+ vulkan_fns.vkCreateBuffer = syms->vkCreateBuffer;
+ vulkan_fns.vkDestroyBuffer = syms->vkDestroyBuffer;
+ vulkan_fns.vkCreateImage = syms->vkCreateImage;
+ vulkan_fns.vkDestroyImage = syms->vkDestroyImage;
+ vulkan_fns.vkCmdCopyBuffer = syms->vkCmdCopyBuffer;
+
+ // Statistics-only hooks; compiled out when IREE_STATISTICS_ENABLE is unset.
+ VmaDeviceMemoryCallbacks device_memory_callbacks;
+ memset(&device_memory_callbacks, 0, sizeof(device_memory_callbacks));
+ IREE_STATISTICS({
+ device_memory_callbacks.pfnAllocate = iree_hal_vulkan_vma_allocate_callback;
+ device_memory_callbacks.pfnFree = iree_hal_vulkan_vma_free_callback;
+ device_memory_callbacks.pUserData = allocator;
+ });
+
+ VmaAllocatorCreateInfo create_info;
+ memset(&create_info, 0, sizeof(create_info));
+ create_info.flags = 0;
+ create_info.physicalDevice = physical_device;
+ create_info.device = *logical_device;
+ create_info.instance = instance;
+ create_info.preferredLargeHeapBlockSize = 64 * 1024 * 1024;
+ create_info.pAllocationCallbacks = logical_device->allocator();
+ create_info.pDeviceMemoryCallbacks = &device_memory_callbacks;
+ create_info.pHeapSizeLimit = NULL;
+ create_info.pVulkanFunctions = &vulkan_fns;
+ VmaAllocator vma = VK_NULL_HANDLE;
+ iree_status_t status = VK_RESULT_TO_STATUS(
+ vmaCreateAllocator(&create_info, &vma), "vmaCreateAllocator");
+
+ if (iree_status_is_ok(status)) {
+ allocator->vma = vma;
+
+ // Cache the device memory properties so the statistics callbacks can map
+ // memory type ordinals without re-querying.
+ IREE_STATISTICS({
+ const VkPhysicalDeviceMemoryProperties* memory_props = NULL;
+ vmaGetMemoryProperties(allocator->vma, &memory_props);
+ memcpy(&allocator->memory_props, memory_props,
+ sizeof(allocator->memory_props));
+ });
+
+ *out_allocator = (iree_hal_allocator_t*)allocator;
+ } else {
+ // NOTE(review): on this failure path the 'allocator' struct malloc'd above
+ // does not appear to be freed before returning — confirm whether this
+ // leaks or is released elsewhere.
+ vmaDestroyAllocator(vma);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Tears down the VMA allocator and frees the wrapper struct.
+// host_allocator is copied out first because it is needed after the struct
+// memory is released.
+static void iree_hal_vulkan_vma_allocator_destroy(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+ iree_hal_vulkan_vma_allocator_t* allocator =
+ iree_hal_vulkan_vma_allocator_cast(base_allocator);
+ iree_allocator_t host_allocator = allocator->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ vmaDestroyAllocator(allocator->vma);
+ iree_allocator_free(host_allocator, allocator);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the host allocator used for wrapper/metadata allocations.
+static iree_allocator_t iree_hal_vulkan_vma_allocator_host_allocator(
+ const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+ iree_hal_vulkan_vma_allocator_t* allocator =
+ (iree_hal_vulkan_vma_allocator_t*)base_allocator;
+ return allocator->host_allocator;
+}
+
+// No-op: this implementation does not reclaim pooled memory on trim.
+static iree_status_t iree_hal_vulkan_vma_allocator_trim(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+ return iree_ok_status();
+}
+
+// Copies out a snapshot of the running allocation statistics.
+// Compiles to a no-op (out_statistics untouched) when IREE_STATISTICS_ENABLE
+// is unset.
+static void iree_hal_vulkan_vma_allocator_query_statistics(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+ iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics) {
+ IREE_STATISTICS({
+ iree_hal_vulkan_vma_allocator_t* allocator =
+ iree_hal_vulkan_vma_allocator_cast(base_allocator);
+ memcpy(out_statistics, &allocator->statistics, sizeof(*out_statistics));
+ });
+}
+
+// Reports what a buffer allocated with |params| could be used for.
+static iree_hal_buffer_compatibility_t
+iree_hal_vulkan_vma_allocator_query_compatibility(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+ const iree_hal_buffer_params_t* IREE_RESTRICT params,
+ iree_device_size_t allocation_size) {
+ // TODO(benvanik): check to ensure the allocator can serve the memory type.
+
+ // All buffers can be allocated on the heap.
+ iree_hal_buffer_compatibility_t compatibility =
+ IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE;
+
+ // All buffers can be used as transfer source/dest.
+ if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) {
+ compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER;
+ }
+
+ // Buffers can only be used on the queue if they are device visible.
+ if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
+ if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_DISPATCH)) {
+ compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH;
+ }
+ }
+
+ return compatibility;
+}
+
+static iree_status_t iree_hal_vulkan_vma_allocator_allocate_internal(
+ iree_hal_vulkan_vma_allocator_t* IREE_RESTRICT allocator,
+ const iree_hal_buffer_params_t* IREE_RESTRICT params,
+ iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
+ VmaAllocationCreateFlags flags,
+ iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+ // Guard against the corner case where the requested buffer size is 0. The
+ // application is unlikely to do anything when requesting a 0-byte buffer; but
+ // it can happen in real world use cases. So we should at least not crash.
+ if (allocation_size == 0) allocation_size = 4;
+ // Align allocation sizes to 4 bytes so shaders operating on 32 bit types can
+ // act safely even on buffer ranges that are not naturally aligned.
+ allocation_size = iree_host_align(allocation_size, 4);
+
+ VkBufferCreateInfo buffer_create_info;
+ buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+ buffer_create_info.pNext = NULL;
+ buffer_create_info.flags = 0;
+ buffer_create_info.size = allocation_size;
+ buffer_create_info.usage = 0;
+ if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) {
+ buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+ buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+ }
+ if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_DISPATCH)) {
+ buffer_create_info.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+ buffer_create_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+ buffer_create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
+ }
+ buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+ buffer_create_info.queueFamilyIndexCount = 0;
+ buffer_create_info.pQueueFamilyIndices = NULL;
+
+ VmaAllocationCreateInfo allocation_create_info;
+ allocation_create_info.flags = flags;
+ allocation_create_info.usage = VMA_MEMORY_USAGE_UNKNOWN;
+ allocation_create_info.requiredFlags = 0;
+ allocation_create_info.preferredFlags = 0;
+ allocation_create_info.memoryTypeBits = 0; // Automatic selection.
+ allocation_create_info.pool = VK_NULL_HANDLE;
+ allocation_create_info.pUserData = NULL;
+ if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) {
+ if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
+ // Device-local, host-visible.
+ allocation_create_info.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
+ allocation_create_info.preferredFlags |=
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ } else {
+ // Device-local only.
+ allocation_create_info.usage = VMA_MEMORY_USAGE_GPU_ONLY;
+ allocation_create_info.requiredFlags |=
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+ } else {
+ if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
+ // Host-local, device-visible.
+ allocation_create_info.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
+ } else {
+ // Host-local only.
+ allocation_create_info.usage = VMA_MEMORY_USAGE_CPU_ONLY;
+ }
+ }
+ if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_CACHED)) {
+ allocation_create_info.requiredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ }
+ if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
+ allocation_create_info.requiredFlags |=
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+ }
+ if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_TRANSIENT)) {
+ allocation_create_info.preferredFlags |=
+ VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
+ }
+ if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_MAPPING)) {
+ allocation_create_info.requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ }
+
+ // TODO(benvanik): if on a unified memory system and initial data is present
+ // we could set the mapping bit and ensure a much more efficient upload.
+
+ VkBuffer handle = VK_NULL_HANDLE;
+ VmaAllocation allocation = VK_NULL_HANDLE;
+ VmaAllocationInfo allocation_info;
+ VK_RETURN_IF_ERROR(vmaCreateBuffer(allocator->vma, &buffer_create_info,
+ &allocation_create_info, &handle,
+ &allocation, &allocation_info),
+ "vmaCreateBuffer");
+
+ iree_hal_buffer_t* buffer = NULL;
+ iree_status_t status = iree_hal_vulkan_vma_buffer_wrap(
+ (iree_hal_allocator_t*)allocator, params->type, params->access,
+ params->usage, allocation_size,
+ /*byte_offset=*/0,
+ /*byte_length=*/allocation_size, allocator->vma, handle, allocation,
+ allocation_info, &buffer);
+ if (!iree_status_is_ok(status)) {
+ vmaDestroyBuffer(allocator->vma, handle, allocation);
+ return status;
+ }
+
+ // Copy the initial contents into the buffer. This may require staging.
+ if (iree_status_is_ok(status) &&
+ !iree_const_byte_span_is_empty(initial_data)) {
+ status = iree_hal_device_transfer_range(
+ allocator->device,
+ iree_hal_make_host_transfer_buffer_span((void*)initial_data.data,
+ initial_data.data_length),
+ 0, iree_hal_make_device_transfer_buffer(buffer), 0,
+ initial_data.data_length, IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+ iree_infinite_timeout());
+ }
+
+ if (iree_status_is_ok(status)) {
+ *out_buffer = buffer;
+ } else {
+ iree_hal_buffer_release(buffer);
+ }
+ return status;
+}
+
+static iree_status_t iree_hal_vulkan_vma_allocator_allocate_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  // Thin shim over the shared allocation path; plain allocations request no
+  // additional VMA creation flags.
+  return iree_hal_vulkan_vma_allocator_allocate_internal(
+      iree_hal_vulkan_vma_allocator_cast(base_allocator), params,
+      allocation_size, initial_data, /*flags=*/0, out_buffer);
+}
+
+static void iree_hal_vulkan_vma_allocator_deallocate_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_buffer_t* IREE_RESTRICT base_buffer) {
+  // No allocator-side bookkeeping is needed here: VMA pools/suballocates
+  // internally and the buffer releases its own VMA allocation on destroy.
+  iree_hal_buffer_destroy(base_buffer);
+}
+
+static iree_status_t iree_hal_vulkan_vma_allocator_import_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  // Not implemented yet; callers must allocate and copy instead.
+  // TODO(#7242): use VK_EXT_external_memory_host to import memory.
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "importing from external buffers not supported");
+}
+
+static iree_status_t iree_hal_vulkan_vma_allocator_export_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_buffer_t* IREE_RESTRICT buffer,
+    iree_hal_external_buffer_type_t requested_type,
+    iree_hal_external_buffer_flags_t requested_flags,
+    iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer) {
+  // Not implemented; mirrors the import path above.
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "exporting to external buffers not supported");
+}
+
+namespace {
+// Allocator vtable. Entry order must match the field order declared in
+// iree_hal_allocator_vtable_t; the /*.name=*/ comments stand in for
+// designated initializers (unavailable in this C++ mode).
+const iree_hal_allocator_vtable_t iree_hal_vulkan_vma_allocator_vtable = {
+    /*.destroy=*/iree_hal_vulkan_vma_allocator_destroy,
+    /*.host_allocator=*/iree_hal_vulkan_vma_allocator_host_allocator,
+    /*.trim=*/iree_hal_vulkan_vma_allocator_trim,
+    /*.query_statistics=*/iree_hal_vulkan_vma_allocator_query_statistics,
+    /*.query_compatibility=*/
+    iree_hal_vulkan_vma_allocator_query_compatibility,
+    /*.allocate_buffer=*/iree_hal_vulkan_vma_allocator_allocate_buffer,
+    /*.deallocate_buffer=*/iree_hal_vulkan_vma_allocator_deallocate_buffer,
+    /*.import_buffer=*/iree_hal_vulkan_vma_allocator_import_buffer,
+    /*.export_buffer=*/iree_hal_vulkan_vma_allocator_export_buffer,
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/vulkan/vma_allocator.h b/runtime/src/iree/hal/vulkan/vma_allocator.h
new file mode 100644
index 0000000..86d892e
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vma_allocator.h
@@ -0,0 +1,45 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_VMA_ALLOCATOR_H_
+#define IREE_HAL_VULKAN_VMA_ALLOCATOR_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/internal_vk_mem_alloc.h" // IWYU pragma: export
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a VMA-based allocator that performs internal suballocation and a
+// bunch of other fancy things.
+//
+// This uses the Vulkan Memory Allocator (VMA) to manage memory.
+// VMA (//third_party/vulkan_memory_allocator) provides dlmalloc-like behavior
+// with suballocations made with various policies (best fit, first fit, etc).
+// This reduces the number of allocations we need from the Vulkan implementation
+// (which can sometimes be limited to as little as 4096 total allowed) and
+// manages higher level allocation semantics like slab allocation and
+// defragmentation.
+//
+// VMA is internally synchronized and the functionality exposed on the HAL
+// interface is thread-safe.
+//
+// More information:
+// https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
+// https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/
+iree_status_t iree_hal_vulkan_vma_allocator_create(
+ VkInstance instance, VkPhysicalDevice physical_device,
+ iree::hal::vulkan::VkDeviceHandle* logical_device,
+ iree_hal_device_t* device, iree_hal_allocator_t** out_allocator);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_VMA_ALLOCATOR_H_
diff --git a/runtime/src/iree/hal/vulkan/vma_buffer.cc b/runtime/src/iree/hal/vulkan/vma_buffer.cc
new file mode 100644
index 0000000..765ad17
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vma_buffer.cc
@@ -0,0 +1,179 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/vma_buffer.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/vulkan/status_util.h"
+
+// A VMA-backed HAL buffer. |base| must remain the first field as pointers are
+// cast directly between iree_hal_buffer_t* and iree_hal_vulkan_vma_buffer_t*.
+typedef struct iree_hal_vulkan_vma_buffer_t {
+  iree_hal_buffer_t base;
+
+  // VMA instance the allocation came from; used for map/flush/destroy calls.
+  VmaAllocator vma;
+  // VkBuffer backing the entire allocation.
+  VkBuffer handle;
+  // VMA allocation owning the underlying device memory.
+  VmaAllocation allocation;
+  // Allocation details captured at creation time.
+  VmaAllocationInfo allocation_info;
+} iree_hal_vulkan_vma_buffer_t;
+
+namespace {
+extern const iree_hal_buffer_vtable_t iree_hal_vulkan_vma_buffer_vtable;
+} // namespace
+
+static iree_hal_vulkan_vma_buffer_t* iree_hal_vulkan_vma_buffer_cast(
+    iree_hal_buffer_t* base_value) {
+  // Vtable check guarding the downcast (no-op in release builds).
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_vma_buffer_vtable);
+  return reinterpret_cast<iree_hal_vulkan_vma_buffer_t*>(base_value);
+}
+
+// Wraps a VMA |allocation| of |allocation_size| bytes in an iree_hal_buffer_t.
+// Ownership of |handle| and |allocation| transfers to the returned buffer on
+// success; on failure they are released back to |vma| before returning.
+iree_status_t iree_hal_vulkan_vma_buffer_wrap(
+    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+    iree_device_size_t byte_offset, iree_device_size_t byte_length,
+    VmaAllocator vma, VkBuffer handle, VmaAllocation allocation,
+    VmaAllocationInfo allocation_info, iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(vma);
+  IREE_ASSERT_ARGUMENT(handle);
+  IREE_ASSERT_ARGUMENT(allocation);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_t host_allocator =
+      iree_hal_allocator_host_allocator(allocator);
+  iree_hal_vulkan_vma_buffer_t* buffer = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
+  if (iree_status_is_ok(status)) {
+    iree_hal_buffer_initialize(
+        host_allocator, allocator, &buffer->base, allocation_size, byte_offset,
+        byte_length, memory_type, allowed_access, allowed_usage,
+        &iree_hal_vulkan_vma_buffer_vtable, &buffer->base);
+    buffer->vma = vma;
+    buffer->handle = handle;
+    buffer->allocation = allocation;
+    buffer->allocation_info = allocation_info;
+
+    // TODO(benvanik): set debug name instead and use the
+    // VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT flag.
+    vmaSetAllocationUserData(buffer->vma, buffer->allocation, buffer);
+
+    // TODO(benvanik): figure out why this is not working - has unbalanced
+    // allocs in the tracy UI even though they are definitely balanced here.
+    // IREE_TRACE_ALLOC_NAMED("VMA", (void*)buffer->handle, byte_length);
+
+    *out_buffer = &buffer->base;
+  } else {
+    // Struct allocation failed: release the VMA resources we were handed so
+    // they are not leaked, and propagate the failure.
+    vmaDestroyBuffer(vma, handle, allocation);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  // NOTE: must return |status|, not iree_ok_status(); otherwise a failed
+  // malloc would report success to callers while leaving *out_buffer unset.
+  return status;
+}
+
+static void iree_hal_vulkan_vma_buffer_destroy(iree_hal_buffer_t* base_buffer) {
+  iree_hal_vulkan_vma_buffer_t* vma_buffer =
+      iree_hal_vulkan_vma_buffer_cast(base_buffer);
+  // Capture the host allocator before freeing the struct that we read it from.
+  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // IREE_TRACE_FREE_NAMED("VMA", (void*)vma_buffer->handle);
+
+  // Return the VkBuffer + device memory to VMA, then free our wrapper.
+  vmaDestroyBuffer(vma_buffer->vma, vma_buffer->handle, vma_buffer->allocation);
+  iree_allocator_free(host_allocator, vma_buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+VkBuffer iree_hal_vulkan_vma_buffer_handle(iree_hal_buffer_t* base_buffer) {
+  // Returns the VkBuffer backing the whole allocation; callers must apply the
+  // HAL buffer's byte_offset/byte_length themselves.
+  return iree_hal_vulkan_vma_buffer_cast(base_buffer)->handle;
+}
+
+static iree_status_t iree_hal_vulkan_vma_buffer_map_range(
+    iree_hal_buffer_t* base_buffer, iree_hal_mapping_mode_t mapping_mode,
+    iree_hal_memory_access_t memory_access,
+    iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+    iree_hal_buffer_mapping_t* mapping) {
+  iree_hal_vulkan_vma_buffer_t* vma_buffer =
+      iree_hal_vulkan_vma_buffer_cast(base_buffer);
+
+  // Mapping requires host-visible memory and the mapping usage bit.
+  // TODO(benvanik): add upload/download for unmapped buffers.
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
+      iree_hal_buffer_memory_type(base_buffer),
+      IREE_HAL_MEMORY_TYPE_HOST_VISIBLE));
+  IREE_RETURN_IF_ERROR(
+      iree_hal_buffer_validate_usage(iree_hal_buffer_allowed_usage(base_buffer),
+                                     IREE_HAL_BUFFER_USAGE_MAPPING));
+
+  // VMA hands back a pointer to the base of the allocation; offset into it to
+  // produce the caller-requested subrange.
+  uint8_t* mapped_base = nullptr;
+  VK_RETURN_IF_ERROR(vmaMapMemory(vma_buffer->vma, vma_buffer->allocation,
+                                  (void**)&mapped_base),
+                     "vmaMapMemory");
+  mapping->contents =
+      iree_make_byte_span(mapped_base + local_byte_offset, local_byte_length);
+
+  // If we mapped for discard scribble over the bytes. This is not a mandated
+  // behavior but it will make debugging issues easier. Alternatively for
+  // heap buffers we could reallocate them such that ASAN yells, but that
+  // would only work if the entire buffer was discarded.
+#ifndef NDEBUG
+  if (iree_any_bit_set(memory_access, IREE_HAL_MEMORY_ACCESS_DISCARD)) {
+    memset(mapping->contents.data, 0xCD, local_byte_length);
+  }
+#endif  // !NDEBUG
+
+  return iree_ok_status();
+}
+
+static iree_status_t iree_hal_vulkan_vma_buffer_unmap_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) {
+  // VMA tracks the mapping internally; the range arguments are unused here.
+  iree_hal_vulkan_vma_buffer_t* vma_buffer =
+      iree_hal_vulkan_vma_buffer_cast(base_buffer);
+  vmaUnmapMemory(vma_buffer->vma, vma_buffer->allocation);
+  return iree_ok_status();
+}
+
+static iree_status_t iree_hal_vulkan_vma_buffer_invalidate_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  // Make device writes in [offset, offset+length) visible to the host.
+  iree_hal_vulkan_vma_buffer_t* vma_buffer =
+      iree_hal_vulkan_vma_buffer_cast(base_buffer);
+  VK_RETURN_IF_ERROR(
+      vmaInvalidateAllocation(vma_buffer->vma, vma_buffer->allocation,
+                              local_byte_offset, local_byte_length),
+      "vmaInvalidateAllocation");
+  return iree_ok_status();
+}
+
+static iree_status_t iree_hal_vulkan_vma_buffer_flush_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  // Make host writes in [offset, offset+length) visible to the device.
+  iree_hal_vulkan_vma_buffer_t* vma_buffer =
+      iree_hal_vulkan_vma_buffer_cast(base_buffer);
+  VK_RETURN_IF_ERROR(
+      vmaFlushAllocation(vma_buffer->vma, vma_buffer->allocation,
+                         local_byte_offset, local_byte_length),
+      "vmaFlushAllocation");
+  return iree_ok_status();
+}
+
+namespace {
+// Buffer vtable. Entry order must match the field order declared in
+// iree_hal_buffer_vtable_t.
+const iree_hal_buffer_vtable_t iree_hal_vulkan_vma_buffer_vtable = {
+    /*.recycle=*/iree_hal_buffer_recycle,
+    /*.destroy=*/iree_hal_vulkan_vma_buffer_destroy,
+    /*.map_range=*/iree_hal_vulkan_vma_buffer_map_range,
+    /*.unmap_range=*/iree_hal_vulkan_vma_buffer_unmap_range,
+    /*.invalidate_range=*/iree_hal_vulkan_vma_buffer_invalidate_range,
+    /*.flush_range=*/iree_hal_vulkan_vma_buffer_flush_range,
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/vulkan/vma_buffer.h b/runtime/src/iree/hal/vulkan/vma_buffer.h
new file mode 100644
index 0000000..a00adbd
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vma_buffer.h
@@ -0,0 +1,37 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_VMA_BUFFER_H_
+#define IREE_HAL_VULKAN_VMA_BUFFER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/internal_vk_mem_alloc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Wraps a VMA allocation in an iree_hal_buffer_t.
+// The allocation will be released back to VMA when the buffer is released.
+iree_status_t iree_hal_vulkan_vma_buffer_wrap(
+ iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+ iree_hal_memory_access_t allowed_access,
+ iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+ iree_device_size_t byte_offset, iree_device_size_t byte_length,
+ VmaAllocator vma, VkBuffer handle, VmaAllocation allocation,
+ VmaAllocationInfo allocation_info, iree_hal_buffer_t** out_buffer);
+
+// Returns the Vulkan handle backing the given |buffer|.
+// This is the entire allocated_buffer and must be offset by the buffer
+// byte_offset and byte_length when used.
+VkBuffer iree_hal_vulkan_vma_buffer_handle(iree_hal_buffer_t* buffer);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_VMA_BUFFER_H_
diff --git a/runtime/src/iree/hal/vulkan/vulkan_device.cc b/runtime/src/iree/hal/vulkan/vulkan_device.cc
new file mode 100644
index 0000000..c7c3ecc
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vulkan_device.cc
@@ -0,0 +1,1169 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/vulkan_device.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <vector>
+
+#include "iree/base/internal/arena.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/utils/buffer_transfer.h"
+#include "iree/hal/vulkan/api.h"
+#include "iree/hal/vulkan/builtin_executables.h"
+#include "iree/hal/vulkan/command_queue.h"
+#include "iree/hal/vulkan/descriptor_pool_cache.h"
+#include "iree/hal/vulkan/direct_command_buffer.h"
+#include "iree/hal/vulkan/direct_command_queue.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/emulated_semaphore.h"
+#include "iree/hal/vulkan/extensibility_util.h"
+#include "iree/hal/vulkan/handle_util.h"
+#include "iree/hal/vulkan/native_descriptor_set.h"
+#include "iree/hal/vulkan/native_descriptor_set_layout.h"
+#include "iree/hal/vulkan/native_event.h"
+#include "iree/hal/vulkan/native_executable_layout.h"
+#include "iree/hal/vulkan/native_semaphore.h"
+#include "iree/hal/vulkan/nop_executable_cache.h"
+#include "iree/hal/vulkan/serializing_command_queue.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/timepoint_util.h"
+#include "iree/hal/vulkan/tracing.h"
+#include "iree/hal/vulkan/util/arena.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+#include "iree/hal/vulkan/vma_allocator.h"
+
+using namespace iree::hal::vulkan;
+
+//===----------------------------------------------------------------------===//
+// iree_hal_vulkan_device_t extensibility util
+//===----------------------------------------------------------------------===//
+
+// Queries the strings in extensibility |set| that IREE requires or can
+// optionally use given |requested_features|. Follows the standard two-call
+// capacity-query pattern: the total count is always written to
+// |out_string_count|; strings are only written when |out_string_values| is
+// non-NULL, and OUT_OF_RANGE is returned if |string_capacity| is too small.
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_query_extensibility_set(
+    iree_hal_vulkan_features_t requested_features,
+    iree_hal_vulkan_extensibility_set_t set, iree_host_size_t string_capacity,
+    const char** out_string_values, iree_host_size_t* out_string_count) {
+  *out_string_count = 0;
+
+  iree_status_t status = iree_ok_status();
+  iree_host_size_t string_count = 0;
+// Appends |name_literal| when querying |target_set|. Note that counting
+// continues past |string_capacity| so callers can size their arrays.
+#define ADD_EXT(target_set, name_literal)                         \
+  if (iree_status_is_ok(status) && set == (target_set)) {         \
+    if (string_count >= string_capacity && out_string_values) {   \
+      status = iree_status_from_code(IREE_STATUS_OUT_OF_RANGE);   \
+    } else if (out_string_values) {                               \
+      out_string_values[string_count] = (name_literal);           \
+    }                                                             \
+    ++string_count;                                               \
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Baseline IREE requirements
+  //===--------------------------------------------------------------------===//
+  // Using IREE at all requires these extensions unconditionally. Adding things
+  // here changes our minimum requirements and should be done carefully.
+  // Optional extensions here are feature detected by the runtime.
+
+#ifdef IREE_PLATFORM_APPLE
+  // VK_KHR_portability_subset:
+  // For Apple platforms, Vulkan is layered on top of Metal via MoltenVK.
+  // It exposes this extension to allow a non-conformant Vulkan implementation
+  // to be built on top of another non-Vulkan graphics API. This extension must
+  // be enabled if exists.
+  ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED,
+          VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
+#endif
+
+  // VK_KHR_storage_buffer_storage_class:
+  // Our generated SPIR-V kernels use storage buffers for all their data access.
+  ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED,
+          VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME);
+
+  // VK_KHR_get_physical_device_properties2:
+  // Multiple extensions depend on VK_KHR_get_physical_device_properties2.
+  // This extension was deprecated in Vulkan 1.1 as its functionality was
+  // promoted to core so we list it as optional even though we require it.
+  ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_OPTIONAL,
+          VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
+
+  // VK_KHR_push_descriptor:
+  // We can avoid a lot of additional Vulkan descriptor set manipulation
+  // overhead when this extension is present. Android is a holdout, though, and
+  // we have a fallback for when it's not available.
+  ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
+          VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
+
+  //===--------------------------------------------------------------------===//
+  // Vulkan forward-compatibility shims
+  //===--------------------------------------------------------------------===//
+  // These are shims or extensions that are made core later in the spec and can
+  // be removed once we require the core version that contains them.
+
+  // VK_KHR_timeline_semaphore:
+  // timeline semaphore support is optional and will be emulated if necessary.
+  ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
+          VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
+
+  // VK_LAYER_KHRONOS_timeline_semaphore:
+  // polyfill layer - enable if present instead of our custom emulation. Ignored
+  // if timeline semaphores are supported natively (Vulkan 1.2+).
+  ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_OPTIONAL,
+          "VK_LAYER_KHRONOS_timeline_semaphore");
+
+  //===--------------------------------------------------------------------===//
+  // Optional debugging features
+  //===--------------------------------------------------------------------===//
+  // Used only when explicitly requested as they drastically change the
+  // performance behavior of Vulkan.
+
+  // VK_LAYER_KHRONOS_validation:
+  // only enabled if validation is desired. Since validation in Vulkan is just a
+  // API correctness check it can't be used as a security mechanism and is fine
+  // to ignore.
+  if (iree_all_bits_set(requested_features,
+                        IREE_HAL_VULKAN_FEATURE_ENABLE_VALIDATION_LAYERS)) {
+    ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_OPTIONAL,
+            "VK_LAYER_KHRONOS_validation");
+  }
+
+  // VK_EXT_debug_utils:
+  // only enabled if debugging is desired to route Vulkan debug messages through
+  // our logging sinks. Note that this adds a non-trivial runtime overhead and
+  // we may want to disable it even in debug builds.
+  if (iree_all_bits_set(requested_features,
+                        IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS)) {
+    ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_OPTIONAL,
+            VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+  }
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  if (iree_all_bits_set(requested_features,
+                        IREE_HAL_VULKAN_FEATURE_ENABLE_TRACING)) {
+    // VK_EXT_host_query_reset:
+    // optionally allows for vkResetQueryPool to be used to reset query pools
+    // from the host without needing to do an expensive vkCmdResetQueryPool
+    // submission.
+    ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
+            VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
+
+    // VK_EXT_calibrated_timestamps:
+    // optionally provides more accurate timestamps that correspond to the
+    // system time. If this is not present then tracy will attempt calibration
+    // itself and have some per-run variance in the skew (up to many
+    // milliseconds).
+    ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
+            VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
+  }
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+  *out_string_count = string_count;
+  return status;
+}
+
+//===----------------------------------------------------------------------===//
+// Queue selection
+//===----------------------------------------------------------------------===//
+
+#define IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX (-1)
+
+// Result of queue family selection: which families to use for dispatch
+// (compute) and transfer work and how many queues to create from each.
+typedef struct iree_hal_vulkan_queue_family_info_t {
+  // Queue family used for dispatch operations.
+  uint32_t dispatch_index;
+  // Number of queues to take from the dispatch family.
+  iree_host_size_t dispatch_queue_count;
+  // Queue family used for transfer operations; may equal dispatch_index when
+  // no dedicated transfer family is available.
+  uint32_t transfer_index;
+  // Number of queues to take from the transfer family (0 = no dedicated
+  // transfer queues; the dispatch queues are used for transfers instead).
+  iree_host_size_t transfer_queue_count;
+} iree_hal_vulkan_queue_family_info_t;
+
+// Scans |queue_family_properties| in driver order (which is usually the
+// driver-preferred order) and returns the index of the first family that has
+// all of |required_queue_flags| and none of |excluded_queue_flags|, or
+// IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX when no family matches.
+static uint32_t iree_hal_vulkan_find_first_queue_family_with_flags(
+    uint32_t queue_family_count,
+    const VkQueueFamilyProperties* queue_family_properties,
+    VkQueueFlags required_queue_flags, VkQueueFlags excluded_queue_flags) {
+  for (uint32_t i = 0; i < queue_family_count; ++i) {
+    const VkQueueFlags family_flags = queue_family_properties[i].queueFlags;
+    if (iree_all_bits_set(family_flags, required_queue_flags) &&
+        !iree_any_bit_set(family_flags, excluded_queue_flags)) {
+      return i;
+    }
+  }
+  return IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX;
+}
+
+// Selects queue family indices for compute and transfer queues.
+// Note that both queue families may be the same if there is only one family
+// available. Compute support is mandatory (NOT_FOUND otherwise); transfer is
+// best-effort and may end with transfer_queue_count == 0.
+static iree_status_t iree_hal_vulkan_select_queue_families(
+    VkPhysicalDevice physical_device, iree::hal::vulkan::DynamicSymbols* syms,
+    iree_hal_vulkan_queue_family_info_t* out_family_info) {
+  // Enumerate queue families available on the device.
+  // Standard two-call pattern: query the count, stack-allocate, then fill.
+  uint32_t queue_family_count = 0;
+  syms->vkGetPhysicalDeviceQueueFamilyProperties(physical_device,
+                                                 &queue_family_count, NULL);
+  VkQueueFamilyProperties* queue_family_properties =
+      (VkQueueFamilyProperties*)iree_alloca(queue_family_count *
+                                            sizeof(VkQueueFamilyProperties));
+  syms->vkGetPhysicalDeviceQueueFamilyProperties(
+      physical_device, &queue_family_count, queue_family_properties);
+
+  memset(out_family_info, 0, sizeof(*out_family_info));
+  out_family_info->dispatch_index = IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX;
+  out_family_info->dispatch_queue_count = 0;
+  out_family_info->transfer_index = IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX;
+  out_family_info->transfer_queue_count = 0;
+
+  // Try to find a dedicated compute queue (no graphics caps).
+  // Some may support both transfer and compute. If that fails then fallback
+  // to any queue that supports compute.
+  out_family_info->dispatch_index =
+      iree_hal_vulkan_find_first_queue_family_with_flags(
+          queue_family_count, queue_family_properties, VK_QUEUE_COMPUTE_BIT,
+          VK_QUEUE_GRAPHICS_BIT);
+  if (out_family_info->dispatch_index ==
+      IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX) {
+    out_family_info->dispatch_index =
+        iree_hal_vulkan_find_first_queue_family_with_flags(
+            queue_family_count, queue_family_properties, VK_QUEUE_COMPUTE_BIT,
+            0);
+  }
+  if (out_family_info->dispatch_index ==
+      IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX) {
+    // Compute support is a hard requirement for IREE dispatch.
+    return iree_make_status(
+        IREE_STATUS_NOT_FOUND,
+        "unable to find any queue family support compute operations");
+  }
+  out_family_info->dispatch_queue_count =
+      queue_family_properties[out_family_info->dispatch_index].queueCount;
+
+  // Try to find a dedicated transfer queue (no compute or graphics caps).
+  // Not all devices have one, and some have only a queue family for
+  // everything and possibly a queue family just for compute/etc. If that
+  // fails then fallback to any queue that supports transfer. Finally, if
+  // /that/ fails then we just won't create a transfer queue and instead use
+  // the compute queue for all operations.
+  out_family_info->transfer_index =
+      iree_hal_vulkan_find_first_queue_family_with_flags(
+          queue_family_count, queue_family_properties, VK_QUEUE_TRANSFER_BIT,
+          VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT);
+  if (out_family_info->transfer_index ==
+      IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX) {
+    out_family_info->transfer_index =
+        iree_hal_vulkan_find_first_queue_family_with_flags(
+            queue_family_count, queue_family_properties, VK_QUEUE_TRANSFER_BIT,
+            VK_QUEUE_GRAPHICS_BIT);
+  }
+  if (out_family_info->transfer_index ==
+      IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX) {
+    out_family_info->transfer_index =
+        iree_hal_vulkan_find_first_queue_family_with_flags(
+            queue_family_count, queue_family_properties, VK_QUEUE_TRANSFER_BIT,
+            0);
+  }
+  if (out_family_info->transfer_index !=
+      IREE_HAL_VULKAN_INVALID_QUEUE_FAMILY_INDEX) {
+    out_family_info->transfer_queue_count =
+        queue_family_properties[out_family_info->transfer_index].queueCount;
+  }
+
+  // Ensure that we don't share the dispatch queues with transfer queues if
+  // that would put us over the queue count.
+  if (out_family_info->dispatch_index == out_family_info->transfer_index) {
+    out_family_info->transfer_queue_count = iree_min(
+        queue_family_properties[out_family_info->dispatch_index].queueCount -
+            out_family_info->dispatch_queue_count,
+        out_family_info->transfer_queue_count);
+  }
+
+  // Limit the number of queues we create (for now).
+  // We may want to allow this to grow, but each queue adds overhead and we
+  // need to measure to make sure we can effectively use them all.
+  out_family_info->dispatch_queue_count =
+      iree_min(2u, out_family_info->dispatch_queue_count);
+  out_family_info->transfer_queue_count =
+      iree_min(1u, out_family_info->transfer_queue_count);
+
+  return iree_ok_status();
+}
+
+// Builds a set of compute and transfer queues based on the queues available on
+// the device and some magic heuristical goo. Queue indices are encoded as a
+// bitmask in each set's queue_indices field.
+static iree_status_t iree_hal_vulkan_build_queue_sets(
+    VkPhysicalDevice physical_device, iree::hal::vulkan::DynamicSymbols* syms,
+    iree_hal_vulkan_queue_set_t* out_compute_queue_set,
+    iree_hal_vulkan_queue_set_t* out_transfer_queue_set) {
+  // Select which queues to use (and fail the implementation can't handle them).
+  iree_hal_vulkan_queue_family_info_t queue_family_info;
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_select_queue_families(
+      physical_device, syms, &queue_family_info));
+
+  // Build queue indices for the selected queue families.
+  memset(out_compute_queue_set, 0, sizeof(*out_compute_queue_set));
+  out_compute_queue_set->queue_family_index = queue_family_info.dispatch_index;
+  for (iree_host_size_t i = 0; i < queue_family_info.dispatch_queue_count;
+       ++i) {
+    out_compute_queue_set->queue_indices |= 1ull << i;
+  }
+
+  memset(out_transfer_queue_set, 0, sizeof(*out_transfer_queue_set));
+  out_transfer_queue_set->queue_family_index = queue_family_info.transfer_index;
+  uint32_t base_queue_index = 0;
+  if (queue_family_info.dispatch_index == queue_family_info.transfer_index) {
+    // Sharing a family, so transfer queues follow compute queues: the offset
+    // must be the number of dispatch queues claimed from the family (NOT the
+    // family index). iree_hal_vulkan_select_queue_families guarantees the
+    // shared family has capacity for dispatch + transfer queues combined.
+    base_queue_index = (uint32_t)queue_family_info.dispatch_queue_count;
+  }
+  for (iree_host_size_t i = 0; i < queue_family_info.transfer_queue_count;
+       ++i) {
+    out_transfer_queue_set->queue_indices |= 1ull << (i + base_queue_index);
+  }
+
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_vulkan_device_t
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_vulkan_device_t {
+  // Common HAL resource header (reference count + vtable).
+  iree_hal_resource_t resource;
+  // Device identifier used for diagnostics.
+  iree_string_view_t identifier;
+
+  // Optional driver that owns the instance. We retain it for our lifetime to
+  // ensure the instance remains valid.
+  iree_hal_driver_t* driver;
+
+  // Flags overriding default device behavior.
+  iree_hal_vulkan_device_flags_t flags;
+  // Which optional extensions are active and available on the device.
+  iree_hal_vulkan_device_extensions_t device_extensions;
+
+  // Vulkan handles; the instance may be owned by |driver| (see above).
+  VkInstance instance;
+  VkPhysicalDevice physical_device;
+  VkDeviceHandle* logical_device;
+
+  // Allocator for host-side struct allocations.
+  iree_allocator_t host_allocator;
+  // Allocator for device memory (VMA-backed; see vma_allocator.h).
+  iree_hal_allocator_t* device_allocator;
+
+  // All queues available on the device; the device owns these.
+  iree_host_size_t queue_count;
+  CommandQueue** queues;
+  // The subset of queues that support dispatch operations. May overlap with
+  // transfer_queues.
+  iree_host_size_t dispatch_queue_count;
+  CommandQueue** dispatch_queues;
+  // The subset of queues that support transfer operations. May overlap with
+  // dispatch_queues.
+  iree_host_size_t transfer_queue_count;
+  CommandQueue** transfer_queues;
+
+  // |queue_count| tracing contexts, if tracing is enabled.
+  iree_hal_vulkan_tracing_context_t** queue_tracing_contexts;
+
+  // Shared cache of descriptor pools used by command buffers.
+  DescriptorPoolCache* descriptor_pool_cache;
+
+  // Transient command pools, one per dispatch/transfer queue family.
+  VkCommandPoolHandle* dispatch_command_pool;
+  VkCommandPoolHandle* transfer_command_pool;
+
+  // Block pool used for command buffers with a larger block size (as command
+  // buffers can contain inlined data uploads).
+  iree_arena_block_pool_t block_pool;
+
+  // Used only for emulated timeline semaphores.
+  TimePointSemaphorePool* semaphore_pool;
+  TimePointFencePool* fence_pool;
+
+  // Built-in shader executables (e.g. for fills/copies) owned by the device.
+  BuiltinExecutables* builtin_executables;
+} iree_hal_vulkan_device_t;
+
+namespace {
+// Defined at the end of this file; forward declared here so the cast below
+// can assert against it.
+extern const iree_hal_device_vtable_t iree_hal_vulkan_device_vtable;
+}  // namespace
+
+// Casts the opaque base device pointer to the Vulkan implementation type,
+// asserting the vtable matches in builds where type asserts are enabled.
+static iree_hal_vulkan_device_t* iree_hal_vulkan_device_cast(
+    iree_hal_device_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_device_vtable);
+  return (iree_hal_vulkan_device_t*)base_value;
+}
+
+// Initializes |out_options| to the default values: all fields zeroed and no
+// behavior-override flags set.
+IREE_API_EXPORT void iree_hal_vulkan_device_options_initialize(
+    iree_hal_vulkan_device_options_t* out_options) {
+  memset(out_options, 0, sizeof(*out_options));
+  out_options->flags = 0;  // redundant with the memset; kept for clarity.
+}
+
+// Creates a transient command pool for the given queue family.
+// Command buffers allocated from the pool must only be issued on queues
+// belonging to the specified family.
+//
+// On success ownership of the pool is transferred to the caller via
+// |out_handle| (released with delete).
+static iree_status_t iree_hal_vulkan_create_transient_command_pool(
+    VkDeviceHandle* logical_device, uint32_t queue_family_index,
+    VkCommandPoolHandle** out_handle) {
+  VkCommandPoolCreateInfo create_info;
+  create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+  create_info.pNext = NULL;
+  // TRANSIENT hints to the driver that command buffers will be short-lived.
+  create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
+  create_info.queueFamilyIndex = queue_family_index;
+  VkCommandPoolHandle* command_pool = new VkCommandPoolHandle(logical_device);
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      logical_device->syms()->vkCreateCommandPool(
+          *logical_device, &create_info, logical_device->allocator(),
+          command_pool->mutable_value()),
+      "vkCreateCommandPool");
+  if (iree_status_is_ok(status)) {
+    *out_handle = command_pool;
+  } else {
+    // Creation failed: free the wrapper so it does not leak.
+    delete command_pool;
+  }
+  return status;
+}
+
+// Creates a command queue of the given queue family.
+// The caller takes ownership of the returned queue and must delete it.
+// |fence_pool| is non-NULL only when timeline semaphores are being emulated.
+static CommandQueue* iree_hal_vulkan_device_create_queue(
+    VkDeviceHandle* logical_device,
+    iree_hal_command_category_t command_category, uint32_t queue_family_index,
+    uint32_t queue_index, TimePointFencePool* fence_pool) {
+  VkQueue queue = VK_NULL_HANDLE;
+  logical_device->syms()->vkGetDeviceQueue(*logical_device, queue_family_index,
+                                           queue_index, &queue);
+
+  // When emulating timeline semaphores we use a special queue that allows us to
+  // sequence the semaphores correctly.
+  if (fence_pool != NULL) {
+    return new SerializingCommandQueue(logical_device, command_category, queue,
+                                       fence_pool);
+  }
+
+  return new DirectCommandQueue(logical_device, command_category, queue);
+}
+
+// Creates command queues for the given sets of queues and populates the
+// device queue lists.
+// Assumes the queue pointer arrays on |device| have already been sized for
+// all queues in both sets and that device->fence_pool (if emulating timeline
+// semaphores) and device->dispatch_command_pool have been created.
+// NOTE(review): |queue_prefix| is currently unused; queue debug names are
+// built from the fixed "Vulkan[<category>:<index>]" pattern below — confirm
+// whether the prefix was meant to be incorporated.
+static iree_status_t iree_hal_vulkan_device_initialize_command_queues(
+    iree_hal_vulkan_device_t* device,
+    iree_hal_vulkan_features_t enabled_features,
+    iree_string_view_t queue_prefix,
+    const iree_hal_vulkan_queue_set_t* compute_queue_set,
+    const iree_hal_vulkan_queue_set_t* transfer_queue_set) {
+  device->queue_count = 0;
+  device->dispatch_queue_count = 0;
+  device->transfer_queue_count = 0;
+
+  // The first available queue supporting dispatch commands that will be used by
+  // the tracing subsystem for query and cleanup tasks.
+  VkQueue maintenance_dispatch_queue = VK_NULL_HANDLE;
+
+  uint64_t compute_queue_count =
+      iree_math_count_ones_u64(compute_queue_set->queue_indices);
+  uint64_t transfer_queue_count =
+      iree_math_count_ones_u64(transfer_queue_set->queue_indices);
+  // NOTE(review): the loops below test bit positions [0, popcount); this
+  // assumes the set bits in |queue_indices| occupy the low contiguous bit
+  // positions — verify against how the queue sets are constructed.
+  for (iree_host_size_t i = 0; i < compute_queue_count; ++i) {
+    if (!(compute_queue_set->queue_indices & (1ull << i))) continue;
+
+    char queue_name_buffer[32];
+    int queue_name_length =
+        snprintf(queue_name_buffer, IREE_ARRAYSIZE(queue_name_buffer),
+                 "Vulkan[%c:%d]", 'D', (int)device->dispatch_queue_count);
+    iree_string_view_t queue_name =
+        iree_make_string_view(queue_name_buffer, queue_name_length);
+
+    CommandQueue* queue = iree_hal_vulkan_device_create_queue(
+        device->logical_device, IREE_HAL_COMMAND_CATEGORY_ANY,
+        compute_queue_set->queue_family_index, i, device->fence_pool);
+
+    iree_host_size_t queue_index = device->queue_count++;
+    device->queues[queue_index] = queue;
+    device->dispatch_queues[device->dispatch_queue_count++] = queue;
+
+    if (!transfer_queue_count) {
+      // If we don't have any dedicated transfer queues then use all dispatch
+      // queues as transfer queues.
+      device->transfer_queues[device->transfer_queue_count++] = queue;
+    }
+
+    if (maintenance_dispatch_queue == VK_NULL_HANDLE) {
+      maintenance_dispatch_queue = queue->handle();
+    }
+
+    if (iree_all_bits_set(enabled_features,
+                          IREE_HAL_VULKAN_FEATURE_ENABLE_TRACING)) {
+      IREE_RETURN_IF_ERROR(iree_hal_vulkan_tracing_context_allocate(
+          device->physical_device, device->logical_device, queue->handle(),
+          queue_name, maintenance_dispatch_queue, device->dispatch_command_pool,
+          device->host_allocator,
+          &device->queue_tracing_contexts[queue_index]));
+      queue->set_tracing_context(device->queue_tracing_contexts[queue_index]);
+    }
+  }
+  for (iree_host_size_t i = 0; i < transfer_queue_count; ++i) {
+    if (!(transfer_queue_set->queue_indices & (1ull << i))) continue;
+
+    char queue_name_buffer[32];
+    int queue_name_length =
+        snprintf(queue_name_buffer, IREE_ARRAYSIZE(queue_name_buffer),
+                 "Vulkan[%c:%d]", 'T', (int)device->transfer_queue_count);
+    iree_string_view_t queue_name =
+        iree_make_string_view(queue_name_buffer, queue_name_length);
+
+    CommandQueue* queue = iree_hal_vulkan_device_create_queue(
+        device->logical_device, IREE_HAL_COMMAND_CATEGORY_TRANSFER,
+        transfer_queue_set->queue_family_index, i, device->fence_pool);
+
+    iree_host_size_t queue_index = device->queue_count++;
+    device->queues[queue_index] = queue;
+    device->transfer_queues[device->transfer_queue_count++] = queue;
+
+    if (iree_all_bits_set(enabled_features,
+                          IREE_HAL_VULKAN_FEATURE_ENABLE_TRACING)) {
+      IREE_RETURN_IF_ERROR(iree_hal_vulkan_tracing_context_allocate(
+          device->physical_device, device->logical_device, queue->handle(),
+          queue_name, maintenance_dispatch_queue, device->dispatch_command_pool,
+          device->host_allocator,
+          &device->queue_tracing_contexts[queue_index]));
+      queue->set_tracing_context(device->queue_tracing_contexts[queue_index]);
+    }
+  }
+
+  return iree_ok_status();
+}
+
+// Allocates and initializes an iree_hal_vulkan_device_t.
+// The device struct, its identifier string, and the queue pointer arrays are
+// placed in a single host allocation. On any failure the partially
+// initialized device is torn down via iree_hal_device_destroy, which handles
+// NULL members.
+static iree_status_t iree_hal_vulkan_device_create_internal(
+    iree_hal_driver_t* driver, iree_string_view_t identifier,
+    iree_hal_vulkan_features_t enabled_features,
+    const iree_hal_vulkan_device_options_t* options, VkInstance instance,
+    VkPhysicalDevice physical_device, VkDeviceHandle* logical_device,
+    const iree_hal_vulkan_device_extensions_t* device_extensions,
+    const iree_hal_vulkan_queue_set_t* compute_queue_set,
+    const iree_hal_vulkan_queue_set_t* transfer_queue_set,
+    iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
+  auto& device_syms = logical_device->syms();
+
+  iree_host_size_t compute_queue_count =
+      iree_math_count_ones_u64(compute_queue_set->queue_indices);
+  iree_host_size_t transfer_queue_count =
+      iree_math_count_ones_u64(transfer_queue_set->queue_indices);
+  iree_host_size_t total_queue_count =
+      compute_queue_count + transfer_queue_count;
+
+  // Compute the size of the combined allocation: struct + identifier string +
+  // the three queue pointer arrays + tracing context pointers.
+  iree_hal_vulkan_device_t* device = NULL;
+  iree_host_size_t total_size =
+      sizeof(*device) + identifier.size +
+      total_queue_count * sizeof(device->queues[0]) +
+      total_queue_count * sizeof(device->dispatch_queues[0]) +
+      total_queue_count * sizeof(device->transfer_queues[0]) +
+      total_queue_count * sizeof(device->queue_tracing_contexts[0]);
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(host_allocator, total_size, (void**)&device));
+  memset(device, 0, total_size);
+  iree_hal_resource_initialize(&iree_hal_vulkan_device_vtable,
+                               &device->resource);
+  device->host_allocator = host_allocator;
+  device->driver = driver;
+  iree_hal_driver_retain(device->driver);
+  uint8_t* buffer_ptr = (uint8_t*)device + sizeof(*device);
+  buffer_ptr += iree_string_view_append_to_buffer(
+      identifier, &device->identifier, (char*)buffer_ptr);
+  device->flags = options->flags;
+
+  device->device_extensions = *device_extensions;
+  device->instance = instance;
+  device->physical_device = physical_device;
+  device->logical_device = logical_device;
+  device->logical_device->AddReference();
+
+  iree_arena_block_pool_initialize(32 * 1024, host_allocator,
+                                   &device->block_pool);
+
+  // Point the queue storage into the new device allocation. The queues
+  // themselves are populated later by
+  // iree_hal_vulkan_device_initialize_command_queues below.
+  device->queues = (CommandQueue**)buffer_ptr;
+  buffer_ptr += total_queue_count * sizeof(device->queues[0]);
+  device->dispatch_queues = (CommandQueue**)buffer_ptr;
+  buffer_ptr += total_queue_count * sizeof(device->dispatch_queues[0]);
+  device->transfer_queues = (CommandQueue**)buffer_ptr;
+  buffer_ptr += total_queue_count * sizeof(device->transfer_queues[0]);
+  device->queue_tracing_contexts =
+      (iree_hal_vulkan_tracing_context_t**)buffer_ptr;
+  buffer_ptr += total_queue_count * sizeof(device->queue_tracing_contexts[0]);
+
+  device->descriptor_pool_cache =
+      new DescriptorPoolCache(device->logical_device);
+
+  // Create the device memory allocator that will service all buffer
+  // allocation requests.
+  iree_status_t status = iree_hal_vulkan_vma_allocator_create(
+      instance, physical_device, logical_device, (iree_hal_device_t*)device,
+      &device->device_allocator);
+
+  // Create command pools for each queue family. If we don't have a transfer
+  // queue then we'll ignore that one and just use the dispatch pool.
+  // If we wanted to expose the pools through the HAL to allow the VM to more
+  // effectively manage them (pool per fiber, etc) we could, however I doubt
+  // the overhead of locking the pool will be even a blip.
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vulkan_create_transient_command_pool(
+        device->logical_device, compute_queue_set->queue_family_index,
+        &device->dispatch_command_pool);
+  }
+  if (transfer_queue_set->queue_indices != 0 && iree_status_is_ok(status)) {
+    status = iree_hal_vulkan_create_transient_command_pool(
+        device->logical_device, transfer_queue_set->queue_family_index,
+        &device->transfer_command_pool);
+  }
+
+  // Emulate timeline semaphores when the extension is not available and we are
+  // on Vulkan versions prior to 1.2 when they were made core.
+  bool emulate_timeline_semaphores =
+      device_syms->vkGetSemaphoreCounterValue == NULL ||
+      iree_all_bits_set(
+          options->flags,
+          IREE_HAL_VULKAN_DEVICE_FORCE_TIMELINE_SEMAPHORE_EMULATION);
+  if (emulate_timeline_semaphores && iree_status_is_ok(status)) {
+    status = TimePointSemaphorePool::Create(device->logical_device,
+                                            &device->semaphore_pool);
+  }
+  if (emulate_timeline_semaphores && iree_status_is_ok(status)) {
+    status =
+        TimePointFencePool::Create(device->logical_device, &device->fence_pool);
+  }
+
+  // Initialize queues now that we've completed the rest of the device
+  // initialization; this happens last as the queues require the pools allocated
+  // above.
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vulkan_device_initialize_command_queues(
+        device, enabled_features, identifier, compute_queue_set,
+        transfer_queue_set);
+  }
+
+  if (iree_status_is_ok(status)) {
+    device->builtin_executables =
+        new BuiltinExecutables(device->logical_device);
+    status = device->builtin_executables->InitializeExecutables();
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_device = (iree_hal_device_t*)device;
+  } else {
+    iree_hal_device_destroy((iree_hal_device_t*)device);
+  }
+  return status;
+}
+
+// Destroys the device, releasing resources in reverse dependency order:
+// queues first (their destructors may wait until idle), then command pools,
+// cached resources, the buffer allocator, the arena block pool, and finally
+// the logical device reference and the device memory itself.
+static void iree_hal_vulkan_device_destroy(iree_hal_device_t* base_device) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  iree_allocator_t host_allocator = iree_hal_device_host_allocator(base_device);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Drop all command queues. These may wait until idle in their destructor.
+  for (iree_host_size_t i = 0; i < device->queue_count; ++i) {
+    delete device->queues[i];
+    iree_hal_vulkan_tracing_context_free(device->queue_tracing_contexts[i]);
+  }
+
+  // Drop command pools now that we know there are no more outstanding command
+  // buffers.
+  delete device->dispatch_command_pool;
+  delete device->transfer_command_pool;
+
+  // Now that no commands are outstanding we can release all resources that may
+  // have been in use.
+  delete device->builtin_executables;
+  delete device->descriptor_pool_cache;
+  delete device->semaphore_pool;
+  delete device->fence_pool;
+
+  // There should be no more buffers live that use the allocator.
+  iree_hal_allocator_release(device->device_allocator);
+
+  // All arena blocks should have been returned.
+  iree_arena_block_pool_deinitialize(&device->block_pool);
+
+  // Finally, destroy the device.
+  device->logical_device->ReleaseReference();
+  iree_hal_driver_release(device->driver);
+
+  iree_allocator_free(host_allocator, device);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Queries the extensibility string list for |set| into |out_string_list|.
+// The query is made twice: once to size the list and once to fill it.
+// Storage for the value array is allocated from |arena| and remains valid
+// only for the arena's lifetime.
+static iree_status_t iree_hal_vulkan_device_query_extensibility_set(
+    iree_hal_vulkan_features_t requested_features,
+    iree_hal_vulkan_extensibility_set_t set, iree::Arena* arena,
+    iree_hal_vulkan_string_list_t* out_string_list) {
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_query_extensibility_set(
+      requested_features, set, 0, NULL, &out_string_list->count));
+  out_string_list->values = (const char**)arena->AllocateBytes(
+      out_string_list->count * sizeof(out_string_list->values[0]));
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_query_extensibility_set(
+      requested_features, set, out_string_list->count, out_string_list->values,
+      &out_string_list->count));
+  return iree_ok_status();
+}
+
+// Creates a device that owns and manages its own VkDevice.
+// See vulkan_device.h for the public contract. This selects extensions and
+// queue families, creates the VkDevice, and then hands ownership of the
+// VkDeviceHandle to the HAL device.
+iree_status_t iree_hal_vulkan_device_create(
+    iree_hal_driver_t* driver, iree_string_view_t identifier,
+    iree_hal_vulkan_features_t enabled_features,
+    const iree_hal_vulkan_device_options_t* options,
+    iree_hal_vulkan_syms_t* opaque_syms, VkInstance instance,
+    VkPhysicalDevice physical_device, iree_allocator_t host_allocator,
+    iree_hal_device_t** out_device) {
+  DynamicSymbols* instance_syms = (DynamicSymbols*)opaque_syms;
+
+  // Find the extensions we need (or want) that are also available
+  // on the device. This will fail when required ones are not present.
+  // TODO(benvanik): replace with a real arena.
+  iree::Arena arena(128 * 1024);
+  iree_hal_vulkan_string_list_t required_extensions;
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_device_query_extensibility_set(
+      enabled_features,
+      IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED, &arena,
+      &required_extensions));
+  iree_hal_vulkan_string_list_t optional_extensions;
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_device_query_extensibility_set(
+      enabled_features,
+      IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL, &arena,
+      &optional_extensions));
+  iree_hal_vulkan_string_list_t enabled_extensions;
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_match_available_device_extensions(
+      instance_syms, physical_device, &required_extensions,
+      &optional_extensions, &arena, &enabled_extensions));
+  iree_hal_vulkan_device_extensions_t enabled_device_extensions =
+      iree_hal_vulkan_populate_enabled_device_extensions(&enabled_extensions);
+
+  // Find queue families we will expose as HAL queues.
+  iree_hal_vulkan_queue_family_info_t queue_family_info;
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_select_queue_families(
+      physical_device, instance_syms, &queue_family_info));
+
+  bool has_dedicated_transfer_queues =
+      queue_family_info.transfer_queue_count > 0;
+
+  // TODO(benvanik): convert to using the arena.
+  // Setup the queue info we'll be using.
+  // Each queue here (created from within a family) will map to a HAL queue.
+  //
+  // Note that we need to handle the case where we have transfer queues that
+  // are of the same queue family as the dispatch queues: Vulkan requires that
+  // all queues created from the same family are done in the same
+  // VkDeviceQueueCreateInfo struct.
+  std::vector<VkDeviceQueueCreateInfo> queue_create_info;
+  // Reserve space for create infos. Note: must be the maximum used, or else
+  // references used below will be invalidated as the vector grows.
+  queue_create_info.reserve(2);
+  std::vector<float> dispatch_queue_priorities;
+  std::vector<float> transfer_queue_priorities;
+  queue_create_info.push_back({});
+  auto& dispatch_queue_info = queue_create_info.back();
+  dispatch_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+  dispatch_queue_info.pNext = NULL;
+  dispatch_queue_info.flags = 0;
+  dispatch_queue_info.queueFamilyIndex = queue_family_info.dispatch_index;
+  dispatch_queue_info.queueCount = queue_family_info.dispatch_queue_count;
+  if (has_dedicated_transfer_queues) {
+    if (queue_family_info.dispatch_index == queue_family_info.transfer_index) {
+      // Same family: fold the transfer queues into the dispatch create info as
+      // required by Vulkan.
+      dispatch_queue_info.queueCount += queue_family_info.transfer_queue_count;
+    } else {
+      queue_create_info.push_back({});
+      auto& transfer_queue_info = queue_create_info.back();
+      transfer_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+      transfer_queue_info.pNext = NULL;
+      transfer_queue_info.queueFamilyIndex = queue_family_info.transfer_index;
+      transfer_queue_info.queueCount = queue_family_info.transfer_queue_count;
+      transfer_queue_info.flags = 0;
+      transfer_queue_priorities.resize(transfer_queue_info.queueCount);
+      transfer_queue_info.pQueuePriorities = transfer_queue_priorities.data();
+    }
+  }
+  dispatch_queue_priorities.resize(dispatch_queue_info.queueCount);
+  dispatch_queue_info.pQueuePriorities = dispatch_queue_priorities.data();
+
+  // Create device and its queues.
+  VkDeviceCreateInfo device_create_info;
+  memset(&device_create_info, 0, sizeof(device_create_info));
+  device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+  device_create_info.enabledLayerCount = 0;
+  device_create_info.ppEnabledLayerNames = NULL;
+  device_create_info.enabledExtensionCount = enabled_extensions.count;
+  device_create_info.ppEnabledExtensionNames = enabled_extensions.values;
+  device_create_info.queueCreateInfoCount = queue_create_info.size();
+  device_create_info.pQueueCreateInfos = queue_create_info.data();
+  device_create_info.pEnabledFeatures = NULL;
+
+  // Features are chained off the create info via VkPhysicalDeviceFeatures2.
+  VkPhysicalDeviceFeatures2 features2;
+  memset(&features2, 0, sizeof(features2));
+  features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+  device_create_info.pNext = &features2;
+
+  VkPhysicalDeviceTimelineSemaphoreFeatures semaphore_features;
+  bool emulate_timeline_semaphores =
+      !enabled_device_extensions.timeline_semaphore ||
+      iree_all_bits_set(
+          options->flags,
+          IREE_HAL_VULKAN_DEVICE_FORCE_TIMELINE_SEMAPHORE_EMULATION);
+  if (!emulate_timeline_semaphores) {
+    memset(&semaphore_features, 0, sizeof(semaphore_features));
+    semaphore_features.sType =
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
+    semaphore_features.pNext = features2.pNext;
+    features2.pNext = &semaphore_features;
+    semaphore_features.timelineSemaphore = VK_TRUE;
+  }
+
+  VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset_features;
+  if (enabled_device_extensions.host_query_reset) {
+    memset(&host_query_reset_features, 0, sizeof(host_query_reset_features));
+    host_query_reset_features.sType =
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT;
+    host_query_reset_features.pNext = features2.pNext;
+    features2.pNext = &host_query_reset_features;
+    host_query_reset_features.hostQueryReset = VK_TRUE;
+  }
+
+  auto logical_device = new VkDeviceHandle(
+      instance_syms, enabled_device_extensions,
+      /*owns_device=*/true, host_allocator, /*allocator=*/NULL);
+
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      instance_syms->vkCreateDevice(physical_device, &device_create_info,
+                                    logical_device->allocator(),
+                                    logical_device->mutable_value()),
+      "vkCreateDevice");
+  if (iree_status_is_ok(status)) {
+    status = logical_device->syms()->LoadFromDevice(instance,
+                                                    logical_device->value());
+  }
+
+  // Select queue indices and create command queues with them.
+  iree_hal_vulkan_queue_set_t compute_queue_set;
+  iree_hal_vulkan_queue_set_t transfer_queue_set;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vulkan_build_queue_sets(
+        physical_device, logical_device->syms().get(), &compute_queue_set,
+        &transfer_queue_set);
+  }
+
+  // Allocate and initialize the device.
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vulkan_device_create_internal(
+        driver, identifier, enabled_features, options, instance,
+        physical_device, logical_device, &enabled_device_extensions,
+        &compute_queue_set, &transfer_queue_set, host_allocator, out_device);
+  }
+
+  // Drop our reference to the handle; on success the device added its own
+  // reference in iree_hal_vulkan_device_create_internal, and on failure this
+  // releases the last reference.
+  logical_device->ReleaseReference();
+  return status;
+}
+
+// Wraps an existing VkDevice created by the application.
+// The returned HAL device does not own the VkDevice (|owns_device|=false)
+// and has no parent driver to retain. Because the VkDevice already exists we
+// cannot enable or query extensions; they are inferred from which symbols
+// load successfully.
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_wrap_device(
+    iree_string_view_t identifier,
+    const iree_hal_vulkan_device_options_t* options,
+    const iree_hal_vulkan_syms_t* instance_syms, VkInstance instance,
+    VkPhysicalDevice physical_device, VkDevice logical_device,
+    const iree_hal_vulkan_queue_set_t* compute_queue_set,
+    const iree_hal_vulkan_queue_set_t* transfer_queue_set,
+    iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
+  IREE_ASSERT_ARGUMENT(instance_syms);
+  IREE_ASSERT_ARGUMENT(instance);
+  IREE_ASSERT_ARGUMENT(physical_device);
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(out_device);
+
+  if (iree_math_count_ones_u64(compute_queue_set->queue_indices) == 0) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "at least one compute queue is required");
+  }
+
+  // Grab symbols from the device.
+  auto device_syms = iree::make_ref<DynamicSymbols>();
+  device_syms->vkGetInstanceProcAddr =
+      ((const DynamicSymbols*)instance_syms)->vkGetInstanceProcAddr;
+  IREE_RETURN_IF_ERROR(device_syms->LoadFromDevice(instance, logical_device));
+
+  // Since the device is already created, we can't actually enable any
+  // extensions or query if they are really enabled - we just have to trust
+  // that the caller already enabled them for us or we may fail later. For the
+  // optional extensions we check for the symbols but this is not always
+  // guaranteed to work.
+  iree_hal_vulkan_device_extensions_t enabled_device_extensions =
+      iree_hal_vulkan_infer_enabled_device_extensions(device_syms.get());
+
+  iree_hal_vulkan_features_t enabled_features = 0;
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  enabled_features |= IREE_HAL_VULKAN_FEATURE_ENABLE_TRACING;
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+  // Wrap the provided VkDevice with a VkDeviceHandle for use within the HAL.
+  auto logical_device_handle = new VkDeviceHandle(
+      device_syms.get(), enabled_device_extensions,
+      /*owns_device=*/false, host_allocator, /*allocator=*/NULL);
+  *logical_device_handle->mutable_value() = logical_device;
+
+  // Allocate and initialize the device.
+  iree_status_t status = iree_hal_vulkan_device_create_internal(
+      /*driver=*/NULL, identifier, enabled_features, options, instance,
+      physical_device, logical_device_handle, &enabled_device_extensions,
+      compute_queue_set, transfer_queue_set, host_allocator, out_device);
+
+  // Drop our reference; on success the device retained the handle itself.
+  logical_device_handle->ReleaseReference();
+  return status;
+}
+
+// Returns the device identifier captured at creation time; the string storage
+// lives inline in the device allocation and remains valid for its lifetime.
+static iree_string_view_t iree_hal_vulkan_device_id(
+    iree_hal_device_t* base_device) {
+  return iree_hal_vulkan_device_cast(base_device)->identifier;
+}
+
+// Returns the host allocator the device (and its internal state) was
+// allocated from.
+static iree_allocator_t iree_hal_vulkan_device_host_allocator(
+    iree_hal_device_t* base_device) {
+  return iree_hal_vulkan_device_cast(base_device)->host_allocator;
+}
+
+// Returns the buffer allocator servicing allocation requests on this device.
+static iree_hal_allocator_t* iree_hal_vulkan_device_allocator(
+    iree_hal_device_t* base_device) {
+  return iree_hal_vulkan_device_cast(base_device)->device_allocator;
+}
+
+// Trims pools and caches to reduce memory usage: returns unused arena blocks
+// and asks the device allocator to trim itself.
+static iree_status_t iree_hal_vulkan_device_trim(
+    iree_hal_device_t* base_device) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  iree_arena_block_pool_trim(&device->block_pool);
+  return iree_hal_allocator_trim(device->device_allocator);
+}
+
+// Queries an implementation-specific int32 configuration value.
+// Currently only answers the 'hal.executable.format' category: 1 when |key|
+// is the 'vulkan-spirv-fb' format, 0 otherwise. Unknown categories fail with
+// NOT_FOUND.
+static iree_status_t iree_hal_vulkan_device_query_i32(
+    iree_hal_device_t* base_device, iree_string_view_t category,
+    iree_string_view_t key, int32_t* out_value) {
+  // iree_hal_vulkan_device_t* device =
+  //     iree_hal_vulkan_device_cast(base_device);
+  *out_value = 0;
+
+  if (iree_string_view_equal(category,
+                             iree_make_cstring_view("hal.executable.format"))) {
+    *out_value =
+        iree_string_view_equal(key, iree_make_cstring_view("vulkan-spirv-fb"))
+            ? 1
+            : 0;
+    return iree_ok_status();
+  }
+
+  return iree_make_status(
+      IREE_STATUS_NOT_FOUND,
+      "unknown device configuration key value '%.*s :: %.*s'",
+      (int)category.size, category.data, (int)key.size, key.data);
+}
+
+// Returns the queue to submit work to based on the |queue_affinity|.
+static CommandQueue* iree_hal_vulkan_device_select_queue(
+    iree_hal_vulkan_device_t* device,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity) {
+  // TODO(scotttodd): revisit queue selection logic and remove this
+  //   * the unaligned buffer fill polyfill and tracing timestamp queries may
+  //     both insert dispatches into command buffers that at compile time are
+  //     expected to only contain transfer commands
+  //   * we could set a bit at recording time if emulation or tracing is used
+  //     and submit to the right queue based on that
+  command_categories |= IREE_HAL_COMMAND_CATEGORY_DISPATCH;
+
+  // TODO(benvanik): meaningful heuristics for affinity. We don't generate
+  // anything from the compiler that uses multiple queues and until we do it's
+  // best not to do anything too clever here.
+  // NOTE(review): because DISPATCH was just forced into |command_categories|
+  // the transfer-only branch below can never be taken until the TODO above is
+  // resolved; all submissions currently route to dispatch queues.
+  if (command_categories == IREE_HAL_COMMAND_CATEGORY_TRANSFER) {
+    return device
+        ->transfer_queues[queue_affinity % device->transfer_queue_count];
+  }
+  return device->dispatch_queues[queue_affinity % device->dispatch_queue_count];
+}
+
+// Creates a direct command buffer that records into a command pool chosen
+// from the requested |command_categories| and is associated (for tracing)
+// with a queue chosen from |queue_affinity|.
+static iree_status_t iree_hal_vulkan_device_create_command_buffer(
+    iree_hal_device_t* base_device, iree_hal_command_buffer_mode_t mode,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity,
+    iree_hal_command_buffer_t** out_command_buffer) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+
+  // TODO(scotttodd): revisit queue selection logic and remove this
+  //   * the unaligned buffer fill polyfill and tracing timestamp queries may
+  //     both insert dispatches into command buffers that at compile time are
+  //     expected to only contain transfer commands
+  //   * we could set a bit at recording time if emulation or tracing is used
+  //     and submit to the right queue based on that
+  command_categories |= IREE_HAL_COMMAND_CATEGORY_DISPATCH;
+
+  // Select the command pool to used based on the types of commands used.
+  // Note that we may not have a dedicated transfer command pool if there are
+  // no dedicated transfer queues.
+  VkCommandPoolHandle* command_pool = NULL;
+  if (device->transfer_command_pool &&
+      !iree_all_bits_set(command_categories,
+                         IREE_HAL_COMMAND_CATEGORY_DISPATCH)) {
+    command_pool = device->transfer_command_pool;
+  } else {
+    command_pool = device->dispatch_command_pool;
+  }
+
+  // The tracing context is tied to a particular queue so we must select here
+  // even though ideally we'd do it during submission. This is informational
+  // only and if the user does provide a different queue affinity during
+  // submission it just means the commands will be attributed to the wrong
+  // queue.
+  CommandQueue* queue = iree_hal_vulkan_device_select_queue(
+      device, command_categories, queue_affinity);
+
+  return iree_hal_vulkan_direct_command_buffer_allocate(
+      base_device, device->logical_device, command_pool, mode,
+      command_categories, queue_affinity, queue->tracing_context(),
+      device->descriptor_pool_cache, device->builtin_executables,
+      &device->block_pool, out_command_buffer);
+}
+
+// Creates a (non-push) descriptor set; not yet implemented and always fails
+// with UNIMPLEMENTED.
+static iree_status_t iree_hal_vulkan_device_create_descriptor_set(
+    iree_hal_device_t* base_device,
+    iree_hal_descriptor_set_layout_t* set_layout,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_binding_t* bindings,
+    iree_hal_descriptor_set_t** out_descriptor_set) {
+  // TODO(benvanik): rework the create fn to take the bindings.
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "non-push descriptor sets still need work");
+}
+
+// Creates a descriptor set layout by delegating to the native implementation
+// on our logical device.
+static iree_status_t iree_hal_vulkan_device_create_descriptor_set_layout(
+    iree_hal_device_t* base_device,
+    iree_hal_descriptor_set_layout_usage_type_t usage_type,
+    iree_host_size_t binding_count,
+    const iree_hal_descriptor_set_layout_binding_t* bindings,
+    iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  return iree_hal_vulkan_native_descriptor_set_layout_create(
+      device->logical_device, usage_type, binding_count, bindings,
+      out_descriptor_set_layout);
+}
+
+// Creates a HAL event by delegating to the native event implementation on
+// our logical device.
+static iree_status_t iree_hal_vulkan_device_create_event(
+    iree_hal_device_t* base_device, iree_hal_event_t** out_event) {
+  return iree_hal_vulkan_native_event_create(
+      iree_hal_vulkan_device_cast(base_device)->logical_device, out_event);
+}
+
+// Creates an executable cache; delegates to the 'nop' cache implementation
+// on our logical device.
+static iree_status_t iree_hal_vulkan_device_create_executable_cache(
+    iree_hal_device_t* base_device, iree_string_view_t identifier,
+    iree_loop_t loop, iree_hal_executable_cache_t** out_executable_cache) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  return iree_hal_vulkan_nop_executable_cache_create(
+      device->logical_device, identifier, out_executable_cache);
+}
+
+// Creates an executable layout from push constant and descriptor set layout
+// information by delegating to the native implementation.
+static iree_status_t iree_hal_vulkan_device_create_executable_layout(
+    iree_hal_device_t* base_device, iree_host_size_t push_constants,
+    iree_host_size_t set_layout_count,
+    iree_hal_descriptor_set_layout_t** set_layouts,
+    iree_hal_executable_layout_t** out_executable_layout) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  return iree_hal_vulkan_native_executable_layout_create(
+      device->logical_device, push_constants, set_layout_count, set_layouts,
+      out_executable_layout);
+}
+
+// Creates a timeline semaphore with the given initial value. When
+// |semaphore_pool| is non-NULL (native timeline semaphores unavailable or
+// emulation forced at device creation) the emulated implementation is used.
+static iree_status_t iree_hal_vulkan_device_create_semaphore(
+    iree_hal_device_t* base_device, uint64_t initial_value,
+    iree_hal_semaphore_t** out_semaphore) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  if (device->semaphore_pool != NULL) {
+    return iree_hal_vulkan_emulated_semaphore_create(
+        device->logical_device, device->semaphore_pool, device->queue_count,
+        device->queues, initial_value, out_semaphore);
+  }
+  return iree_hal_vulkan_native_semaphore_create(device->logical_device,
+                                                 initial_value, out_semaphore);
+}
+
+// Submits |batch_count| batches to a queue chosen from the requested command
+// categories and affinity.
+static iree_status_t iree_hal_vulkan_device_queue_submit(
+    iree_hal_device_t* base_device,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+    const iree_hal_submission_batch_t* batches) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  CommandQueue* queue = iree_hal_vulkan_device_select_queue(
+      device, command_categories, queue_affinity);
+  return queue->Submit(batch_count, batches);
+}
+
+// Submits the batches and then blocks until |wait_semaphore| reaches
+// |wait_value| or |timeout| elapses.
+static iree_status_t iree_hal_vulkan_device_submit_and_wait(
+    iree_hal_device_t* base_device,
+    iree_hal_command_category_t command_categories,
+    iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
+    const iree_hal_submission_batch_t* batches,
+    iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
+    iree_timeout_t timeout) {
+  // Submit...
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_device_queue_submit(
+      base_device, command_categories, queue_affinity, batch_count, batches));
+
+  // ...and wait.
+  return iree_hal_semaphore_wait(wait_semaphore, wait_value, timeout);
+}
+
+// Waits on a list of semaphores until all (or, with IREE_HAL_WAIT_MODE_ANY,
+// at least one) reach their payload values or |timeout| elapses. Routes to
+// the emulated implementation when the semaphore pool is in use.
+static iree_status_t iree_hal_vulkan_device_wait_semaphores(
+    iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode,
+    const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  VkSemaphoreWaitFlags wait_flags = 0;
+  if (wait_mode == IREE_HAL_WAIT_MODE_ANY) {
+    wait_flags |= VK_SEMAPHORE_WAIT_ANY_BIT;
+  }
+  if (device->semaphore_pool != NULL) {
+    return iree_hal_vulkan_emulated_semaphore_multi_wait(
+        device->logical_device, semaphore_list, timeout, wait_flags);
+  }
+  return iree_hal_vulkan_native_semaphore_multi_wait(
+      device->logical_device, semaphore_list, timeout, wait_flags);
+}
+
+// Blocks until every queue on the device is idle or |timeout| elapses,
+// returning the first failing queue's status.
+static iree_status_t iree_hal_vulkan_device_wait_idle(
+    iree_hal_device_t* base_device, iree_timeout_t timeout) {
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  for (iree_host_size_t i = 0; i < device->queue_count; ++i) {
+    IREE_RETURN_IF_ERROR(device->queues[i]->WaitIdle(timeout));
+  }
+  return iree_ok_status();
+}
+
+namespace {
+// Function table mapping HAL device API entry points to the implementations
+// above; the /*.field=*/ comments indicate which vtable slot each entry
+// fills and must be kept in declaration order.
+const iree_hal_device_vtable_t iree_hal_vulkan_device_vtable = {
+    /*.destroy=*/iree_hal_vulkan_device_destroy,
+    /*.id=*/iree_hal_vulkan_device_id,
+    /*.host_allocator=*/iree_hal_vulkan_device_host_allocator,
+    /*.device_allocator=*/iree_hal_vulkan_device_allocator,
+    /*.trim=*/iree_hal_vulkan_device_trim,
+    /*.query_i32=*/iree_hal_vulkan_device_query_i32,
+    /*.create_command_buffer=*/iree_hal_vulkan_device_create_command_buffer,
+    /*.create_descriptor_set=*/iree_hal_vulkan_device_create_descriptor_set,
+    /*.create_descriptor_set_layout=*/
+    iree_hal_vulkan_device_create_descriptor_set_layout,
+    /*.create_event=*/iree_hal_vulkan_device_create_event,
+    /*.create_executable_cache=*/
+    iree_hal_vulkan_device_create_executable_cache,
+    /*.create_executable_layout=*/
+    iree_hal_vulkan_device_create_executable_layout,
+    /*.create_semaphore=*/iree_hal_vulkan_device_create_semaphore,
+    /*.transfer_range=*/iree_hal_device_submit_transfer_range_and_wait,
+    /*.queue_submit=*/iree_hal_vulkan_device_queue_submit,
+    /*.submit_and_wait=*/
+    iree_hal_vulkan_device_submit_and_wait,
+    /*.wait_semaphores=*/iree_hal_vulkan_device_wait_semaphores,
+    /*.wait_idle=*/iree_hal_vulkan_device_wait_idle,
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/vulkan/vulkan_device.h b/runtime/src/iree/hal/vulkan/vulkan_device.h
new file mode 100644
index 0000000..6cf2244
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vulkan_device.h
@@ -0,0 +1,38 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_VULKAN_DEVICE_H_
+#define IREE_HAL_VULKAN_VULKAN_DEVICE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/api.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/extensibility_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a device that owns and manages its own VkDevice.
+//
+// The |driver| will be retained for as long as the device is live such that if
+// the driver owns the |instance| provided it is ensured to be valid. |driver|
+// may be NULL if there is no parent driver to retain (such as when wrapping
+// existing VkInstances provided by the application).
+iree_status_t iree_hal_vulkan_device_create(
+ iree_hal_driver_t* driver, iree_string_view_t identifier,
+ iree_hal_vulkan_features_t enabled_features,
+ const iree_hal_vulkan_device_options_t* options,
+ iree_hal_vulkan_syms_t* instance_syms, VkInstance instance,
+ VkPhysicalDevice physical_device, iree_allocator_t host_allocator,
+ iree_hal_device_t** out_device);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_VULKAN_VULKAN_DEVICE_H_
diff --git a/runtime/src/iree/hal/vulkan/vulkan_driver.cc b/runtime/src/iree/hal/vulkan/vulkan_driver.cc
new file mode 100644
index 0000000..e58e680
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vulkan_driver.cc
@@ -0,0 +1,481 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/vulkan/vulkan_driver.h"
+
+#include <cstdint>
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/api.h"
+#include "iree/hal/vulkan/debug_reporter.h"
+#include "iree/hal/vulkan/dynamic_symbols.h"
+#include "iree/hal/vulkan/extensibility_util.h"
+#include "iree/hal/vulkan/status_util.h"
+#include "iree/hal/vulkan/util/arena.h"
+#include "iree/hal/vulkan/util/ref_ptr.h"
+#include "iree/hal/vulkan/vulkan_device.h"
+
+using namespace iree::hal::vulkan;
+
+typedef struct iree_hal_vulkan_driver_t {
+ iree_hal_resource_t resource;
+ iree_allocator_t host_allocator;
+
+ // Identifier used for the driver in the IREE driver registry.
+ // We allow overriding so that multiple Vulkan versions can be exposed in the
+ // same process.
+ iree_string_view_t identifier;
+
+ iree_hal_vulkan_device_options_t device_options;
+ int default_device_index;
+
+ iree_hal_vulkan_features_t enabled_features;
+
+ // Which optional extensions are active and available on the instance.
+ iree_hal_vulkan_instance_extensions_t instance_extensions;
+
+ // (Partial) loaded Vulkan symbols. Devices created within the driver may have
+ // different function pointers for device-specific functions that change
+ // behavior with enabled layers/extensions.
+ iree::ref_ptr<DynamicSymbols> syms;
+
+ // The Vulkan instance that all devices created from the driver will share.
+ VkInstance instance;
+ bool owns_instance;
+
+ // Optional debug reporter: may be disabled or unavailable (no debug layers).
+ iree_hal_vulkan_debug_reporter_t* debug_reporter;
+} iree_hal_vulkan_driver_t;
+
+namespace {
+extern const iree_hal_driver_vtable_t iree_hal_vulkan_driver_vtable;
+} // namespace
+
+static iree_hal_vulkan_driver_t* iree_hal_vulkan_driver_cast(
+ iree_hal_driver_t* base_value) {
+ IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_driver_vtable);
+ return (iree_hal_vulkan_driver_t*)base_value;
+}
+
+IREE_API_EXPORT void iree_hal_vulkan_driver_options_initialize(
+ iree_hal_vulkan_driver_options_t* out_options) {
+ memset(out_options, 0, sizeof(*out_options));
+ out_options->api_version = VK_API_VERSION_1_2;
+ out_options->requested_features = 0;
+ iree_hal_vulkan_device_options_initialize(&out_options->device_options);
+ out_options->default_device_index = 0;
+}
+
+// Returns a VkApplicationInfo struct populated with the default app info.
+// We may allow hosting applications to override this via weak-linkage if it's
+// useful, otherwise this is enough to create the application.
+static void iree_hal_vulkan_driver_populate_default_app_info(
+ const iree_hal_vulkan_driver_options_t* options,
+ VkApplicationInfo* out_app_info) {
+ memset(out_app_info, 0, sizeof(*out_app_info));
+ out_app_info->sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+ out_app_info->pNext = NULL;
+ out_app_info->pApplicationName = "IREE-ML";
+ out_app_info->applicationVersion = 0;
+ out_app_info->pEngineName = "IREE";
+ out_app_info->engineVersion = 0;
+ out_app_info->apiVersion = options->api_version;
+}
+
+// NOTE: takes ownership of |instance|.
+static iree_status_t iree_hal_vulkan_driver_create_internal(
+ iree_string_view_t identifier,
+ const iree_hal_vulkan_driver_options_t* options,
+ const iree_hal_vulkan_string_list_t* enabled_extensions,
+ iree_hal_vulkan_syms_t* opaque_syms, VkInstance instance,
+ bool owns_instance, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver) {
+ auto* instance_syms = (DynamicSymbols*)opaque_syms;
+
+ iree_hal_vulkan_instance_extensions_t instance_extensions =
+ iree_hal_vulkan_populate_enabled_instance_extensions(enabled_extensions);
+
+ // The real debug messenger (not just the static one used above) can now be
+ // created as we've loaded all the required symbols.
+ // TODO(benvanik): strip in min-size release builds.
+ iree_hal_vulkan_debug_reporter_t* debug_reporter = NULL;
+ if (instance_extensions.debug_utils) {
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_debug_reporter_allocate(
+ instance, instance_syms, /*allocation_callbacks=*/NULL, host_allocator,
+ &debug_reporter));
+ }
+
+ iree_hal_vulkan_driver_t* driver = NULL;
+ iree_host_size_t total_size = sizeof(*driver) + identifier.size;
+ iree_status_t status =
+ iree_allocator_malloc(host_allocator, total_size, (void**)&driver);
+ if (!iree_status_is_ok(status)) {
+ // Need to clean up if we fail (as we own these).
+ iree_hal_vulkan_debug_reporter_free(debug_reporter);
+ return status;
+ }
+ iree_hal_resource_initialize(&iree_hal_vulkan_driver_vtable,
+ &driver->resource);
+ driver->host_allocator = host_allocator;
+ iree_string_view_append_to_buffer(
+ identifier, &driver->identifier,
+ (char*)driver + total_size - identifier.size);
+ memcpy(&driver->device_options, &options->device_options,
+ sizeof(driver->device_options));
+ driver->default_device_index = options->default_device_index;
+ driver->enabled_features = options->requested_features;
+ driver->syms = iree::add_ref(instance_syms);
+ driver->instance = instance;
+ driver->owns_instance = owns_instance;
+ driver->debug_reporter = debug_reporter;
+ *out_driver = (iree_hal_driver_t*)driver;
+ return status;
+}
+
+static void iree_hal_vulkan_driver_destroy(iree_hal_driver_t* base_driver) {
+ iree_hal_vulkan_driver_t* driver = iree_hal_vulkan_driver_cast(base_driver);
+ iree_allocator_t host_allocator = driver->host_allocator;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_vulkan_debug_reporter_free(driver->debug_reporter);
+ if (driver->owns_instance) {
+ driver->syms->vkDestroyInstance(driver->instance, /*pAllocator=*/NULL);
+ }
+ driver->syms.reset();
+ iree_allocator_free(host_allocator, driver);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static iree_status_t iree_hal_vulkan_driver_query_extensibility_set(
+ iree_hal_vulkan_features_t requested_features,
+ iree_hal_vulkan_extensibility_set_t set, iree::Arena* arena,
+ iree_hal_vulkan_string_list_t* out_string_list) {
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_query_extensibility_set(
+ requested_features, set, 0, NULL, &out_string_list->count));
+ out_string_list->values = (const char**)arena->AllocateBytes(
+ out_string_list->count * sizeof(out_string_list->values[0]));
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_query_extensibility_set(
+ requested_features, set, out_string_list->count, out_string_list->values,
+ &out_string_list->count));
+ return iree_ok_status();
+}
+
+static iree_status_t iree_hal_vulkan_driver_compute_enabled_extensibility_sets(
+ iree::hal::vulkan::DynamicSymbols* syms,
+ iree_hal_vulkan_features_t requested_features, iree::Arena* arena,
+ iree_hal_vulkan_string_list_t* out_enabled_layers,
+ iree_hal_vulkan_string_list_t* out_enabled_extensions) {
+ // Query our required and optional layers and extensions based on the IREE
+ // features the user requested.
+ iree_hal_vulkan_string_list_t required_layers;
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_driver_query_extensibility_set(
+ requested_features,
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_REQUIRED, arena,
+ &required_layers));
+ iree_hal_vulkan_string_list_t optional_layers;
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_driver_query_extensibility_set(
+ requested_features,
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_OPTIONAL, arena,
+ &optional_layers));
+ iree_hal_vulkan_string_list_t required_extensions;
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_driver_query_extensibility_set(
+ requested_features,
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_REQUIRED, arena,
+ &required_extensions));
+ iree_hal_vulkan_string_list_t optional_extensions;
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_driver_query_extensibility_set(
+ requested_features,
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_OPTIONAL, arena,
+ &optional_extensions));
+
+ // Find the layers and extensions we need (or want) that are also available
+ // on the instance. This will fail when required ones are not present.
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_match_available_instance_layers(
+ syms, &required_layers, &optional_layers, arena, out_enabled_layers));
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_match_available_instance_extensions(
+ syms, &required_extensions, &optional_extensions, arena,
+ out_enabled_extensions));
+
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_driver_create(
+ iree_string_view_t identifier,
+ const iree_hal_vulkan_driver_options_t* options,
+ iree_hal_vulkan_syms_t* opaque_syms, iree_allocator_t host_allocator,
+ iree_hal_driver_t** out_driver) {
+ IREE_ASSERT_ARGUMENT(options);
+ IREE_ASSERT_ARGUMENT(opaque_syms);
+ IREE_ASSERT_ARGUMENT(out_driver);
+ IREE_TRACE_SCOPE();
+
+ auto* instance_syms = (DynamicSymbols*)opaque_syms;
+
+ // Query required and optional instance layers/extensions for the requested
+ // features.
+ iree::Arena arena;
+ iree_hal_vulkan_string_list_t enabled_layers;
+ iree_hal_vulkan_string_list_t enabled_extensions;
+ IREE_RETURN_IF_ERROR(
+ iree_hal_vulkan_driver_compute_enabled_extensibility_sets(
+ instance_syms, options->requested_features, &arena, &enabled_layers,
+ &enabled_extensions));
+
+ // Create the instance this driver will use for all requests.
+ VkApplicationInfo app_info;
+ iree_hal_vulkan_driver_populate_default_app_info(options, &app_info);
+ VkInstanceCreateInfo create_info;
+ create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+ create_info.pNext = NULL;
+ create_info.flags = 0;
+ create_info.pApplicationInfo = &app_info;
+ create_info.enabledLayerCount = enabled_layers.count;
+ create_info.ppEnabledLayerNames = enabled_layers.values;
+ create_info.enabledExtensionCount = enabled_extensions.count;
+ create_info.ppEnabledExtensionNames = enabled_extensions.values;
+
+ VkInstance instance = VK_NULL_HANDLE;
+ VK_RETURN_IF_ERROR(instance_syms->vkCreateInstance(
+ &create_info, /*pAllocator=*/NULL, &instance),
+ "vkCreateInstance: invalid instance configuration");
+
+ // Now that the instance has been created we can fetch all of the instance
+ // symbols.
+ iree_status_t status = instance_syms->LoadFromInstance(instance);
+
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_vulkan_driver_create_internal(
+ identifier, options, &enabled_extensions, opaque_syms, instance,
+ /*owns_instance=*/true, host_allocator, out_driver);
+ }
+
+ if (!iree_status_is_ok(status)) {
+ instance_syms->vkDestroyInstance(instance, /*pAllocator=*/NULL);
+ }
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_vulkan_driver_create_using_instance(
+ iree_string_view_t identifier,
+ const iree_hal_vulkan_driver_options_t* options,
+ iree_hal_vulkan_syms_t* opaque_syms, VkInstance instance,
+ iree_allocator_t host_allocator, iree_hal_driver_t** out_driver) {
+ IREE_ASSERT_ARGUMENT(options);
+ IREE_ASSERT_ARGUMENT(opaque_syms);
+ IREE_ASSERT_ARGUMENT(out_driver);
+ if (instance == VK_NULL_HANDLE) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "a non-NULL VkInstance must be provided");
+ }
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // May be a no-op but don't rely on that so we can be sure we have the right
+ // function pointers.
+ auto* instance_syms = (DynamicSymbols*)opaque_syms;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(z0, instance_syms->LoadFromInstance(instance));
+
+ // Since the instance is already created we can't actually enable any
+ // extensions or even query if they are really enabled - we just have to trust
+ // that the caller already enabled them for us (or we may fail later).
+ iree::Arena arena;
+ iree_hal_vulkan_string_list_t enabled_layers;
+ iree_hal_vulkan_string_list_t enabled_extensions;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_vulkan_driver_compute_enabled_extensibility_sets(
+              instance_syms, options->requested_features, &arena,
+              &enabled_layers, &enabled_extensions));
+
+ iree_status_t status = iree_hal_vulkan_driver_create_internal(
+ identifier, options, &enabled_extensions, opaque_syms, instance,
+      /*owns_instance=*/false, host_allocator, out_driver);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Enumerates all physical devices on |instance| and returns them as an
+// allocated list in |out_physical_devices|, which must be freed by the caller.
+static iree_status_t iree_hal_vulkan_driver_enumerate_physical_devices(
+ iree::hal::vulkan::DynamicSymbols* instance_syms, VkInstance instance,
+ iree_allocator_t host_allocator, uint32_t* out_physical_device_count,
+ VkPhysicalDevice** out_physical_devices) {
+ uint32_t physical_device_count = 0;
+ VK_RETURN_IF_ERROR(instance_syms->vkEnumeratePhysicalDevices(
+ instance, &physical_device_count, NULL),
+ "vkEnumeratePhysicalDevices");
+ VkPhysicalDevice* physical_devices = NULL;
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+      host_allocator, physical_device_count * sizeof(physical_devices[0]),
+ (void**)&physical_devices));
+ iree_status_t status = VK_RESULT_TO_STATUS(
+ instance_syms->vkEnumeratePhysicalDevices(
+ instance, &physical_device_count, physical_devices),
+ "vkEnumeratePhysicalDevices");
+ if (iree_status_is_ok(status)) {
+ *out_physical_device_count = physical_device_count;
+ *out_physical_devices = physical_devices;
+ } else {
+ iree_allocator_free(host_allocator, physical_devices);
+ }
+ return status;
+}
+
+// Returns the size, in bytes, of the iree_hal_device_info_t storage required
+// for holding the given |physical_device|.
+static iree_host_size_t iree_hal_vulkan_calculate_device_info_size(
+ VkPhysicalDevice physical_device, iree::hal::vulkan::DynamicSymbols* syms) {
+ VkPhysicalDeviceProperties physical_device_properties;
+ syms->vkGetPhysicalDeviceProperties(physical_device,
+ &physical_device_properties);
+ return strlen(physical_device_properties.deviceName);
+}
+
+// Populates device information from the given Vulkan physical device handle.
+// |out_device_info| must point to valid memory and additional data will be
+// appended to |buffer_ptr| and the new pointer is returned.
+static uint8_t* iree_hal_vulkan_populate_device_info(
+ VkPhysicalDevice physical_device, DynamicSymbols* syms, uint8_t* buffer_ptr,
+ iree_hal_device_info_t* out_device_info) {
+ memset(out_device_info, 0, sizeof(*out_device_info));
+ out_device_info->device_id = (iree_hal_device_id_t)physical_device;
+
+ VkPhysicalDeviceFeatures physical_device_features;
+ syms->vkGetPhysicalDeviceFeatures(physical_device, &physical_device_features);
+ // TODO(benvanik): check and optionally require these features:
+ // - physical_device_features.robustBufferAccess
+ // - physical_device_features.shaderInt16
+ // - physical_device_features.shaderInt64
+ // - physical_device_features.shaderFloat64
+
+ VkPhysicalDeviceProperties physical_device_properties;
+ syms->vkGetPhysicalDeviceProperties(physical_device,
+ &physical_device_properties);
+ // TODO(benvanik): check and optionally require reasonable limits.
+
+ // TODO(benvanik): more clever/sanitized device naming.
+ iree_string_view_t device_name =
+ iree_make_string_view(physical_device_properties.deviceName,
+ strlen(physical_device_properties.deviceName));
+ buffer_ptr += iree_string_view_append_to_buffer(
+ device_name, &out_device_info->name, (char*)buffer_ptr);
+
+ return buffer_ptr;
+}
+
+static iree_status_t iree_hal_vulkan_driver_query_available_devices(
+ iree_hal_driver_t* base_driver, iree_allocator_t host_allocator,
+ iree_hal_device_info_t** out_device_infos,
+ iree_host_size_t* out_device_info_count) {
+ iree_hal_vulkan_driver_t* driver = iree_hal_vulkan_driver_cast(base_driver);
+
+ // Query all devices from the Vulkan instance.
+ uint32_t physical_device_count = 0;
+ VkPhysicalDevice* physical_devices = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_driver_enumerate_physical_devices(
+ driver->syms.get(), driver->instance, host_allocator,
+ &physical_device_count, &physical_devices));
+
+ // Allocate the return infos and populate with the devices.
+ iree_hal_device_info_t* device_infos = NULL;
+ iree_host_size_t total_size =
+ physical_device_count * sizeof(iree_hal_device_info_t);
+ for (uint32_t i = 0; i < physical_device_count; ++i) {
+ total_size += iree_hal_vulkan_calculate_device_info_size(
+ physical_devices[i], driver->syms.get());
+ }
+ iree_status_t status =
+ iree_allocator_malloc(host_allocator, total_size, (void**)&device_infos);
+ if (iree_status_is_ok(status)) {
+ uint8_t* buffer_ptr =
+ (uint8_t*)device_infos +
+ physical_device_count * sizeof(iree_hal_device_info_t);
+ for (uint32_t i = 0; i < physical_device_count; ++i) {
+ buffer_ptr = iree_hal_vulkan_populate_device_info(
+ physical_devices[i], driver->syms.get(), buffer_ptr,
+ &device_infos[i]);
+ }
+ *out_device_info_count = physical_device_count;
+ *out_device_infos = device_infos;
+ }
+
+ iree_allocator_free(host_allocator, physical_devices);
+ return status;
+}
+
+static iree_status_t iree_hal_vulkan_driver_select_default_device(
+ iree::hal::vulkan::DynamicSymbols* instance_syms, VkInstance instance,
+ int default_device_index, iree_allocator_t host_allocator,
+ VkPhysicalDevice* out_physical_device) {
+ uint32_t physical_device_count = 0;
+ VkPhysicalDevice* physical_devices = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_vulkan_driver_enumerate_physical_devices(
+ instance_syms, instance, host_allocator, &physical_device_count,
+ &physical_devices));
+ iree_status_t status = iree_ok_status();
+ if (physical_device_count == 0 ||
+ default_device_index >= physical_device_count) {
+ status = iree_make_status(IREE_STATUS_NOT_FOUND,
+ "default device %d not found (of %d enumerated)",
+ default_device_index, physical_device_count);
+ } else {
+ *out_physical_device = physical_devices[default_device_index];
+ }
+ iree_allocator_free(host_allocator, physical_devices);
+ return status;
+}
+
+static iree_status_t iree_hal_vulkan_driver_create_device(
+ iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id,
+ iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
+ iree_hal_vulkan_driver_t* driver = iree_hal_vulkan_driver_cast(base_driver);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Use either the specified device (enumerated earlier) or whatever default
+ // one was specified when the driver was created.
+ VkPhysicalDevice physical_device = (VkPhysicalDevice)device_id;
+ if (physical_device == VK_NULL_HANDLE) {
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
+ iree_hal_vulkan_driver_select_default_device(
+ driver->syms.get(), driver->instance, driver->default_device_index,
+ host_allocator, &physical_device));
+ }
+
+ // TODO(benvanik): remove HAL module dependence on the identifier for matching
+ // devices. Today it *must* be vulkan* to work, whereas really that should be
+ // a device type (vs the identifier, which is arbitrary).
+ // Query the device name to use as an identifier.
+ // VkPhysicalDeviceProperties physical_device_properties;
+ // driver->syms->vkGetPhysicalDeviceProperties(physical_device,
+ // &physical_device_properties);
+ // iree_string_view_t device_name =
+ // iree_make_string_view(physical_device_properties.deviceName,
+ // strlen(physical_device_properties.deviceName));
+ iree_string_view_t device_name = iree_make_cstring_view("vulkan");
+
+ // Attempt to create the device.
+ // This may fail if the device was enumerated but is in exclusive use,
+ // disabled by the system, or permission is denied.
+ iree_status_t status = iree_hal_vulkan_device_create(
+ base_driver, device_name, driver->enabled_features,
+ &driver->device_options, (iree_hal_vulkan_syms_t*)driver->syms.get(),
+ driver->instance, physical_device, host_allocator, out_device);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+namespace {
+const iree_hal_driver_vtable_t iree_hal_vulkan_driver_vtable = {
+ /*.destroy=*/iree_hal_vulkan_driver_destroy,
+ /*.query_available_devices=*/
+ iree_hal_vulkan_driver_query_available_devices,
+ /*.create_device=*/iree_hal_vulkan_driver_create_device,
+};
+} // namespace
diff --git a/runtime/src/iree/hal/vulkan/vulkan_driver.h b/runtime/src/iree/hal/vulkan/vulkan_driver.h
new file mode 100644
index 0000000..c41a8d4
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vulkan_driver.h
@@ -0,0 +1,17 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_VULKAN_DRIVER_H_
+#define IREE_HAL_VULKAN_VULKAN_DRIVER_H_
+
+#include "iree/hal/api.h"
+#include "iree/hal/vulkan/api.h"
+
+// NOTE: the driver API calls are defined in api.h.
+// TODO(benvanik): clean that up? api.h is nice because then we only need to
+// deploy a single header file for the backend, but it is a bit tricky.
+
+#endif // IREE_HAL_VULKAN_VULKAN_DRIVER_H_
diff --git a/runtime/src/iree/hal/vulkan/vulkan_headers.h b/runtime/src/iree/hal/vulkan/vulkan_headers.h
new file mode 100644
index 0000000..6e88b09
--- /dev/null
+++ b/runtime/src/iree/hal/vulkan/vulkan_headers.h
@@ -0,0 +1,42 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_VULKAN_VULKAN_HEADERS_H_
+#define IREE_HAL_VULKAN_VULKAN_HEADERS_H_
+
+// We exclusively use Vulkan via queried function pointers. To ensure that there
+// are no accidental calls to the linker-loaded implicit functions we just
+// compile them all out.
+//
+// Code under iree/hal/vulkan/ *MUST NOT* directly include vulkan.h or any
+// header that includes it without this first being set. This means that this
+// iree/hal/vulkan/vulkan_headers.h file must usually be included first in all
+// files using it.
+//
+// From there, use iree/hal/vulkan/dynamic_symbols.h to plumb the dynamically
+// resolved symbols to any code that may need to make Vulkan calls. See that
+// header for more information: in general we try to keep our required set of
+// symbols minimal to avoid binary size/runtime memory/linker time so symbols
+// are only added as needed.
+//
+// Other non-core code can choose not to disable the prototypes if they want.
+// I don't suggest it though for anything beyond samples.
+//
+// There's a bunch of reasons to dynamically link against Vulkan like supporting
+// platforms without Vulkan or with differing Vulkan versions where all symbols
+// may not be available.
+//
+// See this article for more information:
+// https://djang86.blogspot.com/2019/01/what-is-vknoprototypes.html
+#define VK_NO_PROTOTYPES 1
+
+#include <vulkan/vulkan.h> // IWYU pragma: export
+
+#ifdef IREE_PLATFORM_APPLE
+#include <vulkan/vulkan_beta.h> // IWYU pragma: export
+#endif
+
+#endif // IREE_HAL_VULKAN_VULKAN_HEADERS_H_
diff --git a/runtime/src/iree/modules/BUILD b/runtime/src/iree/modules/BUILD
new file mode 100644
index 0000000..236a474
--- /dev/null
+++ b/runtime/src/iree/modules/BUILD
@@ -0,0 +1,11 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
diff --git a/runtime/src/iree/modules/CMakeLists.txt b/runtime/src/iree/modules/CMakeLists.txt
new file mode 100644
index 0000000..a913b35
--- /dev/null
+++ b/runtime/src/iree/modules/CMakeLists.txt
@@ -0,0 +1,13 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/modules/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/modules/check/BUILD b/runtime/src/iree/modules/check/BUILD
new file mode 100644
index 0000000..d80f0de
--- /dev/null
+++ b/runtime/src/iree/modules/check/BUILD
@@ -0,0 +1,50 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_runtime_cc_library(
+ name = "check",
+ testonly = True,
+ srcs = ["module.cc"],
+ hdrs = ["module.h"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:cc",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/modules/hal",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/vm",
+ "//runtime/src/iree/vm:cc",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "check_test",
+ srcs = ["check_test.cc"],
+ deps = [
+ ":check",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:cc",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:span",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/vmvx/registration",
+ "//runtime/src/iree/modules/hal",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ "//runtime/src/iree/vm",
+ "//runtime/src/iree/vm:bytecode_module",
+ "//runtime/src/iree/vm:cc",
+ ],
+)
diff --git a/runtime/src/iree/modules/check/CMakeLists.txt b/runtime/src/iree/modules/check/CMakeLists.txt
new file mode 100644
index 0000000..10ba2ae
--- /dev/null
+++ b/runtime/src/iree/modules/check/CMakeLists.txt
@@ -0,0 +1,51 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ check
+ HDRS
+ "module.h"
+ SRCS
+ "module.cc"
+ DEPS
+ iree::base
+ iree::base::cc
+ iree::base::internal
+ iree::hal
+ iree::modules::hal
+ iree::testing::gtest
+ iree::vm
+ iree::vm::cc
+ TESTONLY
+ PUBLIC
+)
+
+# Doesn't use bazel_to_cmake because IREE_HAL_DRIVER_VMVX filtering is custom logic
+if(${IREE_HAL_DRIVER_VMVX})
+ iree_cc_test(
+ NAME
+ check_test
+ SRCS
+ "check_test.cc"
+ DEPS
+ ::check
+ iree::base
+ iree::base::cc
+ iree::base::internal
+ iree::base::internal::span
+ iree::hal
+ iree::hal::vmvx::registration
+ iree::modules::hal
+ iree::testing::gtest
+ iree::testing::gtest_main
+ iree::vm
+ iree::vm::bytecode_module
+ iree::vm::cc
+ )
+endif()
diff --git a/runtime/src/iree/modules/check/check_test.cc b/runtime/src/iree/modules/check/check_test.cc
new file mode 100644
index 0000000..b3701bf
--- /dev/null
+++ b/runtime/src/iree/modules/check/check_test.cc
@@ -0,0 +1,581 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Tests that our bytecode module can call through into our native module.
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/internal/span.h"
+#include "iree/base/status_cc.h"
+#include "iree/hal/api.h"
+#include "iree/hal/vmvx/registration/driver_module.h"
+#include "iree/modules/check/module.h"
+#include "iree/modules/hal/module.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+#include "iree/vm/api.h"
+#include "iree/vm/ref_cc.h"
+
+namespace iree {
+namespace {
+
+// Test fixture that runs check-module functions through a real VM context
+// backed by the VMVX HAL driver. Driver, device, and modules are created once
+// per suite; each test case gets a fresh VM context and input list.
+class CheckTest : public ::testing::Test {
+ protected:
+  static void SetUpTestSuite() {
+    IREE_CHECK_OK(iree_hal_vmvx_driver_module_register(
+        iree_hal_driver_registry_default()));
+    // TODO(benvanik): move to instance-based registration.
+    IREE_ASSERT_OK(iree_hal_module_register_types());
+
+    iree_hal_driver_t* hal_driver = nullptr;
+    IREE_ASSERT_OK(iree_hal_driver_registry_try_create_by_name(
+        iree_hal_driver_registry_default(), iree_make_cstring_view("vmvx"),
+        iree_allocator_system(), &hal_driver));
+    IREE_ASSERT_OK(iree_hal_driver_create_default_device(
+        hal_driver, iree_allocator_system(), &device_));
+    IREE_ASSERT_OK(
+        iree_hal_module_create(device_, iree_allocator_system(), &hal_module_));
+    // The device retains what it needs from the driver; drop the local ref.
+    iree_hal_driver_release(hal_driver);
+
+    IREE_ASSERT_OK(
+        iree_vm_instance_create(iree_allocator_system(), &instance_));
+
+    IREE_ASSERT_OK(
+        iree_check_module_create(iree_allocator_system(), &check_module_))
+        << "Native module failed to init";
+  }
+
+  static void TearDownTestSuite() {
+    iree_hal_device_release(device_);
+    iree_vm_module_release(check_module_);
+    iree_vm_module_release(hal_module_);
+    iree_vm_instance_release(instance_);
+  }
+
+  void SetUp() override {
+    std::vector<iree_vm_module_t*> modules = {hal_module_, check_module_};
+    IREE_ASSERT_OK(iree_vm_context_create_with_modules(
+        instance_, IREE_VM_CONTEXT_FLAG_NONE, modules.data(), modules.size(),
+        iree_allocator_system(), &context_));
+    allocator_ = iree_hal_device_allocator(device_);
+  }
+
+  void TearDown() override {
+    inputs_.reset();
+    iree_vm_context_release(context_);
+  }
+
+  // Allocates a dense row-major i32 buffer view with the given contents and
+  // shape. Asserts that contents.size() matches the product of the dims.
+  void CreateInt32BufferView(iree::span<const int32_t> contents,
+                             iree::span<const int32_t> shape,
+                             iree_hal_buffer_view_t** out_buffer_view) {
+    size_t num_elements = 1;
+    for (int32_t dim : shape) {
+      num_elements *= dim;
+    }
+    ASSERT_EQ(contents.size(), num_elements);
+    iree_hal_buffer_params_t params = {0};
+    // NOTE: fixed a stray comma operator here (was `...DEVICE_VISIBLE,`);
+    // behavior was identical but it silently fused the two assignments.
+    params.type =
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+    params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+                   IREE_HAL_BUFFER_USAGE_TRANSFER |
+                   IREE_HAL_BUFFER_USAGE_MAPPING;
+    IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+        allocator_, shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_INT_32,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, params,
+        iree_make_const_byte_span(contents.data(),
+                                  contents.size() * sizeof(int32_t)),
+        &*out_buffer_view));
+  }
+
+  // As CreateInt32BufferView but f16; contents are raw uint16_t bit patterns
+  // (use iree_math_f32_to_f16 to produce them).
+  void CreateFloat16BufferView(iree::span<const uint16_t> contents,
+                               iree::span<const int32_t> shape,
+                               iree_hal_buffer_view_t** out_buffer_view) {
+    size_t num_elements = 1;
+    for (int32_t dim : shape) {
+      num_elements *= dim;
+    }
+    ASSERT_EQ(contents.size(), num_elements);
+    iree_hal_buffer_params_t params = {0};
+    params.type =
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+    params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+                   IREE_HAL_BUFFER_USAGE_TRANSFER |
+                   IREE_HAL_BUFFER_USAGE_MAPPING;
+    IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+        allocator_, shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_FLOAT_16,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, params,
+        iree_make_const_byte_span(contents.data(),
+                                  contents.size() * sizeof(uint16_t)),
+        &*out_buffer_view));
+  }
+
+  // As CreateInt32BufferView but f32.
+  void CreateFloat32BufferView(iree::span<const float> contents,
+                               iree::span<const int32_t> shape,
+                               iree_hal_buffer_view_t** out_buffer_view) {
+    size_t num_elements = 1;
+    for (int32_t dim : shape) {
+      num_elements *= dim;
+    }
+    ASSERT_EQ(contents.size(), num_elements);
+    iree_hal_buffer_params_t params = {0};
+    params.type =
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+    params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+                   IREE_HAL_BUFFER_USAGE_TRANSFER |
+                   IREE_HAL_BUFFER_USAGE_MAPPING;
+    IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+        allocator_, shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, params,
+        iree_make_const_byte_span(contents.data(),
+                                  contents.size() * sizeof(float)),
+        &*out_buffer_view));
+  }
+
+  // As CreateInt32BufferView but f64.
+  void CreateFloat64BufferView(iree::span<const double> contents,
+                               iree::span<const int32_t> shape,
+                               iree_hal_buffer_view_t** out_buffer_view) {
+    size_t num_elements = 1;
+    for (int32_t dim : shape) {
+      num_elements *= dim;
+    }
+    ASSERT_EQ(contents.size(), num_elements);
+    iree_hal_buffer_params_t params = {0};
+    params.type =
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+    params.usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+                   IREE_HAL_BUFFER_USAGE_TRANSFER |
+                   IREE_HAL_BUFFER_USAGE_MAPPING;
+    IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+        allocator_, shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_FLOAT_64,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, params,
+        iree_make_const_byte_span(contents.data(),
+                                  contents.size() * sizeof(double)),
+        &*out_buffer_view));
+  }
+
+  // Invokes the named exported function on the check module with whatever is
+  // currently in |inputs_| (may be null for zero-arg functions).
+  iree_status_t Invoke(const char* function_name) {
+    iree_vm_function_t function;
+    IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_name(
+        check_module_, IREE_VM_FUNCTION_LINKAGE_EXPORT,
+        iree_make_cstring_view(function_name), &function),
+        "exported function '%s' not found", function_name);
+    // TODO(#2075): don't directly invoke native functions like this.
+    return iree_vm_invoke(context_, function, IREE_VM_INVOCATION_FLAG_NONE,
+                          /*policy=*/nullptr, inputs_.get(),
+                          /*outputs=*/nullptr, iree_allocator_system());
+  }
+
+  // Invokes with a list of primitive values as arguments.
+  iree_status_t Invoke(const char* function_name,
+                       std::vector<iree_vm_value_t> args) {
+    IREE_RETURN_IF_ERROR(
+        iree_vm_list_create(/*element_type=*/nullptr, args.size(),
+                            iree_allocator_system(), &inputs_));
+    for (auto& arg : args) {
+      IREE_RETURN_IF_ERROR(iree_vm_list_push_value(inputs_.get(), &arg));
+    }
+    return Invoke(function_name);
+  }
+
+  // Invokes with a list of buffer views as arguments; ownership of each ref is
+  // moved into the input list.
+  iree_status_t Invoke(const char* function_name,
+                       std::vector<vm::ref<iree_hal_buffer_view_t>> args) {
+    IREE_RETURN_IF_ERROR(
+        iree_vm_list_create(/*element_type=*/nullptr, args.size(),
+                            iree_allocator_system(), &inputs_));
+    for (auto& arg : args) {
+      iree_vm_ref_t arg_ref = iree_hal_buffer_view_move_ref(arg.get());
+      IREE_RETURN_IF_ERROR(iree_vm_list_push_ref_move(inputs_.get(), &arg_ref));
+    }
+    return Invoke(function_name);
+  }
+
+ private:
+  // Suite-wide state created in SetUpTestSuite.
+  static iree_hal_device_t* device_;
+  static iree_vm_instance_t* instance_;
+  static iree_vm_module_t* check_module_;
+  static iree_vm_module_t* hal_module_;
+
+  // Per-test state created in SetUp.
+  iree_vm_context_t* context_ = nullptr;
+  vm::ref<iree_vm_list_t> inputs_;
+  iree_hal_allocator_t* allocator_ = nullptr;
+};
+iree_hal_device_t* CheckTest::device_ = nullptr;
+iree_vm_instance_t* CheckTest::instance_ = nullptr;
+iree_vm_module_t* CheckTest::check_module_ = nullptr;
+iree_vm_module_t* CheckTest::hal_module_ = nullptr;
+
+// --- expect_true / expect_false on scalar i32 values ---
+
+TEST_F(CheckTest, ExpectTrueSuccess) {
+  IREE_ASSERT_OK(Invoke("expect_true", {iree_vm_value_make_i32(1)}));
+}
+
+TEST_F(CheckTest, ExpectTrueFailure) {
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_true", {iree_vm_value_make_i32(0)})),
+      "Expected 0 to be nonzero");
+}
+
+TEST_F(CheckTest, ExpectFalseSuccess) {
+  IREE_ASSERT_OK(Invoke("expect_false", {iree_vm_value_make_i32(0)}));
+}
+
+TEST_F(CheckTest, ExpectFalseFailure) {
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_false", {iree_vm_value_make_i32(1)})),
+      "Expected 1 to be zero");
+}
+
+TEST_F(CheckTest, ExpectFalseNotOneFailure) {
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_false", {iree_vm_value_make_i32(42)})),
+      "Expected 42 to be zero");
+}
+
+// --- expect_all_true over i32 buffer views ---
+
+TEST_F(CheckTest, ExpectAllTrueSuccess) {
+  vm::ref<iree_hal_buffer_view_t> input_buffer_view;
+  int32_t contents[] = {1};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateInt32BufferView(contents, shape, &input_buffer_view));
+  IREE_ASSERT_OK(Invoke("expect_all_true", {input_buffer_view}));
+}
+
+TEST_F(CheckTest, ExpectAllTrue3DTrueSuccess) {
+  vm::ref<iree_hal_buffer_view_t> input_buffer_view;
+  int32_t contents[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int32_t shape[] = {2, 2, 2};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateInt32BufferView(contents, shape, &input_buffer_view));
+  IREE_ASSERT_OK(Invoke("expect_all_true", {input_buffer_view}));
+}
+
+TEST_F(CheckTest, ExpectAllTrueFailure) {
+  vm::ref<iree_hal_buffer_view_t> input_buffer_view;
+  int32_t contents[] = {0};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateInt32BufferView(contents, shape, &input_buffer_view));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_all_true", {input_buffer_view})), "0");
+}
+
+TEST_F(CheckTest, ExpectAllTrueSingleElementFailure) {
+  vm::ref<iree_hal_buffer_view_t> input_buffer_view;
+  int32_t contents[] = {1, 2, 3, 0, 4};
+  int32_t shape[] = {5};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateInt32BufferView(contents, shape, &input_buffer_view));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_all_true", {input_buffer_view})),
+      "1, 2, 3, 0, 4");
+}
+
+TEST_F(CheckTest, ExpectAllTrue3DSingleElementFailure) {
+  vm::ref<iree_hal_buffer_view_t> input_buffer_view;
+  int32_t contents[] = {1, 2, 3, 4, 5, 6, 0, 8};
+  int32_t shape[] = {2, 2, 2};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateInt32BufferView(contents, shape, &input_buffer_view));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_all_true", {input_buffer_view})),
+      "1, 2, 3, 4, 5, 6, 0, 8");
+}
+
+// --- expect_eq: exact equality of buffer views (type, shape, contents) ---
+
+TEST_F(CheckTest, ExpectEqSameBufferSuccess) {
+  vm::ref<iree_hal_buffer_view_t> input_buffer_view;
+  int32_t contents[] = {1};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateInt32BufferView(contents, shape, &input_buffer_view));
+  IREE_ASSERT_OK(Invoke("expect_eq", {input_buffer_view, input_buffer_view}));
+}
+
+TEST_F(CheckTest, ExpectEqIdenticalBufferSuccess) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  int32_t contents[] = {1};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(contents, shape, &rhs));
+  IREE_ASSERT_OK(Invoke("expect_eq", {lhs, rhs}));
+}
+
+TEST_F(CheckTest, ExpectEqIdentical3DBufferSuccess) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  int32_t contents[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int32_t shape[] = {2, 2, 2};
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(contents, shape, &rhs));
+  IREE_ASSERT_OK(Invoke("expect_eq", {lhs, rhs}));
+}
+
+TEST_F(CheckTest, ExpectEqDifferentShapeFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  int32_t contents[] = {1, 2, 3, 4};
+  int32_t lhs_shape[] = {2, 2};
+  int32_t rhs_shape[] = {4};
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(contents, lhs_shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(contents, rhs_shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(IREE_ASSERT_OK(Invoke("expect_eq", {lhs, rhs})),
+                          "Shapes do not match");
+}
+
+TEST_F(CheckTest, ExpectEqDifferentElementTypeFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  int32_t lhs_contents[] = {1, 2, 3, 4};
+  float rhs_contents[] = {1, 2, 3, 4};
+  int32_t shape[] = {2, 2};
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(rhs_contents, shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(IREE_ASSERT_OK(Invoke("expect_eq", {lhs, rhs})),
+                          "Element types do not match");
+}
+
+TEST_F(CheckTest, ExpectEqDifferentContentsFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  int32_t lhs_contents[] = {1};
+  int32_t rhs_contents[] = {2};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(rhs_contents, shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(IREE_ASSERT_OK(Invoke("expect_eq", {lhs, rhs})),
+                          "Contents does not match");
+}
+
+TEST_F(CheckTest, ExpectEqDifferentEverythingFullMessageFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  int32_t lhs_contents[] = {1, 2, 3, 4, 5, 6};
+  float rhs_contents[] = {1, 2, 3, 42};
+  int32_t lhs_shape[] = {2, 3};
+  int32_t rhs_shape[] = {2, 2};
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(lhs_contents, lhs_shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(
+      CreateFloat32BufferView(rhs_contents, rhs_shape, &rhs));
+  // Pins the exact full diagnostic message produced by the module.
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_eq", {lhs, rhs})),
+      "Expected equality of these values. Element types do not match."
+      " Shapes do not match. Contents does not match.\n"
+      "  lhs:\n"
+      "    2x3xi32=[1 2 3][4 5 6]\n"
+      "  rhs:\n"
+      "    2x2xf32=[1 2][3 42]");
+}
+
+TEST_F(CheckTest, ExpectEqDifferentContents3DFullMessageFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  int32_t lhs_contents[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int32_t rhs_contents[] = {1, 2, 3, 42, 5, 6, 7, 8};
+  int32_t shape[] = {2, 2, 2};
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateInt32BufferView(rhs_contents, shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_eq", {lhs, rhs})),
+      "Expected equality of these values. Contents does not match.\n"
+      "  lhs:\n"
+      "    2x2x2xi32=[[1 2][3 4]][[5 6][7 8]]\n"
+      "  rhs:\n"
+      "    2x2x2xi32=[[1 2][3 42]][[5 6][7 8]]");
+}
+
+// --- expect_almost_eq: tolerance-based comparison of float buffer views ---
+
+TEST_F(CheckTest, ExpectAlmostEqSameBufferSuccess) {
+  vm::ref<iree_hal_buffer_view_t> input_buffer_view;
+  float contents[] = {1};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateFloat32BufferView(contents, shape, &input_buffer_view));
+  IREE_ASSERT_OK(
+      Invoke("expect_almost_eq", {input_buffer_view, input_buffer_view}));
+}
+
+TEST_F(CheckTest, ExpectAlmostEqIdenticalBufferSuccess) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  float contents[] = {1};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(contents, shape, &rhs));
+  IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs}));
+}
+
+TEST_F(CheckTest, ExpectAlmostEqNearIdenticalBufferSuccess) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  // Differences are within the module's f32 tolerance so this must pass.
+  float lhs_contents[] = {1.0f, 1.99999f, 0.00001f, 4.0f};
+  float rhs_contents[] = {1.00001f, 2.0f, 0.0f, 4.0f};
+  int32_t shape[] = {4};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(rhs_contents, shape, &rhs));
+  IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs}));
+}
+
+TEST_F(CheckTest, ExpectAlmostEqIdentical3DBufferSuccess) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  float contents[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int32_t shape[] = {2, 2, 2};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(contents, shape, &rhs));
+  IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs}));
+}
+
+TEST_F(CheckTest, ExpectAlmostEqDifferentShapeFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  float contents[] = {1, 2, 3, 4};
+  int32_t lhs_shape[] = {2, 2};
+  int32_t rhs_shape[] = {4};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(contents, lhs_shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(contents, rhs_shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs})),
+      "Shapes do not match");
+}
+
+TEST_F(CheckTest, ExpectAlmostEqSmallerLhsElementCountFailure) {
+  vm::ref<iree_hal_buffer_view_t> smaller;
+  vm::ref<iree_hal_buffer_view_t> bigger;
+  float smaller_contents[] = {1, 2};
+  float bigger_contents[] = {1, 2, 3, 4};
+  int32_t smaller_shape[] = {2};
+  int32_t bigger_shape[] = {4};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateFloat32BufferView(smaller_contents, smaller_shape, &smaller));
+  ASSERT_NO_FATAL_FAILURE(
+      CreateFloat32BufferView(bigger_contents, bigger_shape, &bigger));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_almost_eq", {smaller, bigger})),
+      "Shapes do not match");
+}
+
+TEST_F(CheckTest, ExpectAlmostEqSmallerRhsElementCountFailure) {
+  vm::ref<iree_hal_buffer_view_t> smaller;
+  vm::ref<iree_hal_buffer_view_t> bigger;
+  float smaller_contents[] = {1, 2};
+  float bigger_contents[] = {1, 2, 3, 4};
+  int32_t smaller_shape[] = {2};
+  int32_t bigger_shape[] = {4};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateFloat32BufferView(smaller_contents, smaller_shape, &smaller));
+  ASSERT_NO_FATAL_FAILURE(
+      CreateFloat32BufferView(bigger_contents, bigger_shape, &bigger));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_almost_eq", {bigger, smaller})),
+      "Shapes do not match");
+}
+
+TEST_F(CheckTest, ExpectAlmostEqDifferentElementTypeFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  double lhs_contents[] = {1, 2, 3, 4};
+  float rhs_contents[] = {1, 2, 3, 4};
+  int32_t shape[] = {2, 2};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat64BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(rhs_contents, shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs})),
+      "Element types do not match");
+}
+
+TEST_F(CheckTest, ExpectAlmostEqDifferentContentsFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  float lhs_contents[] = {1};
+  float rhs_contents[] = {2};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(rhs_contents, shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs})),
+      "Contents does not match");
+}
+
+TEST_F(CheckTest, ExpectAlmostEqDifferentEverythingFullMessageFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  double lhs_contents[] = {1, 2, 3, 4, 5, 6};
+  float rhs_contents[] = {1, 2, 3, 42};
+  int32_t lhs_shape[] = {2, 3};
+  int32_t rhs_shape[] = {2, 2};
+  ASSERT_NO_FATAL_FAILURE(
+      CreateFloat64BufferView(lhs_contents, lhs_shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(
+      CreateFloat32BufferView(rhs_contents, rhs_shape, &rhs));
+  // Note no comment on contents. Cannot compare different shapes and element
+  // types.
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs})),
+      "Expected near equality of these values. Element types do not match."
+      " Shapes do not match.\n"
+      "  lhs:\n"
+      "    2x3xf64=[1 2 3][4 5 6]\n"
+      "  rhs:\n"
+      "    2x2xf32=[1 2][3 42]");
+}
+
+TEST_F(CheckTest, ExpectAlmostEqDifferentContents3DFullMessageFailure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  float lhs_contents[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  float rhs_contents[] = {1, 2, 3, 42, 5, 6, 7, 8};
+  int32_t shape[] = {2, 2, 2};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat32BufferView(rhs_contents, shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs})),
+      "Expected near equality of these values. Contents does not match.\n"
+      "  lhs:\n"
+      "    2x2x2xf32=[[1 2][3 4]][[5 6][7 8]]\n"
+      "  rhs:\n"
+      "    2x2x2xf32=[[1 2][3 42]][[5 6][7 8]]");
+}
+
+// --- expect_almost_eq on f16 buffers (contents are raw half bit patterns) ---
+
+TEST_F(CheckTest, ExpectAlmostEqIdenticalBufferF16Success) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  uint16_t contents[] = {iree_math_f32_to_f16(1.f)};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat16BufferView(contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat16BufferView(contents, shape, &rhs));
+  IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs}));
+}
+
+TEST_F(CheckTest, ExpectAlmostEqNearIdenticalBufferF16Success) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  uint16_t lhs_contents[] = {
+      iree_math_f32_to_f16(1.0f), iree_math_f32_to_f16(1.99999f),
+      iree_math_f32_to_f16(0.00001f), iree_math_f32_to_f16(4.0f)};
+  uint16_t rhs_contents[] = {
+      iree_math_f32_to_f16(1.00001f), iree_math_f32_to_f16(2.0f),
+      iree_math_f32_to_f16(0.0f), iree_math_f32_to_f16(4.0f)};
+  int32_t shape[] = {4};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat16BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat16BufferView(rhs_contents, shape, &rhs));
+  IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs}));
+}
+
+TEST_F(CheckTest, ExpectAlmostEqDifferentContentsF16Failure) {
+  vm::ref<iree_hal_buffer_view_t> lhs;
+  vm::ref<iree_hal_buffer_view_t> rhs;
+  uint16_t lhs_contents[] = {iree_math_f32_to_f16(1.f)};
+  uint16_t rhs_contents[] = {iree_math_f32_to_f16(2.f)};
+  int32_t shape[] = {1};
+  ASSERT_NO_FATAL_FAILURE(CreateFloat16BufferView(lhs_contents, shape, &lhs));
+  ASSERT_NO_FATAL_FAILURE(CreateFloat16BufferView(rhs_contents, shape, &rhs));
+  EXPECT_NONFATAL_FAILURE(
+      IREE_ASSERT_OK(Invoke("expect_almost_eq", {lhs, rhs})),
+      "Contents does not match");
+}
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/modules/check/module.cc b/runtime/src/iree/modules/check/module.cc
new file mode 100644
index 0000000..9996e94
--- /dev/null
+++ b/runtime/src/iree/modules/check/module.cc
@@ -0,0 +1,411 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/modules/check/module.h"
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/status_cc.h"
+#include "iree/hal/api.h"
+#include "iree/modules/hal/module.h"
+#include "iree/testing/gtest.h"
+#include "iree/vm/native_module_cc.h"
+#include "iree/vm/ref_cc.h"
+
+//===----------------------------------------------------------------------===//
+// VM module interface implementation
+//===----------------------------------------------------------------------===//
+
+namespace iree {
+namespace {
+
+using ::testing::Each;
+using ::testing::Not;
+
+// Reinterprets a raw byte span as a read-only span of T elements.
+// NOTE(review): data_length is truncated to whole elements; assumes the
+// underlying buffer is suitably aligned for T — confirm at call sites.
+template <typename T>
+iree::span<const T> ToSpan(iree_byte_span_t bytes) {
+  return iree::span<const T>(reinterpret_cast<T*>(bytes.data),
+                             bytes.data_length / sizeof(T));
+}
+
+// Formats |buffer_view| as a human-readable string (up to 1024 elements).
+// Starts with a 4096-char buffer and retries while the formatter reports
+// OUT_OF_RANGE — presumably actual_length carries the required size on
+// truncation, which the resize-and-loop relies on.
+StatusOr<std::string> BufferViewToString(iree_hal_buffer_view_t* buffer_view) {
+  std::string result_str(4096, '\0');
+  iree_status_t status;
+  do {
+    iree_host_size_t actual_length = 0;
+    // +1 makes room for the NUL terminator outside the string's size().
+    status = iree_hal_buffer_view_format(
+        buffer_view, /*max_element_count=*/1024, result_str.size() + 1,
+        &result_str[0], &actual_length);
+    result_str.resize(actual_length);
+  } while (iree_status_is_out_of_range(status));
+  IREE_RETURN_IF_ERROR(std::move(status));
+  return std::move(result_str);
+}
+
+// Records a non-fatal gtest failure if any element of |bytes|, viewed as T,
+// equals T(0). Always returns OK — the failure is reported via gtest.
+template <typename T>
+Status ExpectAllTrue(iree_byte_span_t bytes) {
+  EXPECT_THAT(ToSpan<T>(bytes), Each(Not(T(0))));
+  return OkStatus();
+}
+
+// Bitwise equality of two byte spans: equal lengths and memcmp()==0.
+bool EqByteSpan(iree_byte_span_t lhs_bytes, iree_byte_span_t rhs_bytes) {
+  return lhs_bytes.data_length == rhs_bytes.data_length &&
+         memcmp(lhs_bytes.data, rhs_bytes.data, lhs_bytes.data_length) == 0;
+}
+
+// Absolute tolerance for f32/f64 element-wise fuzzy comparison.
+static constexpr float kF32PrecisionThreshold = 0.0001f;
+
+// Element-wise |lhs - rhs| <= kF32PrecisionThreshold over spans of T.
+// Asserts (debug-only) that both spans have the same element count.
+template <typename T>
+bool AlmostEqByteSpan(iree_byte_span_t lhs_bytes, iree_byte_span_t rhs_bytes) {
+  auto lhs_span = ToSpan<T>(lhs_bytes);
+  auto rhs_span = ToSpan<T>(rhs_bytes);
+  assert(lhs_span.size() == rhs_span.size());
+  for (int i = 0; i < lhs_span.size(); ++i) {
+    if (fabs(lhs_span[i] - rhs_span[i]) > kF32PrecisionThreshold) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Wider absolute tolerance for f16 (lower precision than f32).
+static constexpr float kF16PrecisionThreshold = 0.001f;
+
+// Like AlmostEqByteSpan<T> but for f16 data stored as raw uint16_t bit
+// patterns: each element is widened to f32 before comparing.
+bool AlmostEqByteSpanF16(iree_byte_span_t lhs_bytes,
+                         iree_byte_span_t rhs_bytes) {
+  auto lhs_span = ToSpan<uint16_t>(lhs_bytes);
+  auto rhs_span = ToSpan<uint16_t>(rhs_bytes);
+  assert(lhs_span.size() == rhs_span.size());
+  for (int i = 0; i < lhs_span.size(); ++i) {
+    if (fabs(iree_math_f16_to_f32(lhs_span[i]) -
+             iree_math_f16_to_f32(rhs_span[i])) > kF16PrecisionThreshold) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Dispatches fuzzy comparison by element type. Only float types are
+// supported; any other element type yields INVALID_ARGUMENT.
+StatusOr<bool> AlmostEqByteSpan(iree_byte_span_t lhs_bytes,
+                                iree_byte_span_t rhs_bytes,
+                                iree_hal_element_type_t element_type) {
+  switch (element_type) {
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_32:
+      return AlmostEqByteSpan<float>(lhs_bytes, rhs_bytes);
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_64:
+      return AlmostEqByteSpan<double>(lhs_bytes, rhs_bytes);
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_16:
+      return AlmostEqByteSpanF16(lhs_bytes, rhs_bytes);
+    default:
+      // TODO(gcmn): Consider supporting fuzzy matching for quantized integers.
+      break;
+  }
+  // Format the element type into the error message for diagnosability.
+  char element_type_str[16];
+  IREE_RETURN_IF_ERROR(iree_hal_format_element_type(
+      element_type, sizeof(element_type_str), element_type_str, nullptr));
+  return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                          "unsupported element type %s", element_type_str);
+}
+
+// Dispatches ExpectAllTrue<T> by element type. Supports all fixed-width
+// integer types and f32/f64; anything else yields INVALID_ARGUMENT.
+Status ExpectAllTrue(iree_byte_span_t bytes,
+                     iree_hal_element_type_t element_type) {
+  switch (element_type) {
+    case IREE_HAL_ELEMENT_TYPE_INT_8:
+    case IREE_HAL_ELEMENT_TYPE_SINT_8:
+      return ExpectAllTrue<int8_t>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_UINT_8:
+      return ExpectAllTrue<uint8_t>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_INT_16:
+    case IREE_HAL_ELEMENT_TYPE_SINT_16:
+      return ExpectAllTrue<int16_t>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_UINT_16:
+      return ExpectAllTrue<uint16_t>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_INT_32:
+    case IREE_HAL_ELEMENT_TYPE_SINT_32:
+      return ExpectAllTrue<int32_t>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_UINT_32:
+      return ExpectAllTrue<uint32_t>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_INT_64:
+    case IREE_HAL_ELEMENT_TYPE_SINT_64:
+      return ExpectAllTrue<int64_t>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_UINT_64:
+      return ExpectAllTrue<uint64_t>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_32:
+      return ExpectAllTrue<float>(bytes);
+    case IREE_HAL_ELEMENT_TYPE_FLOAT_64:
+      return ExpectAllTrue<double>(bytes);
+    default:
+      // Note: f16 is intentionally absent here (no all-true semantics
+      // defined for it) — TODO confirm.
+      break;
+  }
+  char element_type_str[16];
+  IREE_RETURN_IF_ERROR(iree_hal_format_element_type(
+      element_type, sizeof(element_type_str), element_type_str, nullptr));
+  return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                          "unsupported element type %s", element_type_str);
+}
+
+// Per-context module state.
+// This can contain "globals" and other arbitrary state.
+//
+// Thread-compatible; the runtime will not issue multiple calls at the same
+// time using the same state. If the implementation uses external threads then
+// it must synchronize itself.
+class CheckModuleState final {
+ public:
+  // Stores the allocator for use by the module state (stored, not owned).
+  explicit CheckModuleState(iree_allocator_t allocator)
+      : allocator_(allocator) {}
+  ~CheckModuleState() = default;
+
+  // Records a non-fatal gtest failure unless |operand| is nonzero.
+  // Always returns OK; the failure is reported through gtest.
+  Status ExpectTrue(int32_t operand) {
+    EXPECT_TRUE(operand) << "Expected " << operand << " to be nonzero.";
+    return OkStatus();
+  }
+
+  // Records a non-fatal gtest failure unless |operand| is zero.
+  Status ExpectFalse(int32_t operand) {
+    EXPECT_FALSE(operand) << "Expected " << operand << " to be zero.";
+    return OkStatus();
+  }
+
+  // Maps the buffer view's memory for reading and checks that every element
+  // is nonzero (dispatched by element type). Fails the map or an unsupported
+  // element type with an error status; comparison failures are non-fatal
+  // gtest failures.
+  Status ExpectAllTrue(vm::ref<iree_hal_buffer_view_t> operand) {
+    auto* view = operand.get();
+    iree_hal_element_type_t element_type =
+        iree_hal_buffer_view_element_type(view);
+    iree_hal_buffer_t* buf = iree_hal_buffer_view_buffer(view);
+    iree_device_size_t size = iree_hal_buffer_view_byte_length(view);
+    iree_hal_buffer_mapping_t mapped_memory = {{0}};
+    IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+        buf, IREE_HAL_MAPPING_MODE_SCOPED, IREE_HAL_MEMORY_ACCESS_READ,
+        /*byte_offset=*/0, size, &mapped_memory));
+    IREE_RETURN_IF_ERROR(
+        ::iree::ExpectAllTrue(mapped_memory.contents, element_type));
+    // Unmap failures are ignored: the check result was already recorded.
+    iree_status_ignore(iree_hal_buffer_unmap_range(&mapped_memory));
+    return OkStatus();
+  }
+
+  // Compares two buffer views for exact equality of element type, shape, and
+  // bitwise contents, and reports one combined non-fatal gtest failure with a
+  // full diagnostic (formatted lhs/rhs) when any of the three differ.
+  Status ExpectEq(vm::ref<iree_hal_buffer_view_t> lhs_ref,
+                  vm::ref<iree_hal_buffer_view_t> rhs_ref) {
+    auto* lhs = lhs_ref.get();
+    auto* rhs = rhs_ref.get();
+
+    iree_device_size_t lhs_size = iree_hal_buffer_view_byte_length(lhs);
+    size_t lhs_rank = iree_hal_buffer_view_shape_rank(lhs);
+    std::vector<iree_hal_dim_t> lhs_shape(lhs_rank);
+    if (lhs_rank > 0) {
+      IREE_RETURN_IF_ERROR(
+          iree_hal_buffer_view_shape(lhs, lhs_rank, lhs_shape.data(), nullptr));
+    }
+
+    iree_device_size_t rhs_size = iree_hal_buffer_view_byte_length(rhs);
+    size_t rhs_rank = iree_hal_buffer_view_shape_rank(rhs);
+    std::vector<iree_hal_dim_t> rhs_shape(rhs_rank);
+    if (rhs_rank > 0) {
+      IREE_RETURN_IF_ERROR(
+          iree_hal_buffer_view_shape(rhs, rhs_rank, rhs_shape.data(), nullptr));
+    }
+
+    iree_hal_element_type_t lhs_element_type =
+        iree_hal_buffer_view_element_type(lhs);
+    iree_hal_element_type_t rhs_element_type =
+        iree_hal_buffer_view_element_type(rhs);
+
+    // HACK: this is all broken and will leak. Let's kill this entire module
+    // please.
+
+    // Map both buffers read-only. NOTE(review): an early return between the
+    // two map calls leaves the first mapping unmapped (see HACK above).
+    iree_hal_buffer_t* lhs_buf = iree_hal_buffer_view_buffer(lhs);
+    iree_hal_buffer_mapping_t lhs_mapped_memory = {{0}};
+    IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+        lhs_buf, IREE_HAL_MAPPING_MODE_SCOPED, IREE_HAL_MEMORY_ACCESS_READ,
+        /*byte_offset=*/0, lhs_size, &lhs_mapped_memory));
+    iree_hal_buffer_t* rhs_buf = iree_hal_buffer_view_buffer(rhs);
+    iree_hal_buffer_mapping_t rhs_mapped_memory = {{0}};
+    IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+        rhs_buf, IREE_HAL_MAPPING_MODE_SCOPED, IREE_HAL_MEMORY_ACCESS_READ,
+        /*byte_offset=*/0, rhs_size, &rhs_mapped_memory));
+
+    bool element_types_eq = lhs_element_type == rhs_element_type;
+    bool shape_eq = lhs_shape == rhs_shape;
+    bool contents_eq =
+        EqByteSpan(lhs_mapped_memory.contents, rhs_mapped_memory.contents);
+    iree_status_ignore(iree_hal_buffer_unmap_range(&lhs_mapped_memory));
+    iree_status_ignore(iree_hal_buffer_unmap_range(&rhs_mapped_memory));
+
+    if (!element_types_eq || !shape_eq || !contents_eq) {
+      // Build one message listing every mismatching aspect plus both values.
+      std::ostringstream os;
+      os << "Expected equality of these values.";
+      if (!element_types_eq) {
+        os << " Element types do not match.";
+      }
+      if (!shape_eq) {
+        os << " Shapes do not match.";
+      }
+      if (!contents_eq) {
+        os << " Contents does not match.";
+      }
+      // TODO(b/146898896): Propagate original variable names.
+      os << "\n"
+            "  lhs:\n"
+            "    ";
+      IREE_ASSIGN_OR_RETURN(auto lhs_str, BufferViewToString(lhs));
+      os << lhs_str;
+
+      os << "\n"
+            "  rhs:\n"
+            "    ";
+      IREE_ASSIGN_OR_RETURN(auto rhs_str, BufferViewToString(rhs));
+      os << rhs_str;
+
+      // TODO(b/146898896): Use ADD_FAILURE_AT to propagate source location.
+      ADD_FAILURE() << os.str();
+    }
+
+    return OkStatus();
+  }
+
+ Status ExpectAlmostEq(vm::ref<iree_hal_buffer_view_t> lhs_ref,
+ vm::ref<iree_hal_buffer_view_t> rhs_ref) {
+ auto* lhs = lhs_ref.get();
+ auto* rhs = rhs_ref.get();
+
+ iree_device_size_t lhs_size = iree_hal_buffer_view_byte_length(lhs);
+ size_t lhs_rank = iree_hal_buffer_view_shape_rank(lhs);
+ std::vector<iree_hal_dim_t> lhs_shape(lhs_rank);
+ if (lhs_rank > 0) {
+ IREE_RETURN_IF_ERROR(
+ iree_hal_buffer_view_shape(lhs, lhs_rank, lhs_shape.data(), nullptr));
+ }
+
+ iree_device_size_t rhs_size = iree_hal_buffer_view_byte_length(rhs);
+ size_t rhs_rank = iree_hal_buffer_view_shape_rank(rhs);
+ std::vector<iree_hal_dim_t> rhs_shape(rhs_rank);
+ if (rhs_rank > 0) {
+ IREE_RETURN_IF_ERROR(
+ iree_hal_buffer_view_shape(rhs, rhs_rank, rhs_shape.data(), nullptr));
+ }
+
+ iree_hal_element_type_t lhs_element_type =
+ iree_hal_buffer_view_element_type(lhs);
+ iree_hal_element_type_t rhs_element_type =
+ iree_hal_buffer_view_element_type(rhs);
+
+ iree_hal_buffer_t* lhs_buf = iree_hal_buffer_view_buffer(lhs);
+ iree_hal_buffer_mapping_t lhs_mapped_memory = {{0}};
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+ lhs_buf, IREE_HAL_MAPPING_MODE_SCOPED, IREE_HAL_MEMORY_ACCESS_READ,
+ /*byte_offset=*/0, lhs_size, &lhs_mapped_memory));
+ iree_hal_buffer_t* rhs_buf = iree_hal_buffer_view_buffer(rhs);
+ iree_hal_buffer_mapping_t rhs_mapped_memory = {{0}};
+ IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
+ rhs_buf, IREE_HAL_MAPPING_MODE_SCOPED, IREE_HAL_MEMORY_ACCESS_READ,
+ /*byte_offset=*/0, rhs_size, &rhs_mapped_memory));
+
+ bool element_types_eq = lhs_element_type == rhs_element_type;
+ bool shape_eq = lhs_shape == rhs_shape;
+ // Only check contents if shape and element type match. Otherwise we can't.
+ bool contents_could_be_almost_eq = true;
+ if (element_types_eq && shape_eq) {
+ IREE_ASSIGN_OR_RETURN(
+ contents_could_be_almost_eq,
+ AlmostEqByteSpan(lhs_mapped_memory.contents,
+ rhs_mapped_memory.contents, lhs_element_type));
+ }
+ iree_status_ignore(iree_hal_buffer_unmap_range(&lhs_mapped_memory));
+ iree_status_ignore(iree_hal_buffer_unmap_range(&rhs_mapped_memory));
+
+ if (!element_types_eq || !shape_eq || !contents_could_be_almost_eq) {
+ std::ostringstream os;
+ os << "Expected near equality of these values.";
+ if (!element_types_eq) {
+ os << " Element types do not match.";
+ }
+ if (!shape_eq) {
+ os << " Shapes do not match.";
+ }
+ if (!contents_could_be_almost_eq) {
+ os << " Contents does not match.";
+ }
+ // TODO(b/146898896): Propagate original variable names.
+ os << "\n"
+ " lhs:\n"
+ " ";
+ IREE_ASSIGN_OR_RETURN(auto lhs_str, BufferViewToString(lhs));
+ os << lhs_str;
+
+ os << "\n"
+ " rhs:\n"
+ " ";
+ IREE_ASSIGN_OR_RETURN(auto rhs_str, BufferViewToString(rhs));
+ os << rhs_str;
+
+ // TODO(b/146898896): Use ADD_FAILURE_AT to propagate source location.
+ ADD_FAILURE() << os.str();
+ }
+
+ return OkStatus();
+ }
+
+ private:
+ // Allocator that the caller requested we use for any allocations we need to
+ // perform during operation.
+ iree_allocator_t allocator_ = iree_allocator_system();
+};
+
+// Function table mapping imported function names to their implementation.
+// The signature of the target function is expected to match that in the
+// check.imports.mlir file.
+static const vm::NativeFunction<CheckModuleState> kCheckModuleFunctions[] = {
+ vm::MakeNativeFunction("expect_true", &CheckModuleState::ExpectTrue),
+ vm::MakeNativeFunction("expect_false", &CheckModuleState::ExpectFalse),
+ vm::MakeNativeFunction("expect_all_true", &CheckModuleState::ExpectAllTrue),
+ vm::MakeNativeFunction("expect_eq", &CheckModuleState::ExpectEq),
+ vm::MakeNativeFunction("expect_almost_eq",
+ &CheckModuleState::ExpectAlmostEq),
+};
+
+// The module instance that will be allocated and reused across contexts.
+// Any context-specific state must be stored in a state structure such as
+// CheckModuleState below.
+//
+// Assumed thread-safe (by construction here, as it's immutable), though if more
+// state is stored here it will need to be synchronized by the implementation.
+class CheckModule final : public vm::NativeModule<CheckModuleState> {
+ public:
+ using vm::NativeModule<CheckModuleState>::NativeModule;
+
+ // Creates per-context state when the module is added to a new context.
+ // May be called from any thread.
+ StatusOr<std::unique_ptr<CheckModuleState>> CreateState(
+ iree_allocator_t allocator) override {
+ auto state = std::make_unique<CheckModuleState>(allocator);
+ return state;
+ }
+};
+
+} // namespace
+
+// Note that while we are using C++ bindings internally we still expose the
+// module as a C instance. This hides the details of our implementation.
+extern "C" iree_status_t iree_check_module_create(
+ iree_allocator_t allocator, iree_vm_module_t** out_module) {
+ IREE_ASSERT_ARGUMENT(out_module);
+ *out_module = NULL;
+ auto module = std::make_unique<CheckModule>(
+ "check", allocator,
+ iree::span<const vm::NativeFunction<CheckModuleState>>(
+ kCheckModuleFunctions));
+ *out_module = module.release()->interface();
+ return iree_ok_status();
+}
+
+} // namespace iree
diff --git a/runtime/src/iree/modules/check/module.h b/runtime/src/iree/modules/check/module.h
new file mode 100644
index 0000000..24d29ba
--- /dev/null
+++ b/runtime/src/iree/modules/check/module.h
@@ -0,0 +1,27 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_MODULES_CHECK_MODULE_H_
+#define IREE_MODULES_CHECK_MODULE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates a native custom module.
+iree_status_t iree_check_module_create(iree_allocator_t allocator,
+ iree_vm_module_t** out_module);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_MODULES_CHECK_MODULE_H_
diff --git a/runtime/src/iree/modules/check/test/BUILD b/runtime/src/iree/modules/check/test/BUILD
new file mode 100644
index 0000000..a834f60
--- /dev/null
+++ b/runtime/src/iree/modules/check/test/BUILD
@@ -0,0 +1,48 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/bazel:iree_check_test.bzl", "iree_check_test_suite")
+load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
+load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_lit_test_suite(
+ name = "lit",
+ srcs = enforce_glob(
+ [
+ "failure.mlir",
+ "success.mlir",
+ "unavailable.mlir",
+ ],
+ include = ["*.mlir"],
+ ),
+ cfg = "//runtime:lit.cfg.py",
+ tags = ["hostonly"],
+ tools = [
+ "//iree/tools:iree-check-module",
+ "//iree/tools:iree-compile",
+ "//iree/tools:iree-run-module",
+ "@llvm-project//llvm:FileCheck",
+ ],
+)
+
+iree_check_test_suite(
+ name = "check",
+ srcs = ["success.mlir"],
+ compiler_flags = ["-iree-input-type=mhlo"],
+)
+
+iree_check_test_suite(
+ name = "check_failure",
+ srcs = ["failure.mlir"],
+ compiler_flags = ["-iree-input-type=mhlo"],
+ runner_args = ["--expect_failure"],
+)
diff --git a/runtime/src/iree/modules/check/test/CMakeLists.txt b/runtime/src/iree/modules/check/test/CMakeLists.txt
new file mode 100644
index 0000000..f41dae2
--- /dev/null
+++ b/runtime/src/iree/modules/check/test/CMakeLists.txt
@@ -0,0 +1,49 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/modules/check/test/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_lit_test_suite(
+ NAME
+ lit
+ SRCS
+ "failure.mlir"
+ "success.mlir"
+ "unavailable.mlir"
+ TOOLS
+ FileCheck
+ iree::tools::iree-check-module
+ iree::tools::iree-compile
+ iree::tools::iree-run-module
+ LABELS
+ "hostonly"
+)
+
+iree_check_test_suite(
+ NAME
+ check
+ SRCS
+ "success.mlir"
+ COMPILER_FLAGS
+ "-iree-input-type=mhlo"
+)
+
+iree_check_test_suite(
+ NAME
+ check_failure
+ SRCS
+ "failure.mlir"
+ COMPILER_FLAGS
+ "-iree-input-type=mhlo"
+ RUNNER_ARGS
+ "--expect_failure"
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/modules/check/test/failure.mlir b/runtime/src/iree/modules/check/test/failure.mlir
new file mode 100644
index 0000000..a5c541c
--- /dev/null
+++ b/runtime/src/iree/modules/check/test/failure.mlir
@@ -0,0 +1,13 @@
+// RUN: iree-compile --iree-input-type=mhlo --iree-hal-target-backends=vmvx -iree-mlir-to-vm-bytecode-module %s | iree-check-module --expect_failure - | FileCheck %s
+// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-compile --iree-input-type=mhlo --iree-hal-target-backends=vulkan-spirv -iree-mlir-to-vm-bytecode-module %s | iree-check-module --driver=vulkan --expect_failure - | FileCheck %s)
+
+// CHECK-LABEL: expect_failure.expect_true_of_false
+// CHECK: Expected 0 to be nonzero
+// CHECK: Test failed as expected
+module @expect_failure {
+func.func @expect_true_of_false() {
+ %false = util.unfoldable_constant 0 : i32
+ check.expect_true(%false) : i32
+ return
+}
+}
diff --git a/runtime/src/iree/modules/check/test/success.mlir b/runtime/src/iree/modules/check/test/success.mlir
new file mode 100644
index 0000000..2935131
--- /dev/null
+++ b/runtime/src/iree/modules/check/test/success.mlir
@@ -0,0 +1,78 @@
+// RUN: iree-compile --iree-input-type=mhlo --iree-hal-target-backends=vmvx -iree-mlir-to-vm-bytecode-module %s | iree-check-module --driver=vmvx -
+// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-compile --iree-input-type=mhlo --iree-hal-target-backends=vulkan-spirv -iree-mlir-to-vm-bytecode-module %s | iree-check-module --driver=vulkan -)
+
+func.func @expect_true() {
+ %true = util.unfoldable_constant 1 : i32
+ check.expect_true(%true) : i32
+ return
+}
+
+func.func @expect_false() {
+ %false = util.unfoldable_constant 0 : i32
+ check.expect_false(%false) : i32
+ return
+}
+
+func.func @expect_all_true() {
+ %all_true = util.unfoldable_constant dense<1> : tensor<2x2xi32>
+ %all_true_view = hal.tensor.export %all_true : tensor<2x2xi32> -> !hal.buffer_view
+ check.expect_all_true(%all_true_view) : !hal.buffer_view
+ return
+}
+
+func.func @expect_all_true_tensor() {
+ %all_true = util.unfoldable_constant dense<1> : tensor<2x2xi32>
+ check.expect_all_true(%all_true) : tensor<2x2xi32>
+ return
+}
+
+func.func @expect_eq() {
+ %const0 = util.unfoldable_constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32>
+ %const1 = util.unfoldable_constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32>
+ check.expect_eq(%const0, %const1) : tensor<5xi32>
+ return
+}
+
+func.func @expect_eq_const() {
+ %const0 = util.unfoldable_constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32>
+ check.expect_eq_const(%const0, dense<[1, 2, 3, 4, 5]> : tensor<5xi32>) : tensor<5xi32>
+ return
+}
+
+func.func @expect_almost_eq() {
+ %const0 = util.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0, 5.0]> : tensor<5xf32>
+ %const1 = util.unfoldable_constant dense<[0.999999, 2.0, 3.0, 4.0, 5.0]> : tensor<5xf32>
+ check.expect_almost_eq(%const0, %const1) : tensor<5xf32>
+ return
+}
+
+func.func @expect_almost_eq_const() {
+ %const0 = util.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0, 5.0]> : tensor<5xf32>
+ check.expect_almost_eq_const(%const0, dense<[0.999999, 2.0, 3.0, 4.0, 5.0]> : tensor<5xf32>) : tensor<5xf32>
+ return
+}
+
+func.func @add() {
+ %c5 = util.unfoldable_constant dense<5> : tensor<i32>
+ %result = "mhlo.add"(%c5, %c5) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+ %c10 = util.unfoldable_constant dense<10> : tensor<i32>
+ check.expect_eq(%result, %c10) : tensor<i32>
+ return
+}
+
+func.func @floats() {
+ %cp1 = util.unfoldable_constant dense<0.1> : tensor<f32>
+ %c1 = util.unfoldable_constant dense<1.0> : tensor<f32>
+ %p2 = "mhlo.add"(%cp1, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %p3 = "mhlo.add"(%p2, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %p4 = "mhlo.add"(%p3, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %p5 = "mhlo.add"(%p4, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %p6 = "mhlo.add"(%p5, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %p7 = "mhlo.add"(%p6, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %p8 = "mhlo.add"(%p7, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %p9 = "mhlo.add"(%p8, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %approximately_1 = "mhlo.add"(%p9, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+
+ check.expect_almost_eq(%approximately_1, %c1) : tensor<f32>
+ return
+}
diff --git a/runtime/src/iree/modules/check/test/unavailable.mlir b/runtime/src/iree/modules/check/test/unavailable.mlir
new file mode 100644
index 0000000..c8f333c
--- /dev/null
+++ b/runtime/src/iree/modules/check/test/unavailable.mlir
@@ -0,0 +1,15 @@
+// RUN: iree-compile --iree-input-type=mhlo --iree-hal-target-backends=vmvx -iree-mlir-to-vm-bytecode-module %s | iree-run-module --module_file=- --entry_function=expect_true_of_false | FileCheck %s
+
+// Tests that even if the check module is not available (in this case because
+// we are running with iree-run-module instead of iree-check-module) the
+// execution still completes.
+
+// CHECK-LABEL: EXEC @expect_true_of_false
+// CHECK: result[0]: i32=0
+module @expect_failure {
+ func.func @expect_true_of_false() -> i32 {
+ %false = util.unfoldable_constant 0 : i32
+ check.expect_true(%false) : i32
+ return %false : i32
+ }
+}
diff --git a/runtime/src/iree/modules/hal/BUILD b/runtime/src/iree/modules/hal/BUILD
new file mode 100644
index 0000000..9afa643
--- /dev/null
+++ b/runtime/src/iree/modules/hal/BUILD
@@ -0,0 +1,32 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_runtime_cc_library(
+ name = "hal",
+ srcs = [
+ "module.c",
+ ],
+ hdrs = [
+ "module.h",
+ ],
+ textual_hdrs = [
+ "exports.inl",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/vm",
+ ],
+)
diff --git a/runtime/src/iree/modules/hal/CMakeLists.txt b/runtime/src/iree/modules/hal/CMakeLists.txt
new file mode 100644
index 0000000..14b2612
--- /dev/null
+++ b/runtime/src/iree/modules/hal/CMakeLists.txt
@@ -0,0 +1,30 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/modules/hal/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ hal
+ HDRS
+ "module.h"
+ TEXTUAL_HDRS
+ "exports.inl"
+ SRCS
+ "module.c"
+ DEPS
+ iree::base
+ iree::base::tracing
+ iree::hal
+ iree::vm
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/modules/hal/exports.inl b/runtime/src/iree/modules/hal/exports.inl
new file mode 100644
index 0000000..8bca87f
--- /dev/null
+++ b/runtime/src/iree/modules/hal/exports.inl
@@ -0,0 +1,81 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===----------------------------------------------------------------------===//
+//
+// ██ ██ █████ ██████ ███ ██ ██ ███ ██ ██████
+// ██ ██ ██ ██ ██ ██ ████ ██ ██ ████ ██ ██
+// ██ █ ██ ███████ ██████ ██ ██ ██ ██ ██ ██ ██ ██ ███
+// ██ ███ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
+// ███ ███ ██ ██ ██ ██ ██ ████ ██ ██ ████ ██████
+//
+//===----------------------------------------------------------------------===//
+//
+// This file will be auto generated from hal.imports.mlir in the future; for
+// now it's modified by hand but with strict alphabetical sorting required.
+// The order of these functions must be sorted ascending by name in a way
+// compatible with iree_string_view_compare.
+//
+// Users are meant to `#define EXPORT_FN` to be able to access the information.
+// #define EXPORT_FN(name, arg_type, ret_type, target_fn)
+
+// clang-format off
+
+EXPORT_FN("allocator.allocate", iree_hal_module_allocator_allocate, riii, r)
+EXPORT_FN("allocator.map.byte_buffer", iree_hal_module_allocator_map_byte_buffer, riiirii, r)
+EXPORT_FN("allocator.wrap.byte_buffer", iree_hal_module_allocator_wrap_byte_buffer, riirii, r)
+
+EXPORT_FN("buffer.assert", iree_hal_module_buffer_assert, rrriii, v)
+EXPORT_FN("buffer.length", iree_hal_module_buffer_length, r, i)
+EXPORT_FN("buffer.load", iree_hal_module_buffer_load, rii, i)
+EXPORT_FN("buffer.store", iree_hal_module_buffer_store, irii, v)
+EXPORT_FN("buffer.subspan", iree_hal_module_buffer_subspan, rii, r)
+
+EXPORT_FN("buffer_view.assert", iree_hal_module_buffer_view_assert, rriiCiD, v)
+EXPORT_FN("buffer_view.buffer", iree_hal_module_buffer_view_buffer, r, r)
+EXPORT_FN("buffer_view.byte_length", iree_hal_module_buffer_view_byte_length, r, i)
+EXPORT_FN("buffer_view.create", iree_hal_module_buffer_view_create, riiCiD, r)
+EXPORT_FN("buffer_view.dim", iree_hal_module_buffer_view_dim, ri, i)
+EXPORT_FN("buffer_view.element_type", iree_hal_module_buffer_view_element_type, r, i)
+EXPORT_FN("buffer_view.encoding_type", iree_hal_module_buffer_view_encoding_type, r, i)
+EXPORT_FN("buffer_view.rank", iree_hal_module_buffer_view_rank, r, i)
+EXPORT_FN("buffer_view.trace", iree_hal_module_buffer_view_trace, rCrD, v)
+
+EXPORT_FN("command_buffer.begin", iree_hal_module_command_buffer_begin, r, v)
+EXPORT_FN("command_buffer.begin_debug_group", iree_hal_module_command_buffer_begin_debug_group, rr, v)
+EXPORT_FN("command_buffer.bind_descriptor_set", iree_hal_module_command_buffer_bind_descriptor_set, rrirCiD, v)
+EXPORT_FN("command_buffer.copy_buffer", iree_hal_module_command_buffer_copy_buffer, rririi, v)
+EXPORT_FN("command_buffer.create", iree_hal_module_command_buffer_create, rii, r)
+EXPORT_FN("command_buffer.dispatch", iree_hal_module_command_buffer_dispatch, rriiii, v)
+EXPORT_FN("command_buffer.dispatch.indirect", iree_hal_module_command_buffer_dispatch_indirect, rriri, v)
+EXPORT_FN("command_buffer.end", iree_hal_module_command_buffer_end, r, v)
+EXPORT_FN("command_buffer.end_debug_group", iree_hal_module_command_buffer_end_debug_group, r, v)
+EXPORT_FN("command_buffer.execution_barrier", iree_hal_module_command_buffer_execution_barrier, riii, v)
+EXPORT_FN("command_buffer.fill_buffer", iree_hal_module_command_buffer_fill_buffer, rriiii, v)
+EXPORT_FN("command_buffer.push_constants", iree_hal_module_command_buffer_push_constants, rriCiD, v)
+EXPORT_FN("command_buffer.push_descriptor_set", iree_hal_module_command_buffer_push_descriptor_set, rriCiriiD, v)
+
+EXPORT_FN("descriptor_set.create", iree_hal_module_descriptor_set_create, rrCiriiD, r)
+
+EXPORT_FN("descriptor_set_layout.create", iree_hal_module_descriptor_set_layout_create, riCiiD, r)
+
+EXPORT_FN("device.allocator", iree_hal_module_device_allocator, r, r)
+EXPORT_FN("device.query.i32", iree_hal_module_device_query_i32, rrr, ii)
+
+EXPORT_FN("ex.shared_device", iree_hal_module_ex_shared_device, v, r)
+EXPORT_FN("ex.submit_and_wait", iree_hal_module_ex_submit_and_wait, rr, v)
+
+EXPORT_FN("executable.create", iree_hal_module_executable_create, rrrrCrD, r)
+
+EXPORT_FN("executable_layout.create", iree_hal_module_executable_layout_create, riCrD, r)
+
+EXPORT_FN("semaphore.await", iree_hal_module_semaphore_await, ri, i)
+EXPORT_FN("semaphore.create", iree_hal_module_semaphore_create, ri, r)
+EXPORT_FN("semaphore.fail", iree_hal_module_semaphore_fail, r, i)
+EXPORT_FN("semaphore.query", iree_hal_module_semaphore_query, r, ii)
+EXPORT_FN("semaphore.signal", iree_hal_module_semaphore_signal, ri, v)
+
+// clang-format on
diff --git a/runtime/src/iree/modules/hal/module.c b/runtime/src/iree/modules/hal/module.c
new file mode 100644
index 0000000..bf0534b
--- /dev/null
+++ b/runtime/src/iree/modules/hal/module.c
@@ -0,0 +1,1473 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/modules/hal/module.h"
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/vm/api.h"
+
+// Limit the number of bindings we pass down through the HAL. This can be tuned
+// in the future but right now guards the stack from blowing up during calls.
+#define IREE_HAL_MODULE_MAX_DESCRIPTOR_BINDING_COUNT ((iree_host_size_t)32)
+
+//===----------------------------------------------------------------------===//
+// Type registration
+//===----------------------------------------------------------------------===//
+
+static iree_vm_ref_type_descriptor_t iree_hal_allocator_descriptor = {0};
+static iree_vm_ref_type_descriptor_t iree_hal_buffer_descriptor = {0};
+static iree_vm_ref_type_descriptor_t iree_hal_buffer_view_descriptor = {0};
+static iree_vm_ref_type_descriptor_t iree_hal_command_buffer_descriptor = {0};
+static iree_vm_ref_type_descriptor_t iree_hal_descriptor_set_descriptor = {0};
+static iree_vm_ref_type_descriptor_t iree_hal_descriptor_set_layout_descriptor =
+ {0};
+static iree_vm_ref_type_descriptor_t iree_hal_device_descriptor = {0};
+static iree_vm_ref_type_descriptor_t iree_hal_event_descriptor = {0};
+static iree_vm_ref_type_descriptor_t iree_hal_executable_descriptor = {0};
+static iree_vm_ref_type_descriptor_t iree_hal_executable_layout_descriptor = {
+ 0};
+static iree_vm_ref_type_descriptor_t iree_hal_semaphore_descriptor = {0};
+
+#define IREE_VM_REGISTER_HAL_C_TYPE(type, name, destroy_fn, descriptor) \
+ descriptor.type_name = iree_make_cstring_view(name); \
+ descriptor.offsetof_counter = offsetof(iree_hal_resource_t, ref_count); \
+ descriptor.destroy = (iree_vm_ref_destroy_t)destroy_fn; \
+ IREE_RETURN_IF_ERROR(iree_vm_ref_register_type(&descriptor));
+
+IREE_API_EXPORT iree_status_t iree_hal_module_register_types(void) {
+ static bool has_registered = false;
+ if (has_registered) return iree_ok_status();
+
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_allocator_t, "hal.allocator",
+ iree_hal_allocator_destroy,
+ iree_hal_allocator_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_buffer_t, "hal.buffer",
+ iree_hal_buffer_recycle,
+ iree_hal_buffer_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_buffer_view_t, "hal.buffer_view",
+ iree_hal_buffer_view_destroy,
+ iree_hal_buffer_view_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_command_buffer_t, "hal.command_buffer",
+ iree_hal_command_buffer_destroy,
+ iree_hal_command_buffer_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_descriptor_set_t, "hal.descriptor_set",
+ iree_hal_descriptor_set_destroy,
+ iree_hal_descriptor_set_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_descriptor_set_layout_t,
+ "hal.descriptor_set_layout",
+ iree_hal_descriptor_set_layout_destroy,
+ iree_hal_descriptor_set_layout_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_device_t, "hal.device",
+ iree_hal_device_destroy,
+ iree_hal_device_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_event_t, "hal.event",
+ iree_hal_event_destroy,
+ iree_hal_event_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_executable_t, "hal.executable",
+ iree_hal_executable_destroy,
+ iree_hal_executable_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_executable_layout_t,
+ "hal.executable_layout",
+ iree_hal_executable_layout_destroy,
+ iree_hal_executable_layout_descriptor);
+ IREE_VM_REGISTER_HAL_C_TYPE(iree_hal_semaphore_t, "hal.semaphore",
+ iree_hal_semaphore_destroy,
+ iree_hal_semaphore_descriptor);
+
+ has_registered = true;
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// Type wrappers
+//===----------------------------------------------------------------------===//
+
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_allocator, iree_hal_allocator_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_buffer, iree_hal_buffer_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_buffer_view, iree_hal_buffer_view_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_command_buffer,
+ iree_hal_command_buffer_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_descriptor_set,
+ iree_hal_descriptor_set_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_descriptor_set_layout,
+ iree_hal_descriptor_set_layout_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_device, iree_hal_device_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_event, iree_hal_event_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_executable, iree_hal_executable_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_executable_layout,
+ iree_hal_executable_layout_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_semaphore, iree_hal_semaphore_t);
+
+//===----------------------------------------------------------------------===//
+// Module type definitions
+//===----------------------------------------------------------------------===//
+
+typedef struct iree_hal_module_t {
+ iree_allocator_t host_allocator;
+ iree_hal_device_t* shared_device;
+ // TODO(benvanik): types.
+} iree_hal_module_t;
+
+#define IREE_HAL_MODULE_CAST(module) \
+ (iree_hal_module_t*)((uint8_t*)(module) + iree_vm_native_module_size());
+
+typedef struct iree_hal_module_state_t {
+ iree_allocator_t host_allocator;
+ iree_hal_device_t* shared_device;
+ iree_status_t loop_status;
+ iree_hal_executable_cache_t* executable_cache;
+
+ iree_hal_semaphore_t* submit_semaphore;
+ uint64_t submit_value;
+} iree_hal_module_state_t;
+
+static void IREE_API_PTR iree_hal_module_destroy(void* base_module) {
+ iree_hal_module_t* module = IREE_HAL_MODULE_CAST(base_module);
+ iree_hal_device_release(module->shared_device);
+}
+
+static iree_status_t IREE_API_PTR
+iree_hal_module_alloc_state(void* self, iree_allocator_t host_allocator,
+ iree_vm_module_state_t** out_module_state) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_module_t* module = IREE_HAL_MODULE_CAST(self);
+ iree_hal_module_state_t* state = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
+ iree_allocator_malloc(host_allocator, sizeof(*state), (void**)&state));
+ memset(state, 0, sizeof(*state));
+ state->host_allocator = host_allocator;
+ state->shared_device = module->shared_device;
+ iree_hal_device_retain(state->shared_device);
+
+ state->loop_status = iree_ok_status();
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_executable_cache_create(
+ state->shared_device, iree_string_view_empty(),
+ iree_loop_inline(&state->loop_status), &state->executable_cache));
+
+ state->submit_value = 0ull;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_semaphore_create(state->shared_device, state->submit_value,
+ &state->submit_semaphore));
+
+ *out_module_state = (iree_vm_module_state_t*)state;
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+static void IREE_API_PTR
+iree_hal_module_free_state(void* self, iree_vm_module_state_t* module_state) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_hal_module_state_t* state = (iree_hal_module_state_t*)module_state;
+ iree_hal_semaphore_release(state->submit_semaphore);
+ iree_hal_executable_cache_release(state->executable_cache);
+ iree_status_ignore(state->loop_status);
+ iree_hal_device_release(state->shared_device);
+ iree_allocator_free(state->host_allocator, state);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static iree_status_t IREE_API_PTR iree_hal_module_notify(
+ void* self, iree_vm_module_state_t* module_state, iree_vm_signal_t signal) {
+ iree_hal_module_state_t* state = (iree_hal_module_state_t*)module_state;
+ switch (signal) {
+ case IREE_VM_SIGNAL_SUSPEND:
+ case IREE_VM_SIGNAL_LOW_MEMORY:
+ return iree_hal_device_trim(state->shared_device);
+ default:
+ return iree_ok_status();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Experimental APIs
+//===----------------------------------------------------------------------===//
+// NOTE: Ex* APIs are experimental and likely to be removed soon. Modules
+// using these APIs are not forward compatible.
+
+IREE_VM_ABI_EXPORT(iree_hal_module_ex_shared_device, //
+ iree_hal_module_state_t, //
+ v, r) {
+ rets->r0 = iree_hal_device_retain_ref(state->shared_device);
+ return iree_ok_status();
+}
+
+IREE_VM_ABI_EXPORT(iree_hal_module_ex_submit_and_wait, //
+ iree_hal_module_state_t, //
+ rr, v) {
+ iree_hal_device_t* device = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+ iree_hal_command_buffer_t* command_buffer = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_hal_command_buffer_check_deref(args->r1, &command_buffer));
+
+ // Batch with our single command buffer.
+ iree_hal_submission_batch_t batch;
+ memset(&batch, 0, sizeof(batch));
+
+ iree_hal_command_buffer_t* command_buffer_ptrs[] = {command_buffer};
+ batch.command_buffer_count = IREE_ARRAYSIZE(command_buffer_ptrs);
+ batch.command_buffers = command_buffer_ptrs;
+
+ uint64_t next_semaphore_value = ++state->submit_value;
+ iree_hal_semaphore_t* signal_semaphore_ptrs[] = {state->submit_semaphore};
+ uint64_t signal_semaphore_values[] = {next_semaphore_value};
+ batch.signal_semaphores.count = IREE_ARRAYSIZE(signal_semaphore_ptrs);
+ batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+ batch.signal_semaphores.payload_values = signal_semaphore_values;
+
+ iree_status_t status = iree_hal_device_submit_and_wait(
+ device, IREE_HAL_COMMAND_CATEGORY_ANY, 0, 1, &batch,
+ state->submit_semaphore, next_semaphore_value, iree_infinite_timeout());
+ if (!iree_status_is_ok(status)) {
+ return status;
+ }
+
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_allocator_t
+//===----------------------------------------------------------------------===//
+
+IREE_VM_ABI_EXPORT(iree_hal_module_allocator_allocate, //
+ iree_hal_module_state_t, //
+ riii, r) {
+ iree_hal_allocator_t* allocator = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_allocator_check_deref(args->r0, &allocator));
+ iree_hal_memory_type_t memory_types = (iree_hal_memory_type_t)args->i1;
+ iree_hal_buffer_usage_t buffer_usage = (iree_hal_buffer_usage_t)args->i2;
+ iree_vm_size_t allocation_size = (iree_vm_size_t)args->i3;
+
+ const iree_hal_buffer_params_t params = {
+ .type = memory_types,
+ .usage = buffer_usage,
+ };
+ iree_hal_buffer_t* buffer = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_allocator_allocate_buffer(
+ allocator, params, allocation_size, iree_const_byte_span_empty(),
+ &buffer));
+ rets->r0 = iree_hal_buffer_move_ref(buffer);
+ return iree_ok_status();
+}
+
+static void iree_hal_module_mapped_buffer_release(void* user_data,
+ iree_hal_buffer_t* buffer) {
+ iree_vm_buffer_t* backing_buffer = (iree_vm_buffer_t*)user_data;
+ iree_vm_buffer_release(backing_buffer);
+}
+
+// Maps a span of a host VM byte buffer as a HAL buffer by importing the host
+// allocation into |allocator|. On success the HAL buffer aliases the VM
+// buffer storage, which is retained until the HAL buffer is released. When
+// |is_try| is non-zero an import failure yields a null ref instead of a
+// failure status.
+IREE_VM_ABI_EXPORT(iree_hal_module_allocator_map_byte_buffer,  //
+                   iree_hal_module_state_t,                    //
+                   riiirii, r) {
+  iree_hal_allocator_t* allocator = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_allocator_check_deref(args->r0, &allocator));
+  bool is_try = args->i1 != 0;
+  iree_hal_memory_type_t memory_types = (iree_hal_memory_type_t)args->i2;
+  iree_hal_buffer_usage_t buffer_usage = (iree_hal_buffer_usage_t)args->i3;
+  iree_vm_buffer_t* source = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r4, &source));
+  iree_vm_size_t offset = (iree_vm_size_t)args->i5;
+  iree_vm_size_t length = (iree_vm_size_t)args->i6;
+
+  // length == -1 is a sentinel meaning "the entire source buffer".
+  iree_host_size_t buffer_length = source->data.data_length;
+  if (length == -1) {
+    length = buffer_length;
+  }
+  // NOTE(review): the < 0 checks assume iree_vm_size_t is signed - confirm.
+  if (length < 0 || offset < 0 || offset > buffer_length ||
+      offset + length > buffer_length) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "byte range out of bounds (requested %d-%d of available %zu)", offset,
+        (offset + length - 1), buffer_length);
+  }
+
+  // Derive allowed access: immutable sources may only be mapped for constant
+  // usage; mutable sources additionally permit in-place writes when the
+  // requested usage is not constant.
+  iree_hal_memory_access_t allowed_access = IREE_HAL_MEMORY_ACCESS_READ;
+  if (!iree_all_bits_set(source->access, IREE_VM_BUFFER_ACCESS_MUTABLE)) {
+    // Source buffer is read-only; require that the access request matches.
+    if (!iree_all_bits_set(buffer_usage, IREE_HAL_BUFFER_USAGE_CONSTANT)) {
+      return iree_make_status(IREE_STATUS_PERMISSION_DENIED,
+                              "source buffer is immutable and can only be "
+                              "mapped for constant usage");
+    }
+
+    // NOTE: if we wanted to lock things down for when there's no MMU to ensure
+    // that the loaded program doesn't touch the memory then we could just fail
+    // the request - the program will then perform an alloc+copy and can do
+    // whatever it wants with the memory.
+  } else {
+    // Source buffer is mutable; allow in-place writes.
+    if (!iree_all_bits_set(buffer_usage, IREE_HAL_BUFFER_USAGE_CONSTANT)) {
+      allowed_access |= IREE_HAL_MEMORY_ACCESS_WRITE;
+    }
+  }
+
+  // Try mapping - note that this may fail if the target device cannot map the
+  // memory into the given type (for example, mapping a host buffer into
+  // device-local memory is only going to work on unified memory systems).
+  const iree_hal_buffer_params_t params = {
+      .type = memory_types,
+      .usage = buffer_usage,
+      .access = allowed_access,
+  };
+  iree_hal_external_buffer_t external_buffer = {
+      .type = IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION,
+      .flags = IREE_HAL_EXTERNAL_BUFFER_FLAG_NONE,
+      .size = length,
+      .handle.host_allocation.ptr = source->data.data + offset,
+  };
+  iree_hal_buffer_release_callback_t release_callback = {
+      .fn = iree_hal_module_mapped_buffer_release,
+      .user_data = source,
+  };
+  iree_hal_buffer_t* buffer = NULL;
+  iree_status_t status = iree_hal_allocator_import_buffer(
+      allocator, params, &external_buffer, release_callback, &buffer);
+  if (iree_status_is_ok(status)) {
+    // Mapping succeeded - retain the source buffer that'll be released by
+    // iree_hal_module_mapped_buffer_release when the mapping is no longer
+    // used.
+    iree_vm_buffer_retain(source);
+    rets->r0 = iree_hal_buffer_move_ref(buffer);
+    return iree_ok_status();
+  }
+
+  // Failed to map - if this was a try then don't fail and just rely on the
+  // result being nullptr to indicate to the caller that things failed.
+  memset(&rets->r0, 0, sizeof(rets->r0));
+  if (is_try) {
+    iree_status_ignore(status);
+    return iree_ok_status();
+  }
+  return status;
+}
+
+// TODO(#7277): drop this method (use map instead) with streams.
+// Allocates a new buffer of |length| bytes initialized with a copy of the
+// requested span of the source VM buffer (alloc+copy, not a mapping).
+IREE_VM_ABI_EXPORT(iree_hal_module_allocator_wrap_byte_buffer,  //
+                   iree_hal_module_state_t,                     //
+                   riirii, r) {
+  iree_hal_allocator_t* allocator = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_allocator_check_deref(args->r0, &allocator));
+  iree_hal_memory_type_t memory_types = (iree_hal_memory_type_t)args->i1;
+  iree_hal_buffer_usage_t buffer_usage = (iree_hal_buffer_usage_t)args->i2;
+  iree_vm_buffer_t* source = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r3, &source));
+  iree_vm_size_t offset = (iree_vm_size_t)args->i4;
+  iree_vm_size_t length = (iree_vm_size_t)args->i5;
+
+  // length == -1 is a sentinel meaning "the entire source buffer".
+  iree_host_size_t buffer_length = source->data.data_length;
+  if (length == -1) {
+    length = buffer_length;
+  }
+  // NOTE(review): the < 0 checks assume iree_vm_size_t is signed - confirm.
+  if (length < 0 || offset < 0 || offset > buffer_length ||
+      offset + length > buffer_length) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "byte range out of bounds (requested %d-%d of available %zu)", offset,
+        (offset + length - 1), buffer_length);
+  }
+
+  const iree_hal_buffer_params_t params = {
+      .type = memory_types,
+      .usage = buffer_usage,
+  };
+  // Allocate and fill in one call; the source span is copied into the new
+  // buffer as its initial contents.
+  iree_hal_buffer_t* buffer = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_allocator_allocate_buffer(
+          allocator, params, length,
+          iree_make_const_byte_span(source->data.data + offset, length),
+          &buffer),
+      "failed to allocate buffer of length %d", length);
+
+  rets->r0 = iree_hal_buffer_move_ref(buffer);
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_t
+//===----------------------------------------------------------------------===//
+
+// Asserts that |buffer| satisfies the minimum byte length, memory type, and
+// usage bits the compiled program expects. |message| (e.g. the source
+// argument name) prefixes all failure diagnostics. The buffer is allowed to
+// have additional bytes/bits beyond those required.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_assert,  //
+                   iree_hal_module_state_t,        //
+                   rrriii, v) {
+  iree_hal_buffer_t* buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r0, &buffer));
+  iree_vm_buffer_t* message = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r1, &message));
+  iree_string_view_t message_str IREE_ATTRIBUTE_UNUSED =
+      iree_vm_buffer_as_string(message);
+  iree_hal_allocator_t* allocator = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_allocator_check_deref(args->r2, &allocator));
+  iree_vm_size_t minimum_length = (iree_vm_size_t)args->i3;
+  iree_hal_memory_type_t required_memory_types =
+      (iree_hal_memory_type_t)args->i4;
+  iree_hal_buffer_usage_t required_buffer_usage =
+      (iree_hal_buffer_usage_t)args->i5;
+
+  // Ensure we have enough bytes in the buffer for the encoding we have.
+  // Note that having more bytes is fine:
+  //   assert(expected_length <= actual_length);
+  iree_device_size_t actual_length = iree_hal_buffer_byte_length(buffer);
+  if (actual_length < minimum_length) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "%.*s buffer byte length %" PRIdsz " less than expected minimum %d",
+        (int)message_str.size, message_str.data, actual_length, minimum_length);
+  }
+
+  // TODO(benvanik): assert that the buffer view is accessible from the
+  // target device. This needs some iree_hal_allocator_* methods for checking
+  // whether the external buffer can be used. To start we just compare if the
+  // allocators are identical.
+
+  // All memory type bits expected (indicating where the program intends to use
+  // the buffer data) must be set in the buffer while the buffer is allowed to
+  // have more bits.
+  iree_hal_memory_type_t actual_memory_type =
+      iree_hal_buffer_memory_type(buffer);
+  if (!iree_all_bits_set(actual_memory_type, required_memory_types)) {
+#if IREE_HAL_MODULE_STRING_UTIL_ENABLE
+    iree_bitfield_string_temp_t temp0, temp1;
+    iree_string_view_t actual_memory_type_str =
+        iree_hal_memory_type_format(actual_memory_type, &temp0);
+    iree_string_view_t expected_memory_type_str =
+        iree_hal_memory_type_format(required_memory_types, &temp1);
+    return iree_make_status(
+        IREE_STATUS_PERMISSION_DENIED,
+        "%.*s buffer memory type is not compatible; buffer has %.*s, operation "
+        "requires %.*s",
+        (int)message_str.size, message_str.data,
+        (int)actual_memory_type_str.size, actual_memory_type_str.data,
+        (int)expected_memory_type_str.size, expected_memory_type_str.data);
+#else
+    // FIX: previously referenced an undeclared `expected_memory_type`; use
+    // the decoded `required_memory_types` so min-size builds compile.
+    return iree_make_status(
+        IREE_STATUS_PERMISSION_DENIED,
+        "%.*s buffer memory type is not compatible; buffer has %08X, operation "
+        "requires %08X",
+        (int)message_str.size, message_str.data, actual_memory_type,
+        required_memory_types);
+#endif  // IREE_HAL_MODULE_STRING_UTIL_ENABLE
+  }
+
+  // All usage bits expected (indicating what the program intends to use the
+  // buffer for) must be set in the buffer while the buffer is allowed to have
+  // more bits.
+  iree_hal_buffer_usage_t actual_buffer_usage =
+      iree_hal_buffer_allowed_usage(buffer);
+  if (!iree_all_bits_set(actual_buffer_usage, required_buffer_usage)) {
+#if IREE_HAL_MODULE_STRING_UTIL_ENABLE
+    iree_bitfield_string_temp_t temp0, temp1;
+    iree_string_view_t allowed_usage_str =
+        iree_hal_buffer_usage_format(actual_buffer_usage, &temp0);
+    iree_string_view_t required_usage_str =
+        iree_hal_buffer_usage_format(required_buffer_usage, &temp1);
+    return iree_make_status(
+        IREE_STATUS_PERMISSION_DENIED,
+        "%.*s requested usage was not specified when the buffer was allocated; "
+        "buffer allows %.*s, operation requires %.*s",
+        (int)message_str.size, message_str.data, (int)allowed_usage_str.size,
+        allowed_usage_str.data, (int)required_usage_str.size,
+        required_usage_str.data);
+#else
+    // FIX: previously referenced an undeclared `allowed_buffer_usage`; use
+    // the queried `actual_buffer_usage` so min-size builds compile.
+    return iree_make_status(
+        IREE_STATUS_PERMISSION_DENIED,
+        "%.*s requested usage was not specified when the buffer was allocated; "
+        "buffer allows %08X, operation requires %08X",
+        (int)message_str.size, message_str.data, actual_buffer_usage,
+        required_buffer_usage);
+#endif  // IREE_HAL_MODULE_STRING_UTIL_ENABLE
+  }
+
+  return iree_ok_status();
+}
+
+// Returns a new buffer ref covering a byte subrange of an existing buffer.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_subspan,  //
+                   iree_hal_module_state_t,         //
+                   rii, r) {
+  iree_hal_buffer_t* base_buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r0, &base_buffer));
+  iree_vm_size_t byte_offset = (iree_vm_size_t)args->i1;
+  iree_vm_size_t byte_length = (iree_vm_size_t)args->i2;
+  iree_hal_buffer_t* subspan = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_buffer_subspan(base_buffer, byte_offset, byte_length, &subspan),
+      "invalid subspan of an existing buffer (source_offset=%d, length=%d)",
+      byte_offset, byte_length);
+  rets->r0 = iree_hal_buffer_move_ref(subspan);
+  return iree_ok_status();
+}
+
+// Queries the total byte length of a buffer.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_length,  //
+                   iree_hal_module_state_t,        //
+                   r, i) {
+  iree_hal_buffer_t* queried = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r0, &queried));
+  rets->i0 = iree_hal_buffer_byte_length(queried);
+  return iree_ok_status();
+}
+
+// Reads up to 4 bytes from a buffer into an i32 result register via a
+// synchronous device->host transfer.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_load,  //
+                   iree_hal_module_state_t,      //
+                   rii, i) {
+  iree_hal_buffer_t* source_buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r0, &source_buffer));
+  iree_vm_size_t source_offset = (iree_vm_size_t)args->i1;
+  iree_vm_size_t length = (iree_vm_size_t)args->i2;
+
+  // The destination register is only 32 bits; reject larger loads up front.
+  uint32_t loaded_value = 0;
+  if (length > sizeof(loaded_value)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "load length byte count %d exceeds max", length);
+  }
+
+  // Blocking transfer from the device into the local value.
+  IREE_RETURN_IF_ERROR(iree_hal_device_transfer_d2h(
+      state->shared_device, source_buffer, source_offset, &loaded_value,
+      length, IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()));
+
+  rets->i0 = loaded_value;
+  return iree_ok_status();
+}
+
+// Stores up to 4 bytes of i32 |value| into |target_buffer| at
+// |target_offset| via a synchronous host->device transfer.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_store,  //
+                   iree_hal_module_state_t,       //
+                   irii, v) {
+  int32_t value = args->i0;
+  iree_hal_buffer_t* target_buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r1, &target_buffer));
+  iree_vm_size_t target_offset = (iree_vm_size_t)args->i2;
+  iree_vm_size_t length = (iree_vm_size_t)args->i3;
+
+  // The source register is only 32 bits wide; bounds-check before transfer.
+  // NOTE(review): target_offset + length could wrap for extreme values
+  // depending on iree_vm_size_t width/signedness - confirm.
+  if (length > sizeof(value)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "store length byte count %d exceeds max", length);
+  } else if (target_offset + length >
+             iree_hal_buffer_byte_length(target_buffer)) {
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "store out of bounds (target_offset=%d, length=%d into max %" PRIdsz
+        ")",
+        target_offset, length, iree_hal_buffer_byte_length(target_buffer));
+  }
+
+  return iree_hal_device_transfer_h2d(
+      state->shared_device, &value, target_buffer, target_offset, length,
+      IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout());
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_buffer_view_t
+//===----------------------------------------------------------------------===//
+
+// Creates a buffer view wrapping |source_buffer| with the given element
+// type, encoding type, and variadic shape dimensions.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_create,  //
+                   iree_hal_module_state_t,             //
+                   riiCiD, r) {
+  iree_hal_buffer_t* source_buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r0, &source_buffer));
+  iree_hal_element_type_t element_type = (iree_hal_element_type_t)args->i1;
+  iree_hal_encoding_type_t encoding_type = (iree_hal_encoding_type_t)args->i2;
+  // Copy the variadic shape dims to the stack (bounded at rank 128).
+  iree_host_size_t shape_rank = 0;
+  iree_hal_dim_t* shape_dims = NULL;
+  IREE_VM_ABI_VLA_STACK_CAST(args, a3_count, a3, iree_hal_dim_t, 128,
+                             &shape_rank, &shape_dims);
+
+  iree_hal_buffer_view_t* buffer_view = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_create(
+      source_buffer, shape_dims, shape_rank, element_type, encoding_type,
+      state->host_allocator, &buffer_view));
+  rets->r0 = iree_hal_buffer_view_move_ref(buffer_view);
+  return iree_ok_status();
+}
+
+// Returns true if the |expected_type| can be satisfied with |actual_type|.
+// This allows for basic type widening and bypassing instead of requiring an
+// exact match in all cases.
+static bool iree_hal_element_types_are_compatible(
+ iree_hal_element_type_t actual_type,
+ iree_hal_element_type_t expected_type) {
+ if (iree_hal_element_numerical_type_is_opaque(actual_type)) {
+ // If the provided type is opaque it can map to anything. This allows
+ // applications to bypass the checks when they are treating all the data as
+ // opaque, such as when carrying around buffer data in binary blobs.
+ return true;
+ }
+
+ if (iree_hal_element_numerical_type_is_integer(actual_type) &&
+ iree_hal_element_numerical_type_is_integer(expected_type) &&
+ iree_hal_element_bit_count(actual_type) ==
+ iree_hal_element_bit_count(expected_type)) {
+ // Integer types of the same bit width are allowed to be cast.
+ // This allows users or the compiler to treat data as signless while still
+ // allowing signedness. For example, tensor<1xi32> can successfully match
+ // a tensor<1xui32> expectation.
+ return true;
+ }
+
+ // Otherwise we require an exact match. This may be overly conservative but
+ // in most cases is a useful error message. Users can pass in OPAQUE types if
+ // hitting this to bypass.
+ return actual_type == expected_type;
+}
+
+// Asserts that |buffer_view| matches the encoding, element type, and shape
+// the compiled program expects, returning a |message|-prefixed failure
+// status describing the first mismatch. Checks run in order: encoding,
+// element type, rank, then individual dimensions.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_assert,  //
+                   iree_hal_module_state_t,             //
+                   rriiCiD, v) {
+  iree_hal_buffer_view_t* buffer_view = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_buffer_view_check_deref(args->r0, &buffer_view));
+  iree_vm_buffer_t* message = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r1, &message));
+  iree_string_view_t message_str IREE_ATTRIBUTE_UNUSED =
+      iree_vm_buffer_as_string(message);
+  iree_hal_element_type_t expected_element_type =
+      (iree_hal_element_type_t)args->i2;
+  iree_hal_encoding_type_t expected_encoding_type =
+      (iree_hal_encoding_type_t)args->i3;
+  // Copy the variadic expected dims to the stack (bounded at rank 128).
+  iree_host_size_t expected_shape_rank = 0;
+  iree_hal_dim_t* expected_shape_dims = NULL;
+  IREE_VM_ABI_VLA_STACK_CAST(args, a4_count, a4, iree_hal_dim_t, 128,
+                             &expected_shape_rank, &expected_shape_dims);
+
+  // Check encoding first; getting the encoding wrong is worse than the shape.
+  // If the actual encoding is opaque we allow it to pass through - this lets
+  // users override the assertion in the case where they are just passing data
+  // around and don't care about the contents.
+  iree_hal_encoding_type_t actual_encoding_type =
+      iree_hal_buffer_view_encoding_type(buffer_view);
+  if (actual_encoding_type != IREE_HAL_ENCODING_TYPE_OPAQUE &&
+      actual_encoding_type != expected_encoding_type) {
+    // TODO(benvanik): string formatting of encodings.
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "%.*s encoding mismatch; expected %08X but have %08X",
+        (int)message_str.size, message_str.data, expected_encoding_type,
+        actual_encoding_type);
+  }
+
+  // Element types determine the storage requirements.
+  // If the actual element type is opaque we allow it to pass through.
+  iree_hal_element_type_t actual_element_type =
+      iree_hal_buffer_view_element_type(buffer_view);
+  if (!iree_hal_element_types_are_compatible(actual_element_type,
+                                             expected_element_type)) {
+#if IREE_HAL_MODULE_STRING_UTIL_ENABLE
+    char actual_element_type_str[32];
+    iree_host_size_t actual_element_type_str_length = 0;
+    char expected_element_type_str[32];
+    iree_host_size_t expected_element_type_str_length = 0;
+    IREE_RETURN_IF_ERROR(iree_hal_format_element_type(
+        actual_element_type, sizeof(actual_element_type_str),
+        actual_element_type_str, &actual_element_type_str_length));
+    IREE_RETURN_IF_ERROR(iree_hal_format_element_type(
+        expected_element_type, sizeof(expected_element_type_str),
+        expected_element_type_str, &expected_element_type_str_length));
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "%.*s element type mismatch; expected %.*s (%08X) but have %.*s (%08X)",
+        (int)message_str.size, message_str.data,
+        (int)expected_element_type_str_length, expected_element_type_str,
+        expected_element_type, (int)actual_element_type_str_length,
+        actual_element_type_str, actual_element_type);
+#else
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "%.*s element type mismatch; expected %08X but have %08X",
+        (int)message_str.size, message_str.data, expected_element_type,
+        actual_element_type);
+#endif  // IREE_HAL_MODULE_STRING_UTIL_ENABLE
+  }
+
+  // Rank check before the individual shape dimensions.
+  iree_host_size_t actual_shape_rank =
+      iree_hal_buffer_view_shape_rank(buffer_view);
+  const iree_hal_dim_t* actual_shape_dims =
+      iree_hal_buffer_view_shape_dims(buffer_view);
+  iree_status_t shape_status = iree_ok_status();
+  if (actual_shape_rank != expected_shape_rank) {
+    shape_status =
+        iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                         "%.*s shape rank mismatch; expected %zu but have %zu",
+                         (int)message_str.size, message_str.data,
+                         expected_shape_rank, actual_shape_rank);
+  }
+  if (iree_status_is_ok(shape_status)) {
+    // Ranks match; report only the first mismatching dimension.
+    for (iree_host_size_t i = 0; i < actual_shape_rank; ++i) {
+      if (actual_shape_dims[i] == expected_shape_dims[i]) continue;
+      // Dimension mismatch.
+      shape_status = iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "%.*s shape dimension %zu mismatch; expected %d but have %d",
+          (int)message_str.size, message_str.data, i, expected_shape_dims[i],
+          actual_shape_dims[i]);
+      break;
+    }
+  }
+
+#if IREE_HAL_MODULE_STRING_UTIL_ENABLE
+  // When string utils are available annotate any shape failure with the full
+  // expected/actual shapes for easier debugging.
+  if (!iree_status_is_ok(shape_status)) {
+    char actual_shape_str[32];
+    iree_host_size_t actual_shape_str_length = 0;
+    char expected_shape_str[32];
+    iree_host_size_t expected_shape_str_length = 0;
+    IREE_RETURN_IF_ERROR(iree_hal_format_shape(
+        actual_shape_dims, actual_shape_rank, sizeof(actual_shape_str),
+        actual_shape_str, &actual_shape_str_length));
+    IREE_RETURN_IF_ERROR(iree_hal_format_shape(
+        expected_shape_dims, expected_shape_rank, sizeof(expected_shape_str),
+        expected_shape_str, &expected_shape_str_length));
+    shape_status = iree_status_annotate_f(
+        shape_status, "expected shape %.*s, actual shape %.*s",
+        (int)expected_shape_str_length, expected_shape_str,
+        (int)actual_shape_str_length, actual_shape_str);
+  }
+#endif  // IREE_HAL_MODULE_STRING_UTIL_ENABLE
+
+  return shape_status;
+}
+
+// Returns the buffer backing a buffer view with a retained reference.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_buffer,  //
+                   iree_hal_module_state_t,             //
+                   r, r) {
+  iree_hal_buffer_view_t* view = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_check_deref(args->r0, &view));
+  rets->r0 = iree_hal_buffer_retain_ref(iree_hal_buffer_view_buffer(view));
+  return iree_ok_status();
+}
+
+// Queries the total byte length of a buffer view's contents.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_byte_length,  //
+                   iree_hal_module_state_t,                  //
+                   r, i) {
+  iree_hal_buffer_view_t* view = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_check_deref(args->r0, &view));
+  rets->i0 = (iree_vm_size_t)iree_hal_buffer_view_byte_length(view);
+  return iree_ok_status();
+}
+
+// Queries the element type enum value of a buffer view.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_element_type,  //
+                   iree_hal_module_state_t,                   //
+                   r, i) {
+  iree_hal_buffer_view_t* view = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_check_deref(args->r0, &view));
+  rets->i0 = (uint32_t)iree_hal_buffer_view_element_type(view);
+  return iree_ok_status();
+}
+
+// Queries the encoding type enum value of a buffer view.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_encoding_type,  //
+                   iree_hal_module_state_t,                    //
+                   r, i) {
+  iree_hal_buffer_view_t* view = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_check_deref(args->r0, &view));
+  rets->i0 = (uint32_t)iree_hal_buffer_view_encoding_type(view);
+  return iree_ok_status();
+}
+
+// Queries the shape rank (number of dimensions) of a buffer view.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_rank,  //
+                   iree_hal_module_state_t,           //
+                   r, i) {
+  iree_hal_buffer_view_t* view = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_check_deref(args->r0, &view));
+  rets->i0 = (iree_vm_size_t)iree_hal_buffer_view_shape_rank(view);
+  return iree_ok_status();
+}
+
+// Queries one dimension of a buffer view's shape by index.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_dim,  //
+                   iree_hal_module_state_t,          //
+                   ri, i) {
+  iree_hal_buffer_view_t* view = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_check_deref(args->r0, &view));
+  iree_vm_size_t dim_index = (iree_vm_size_t)args->i1;
+  rets->i0 = (iree_vm_size_t)iree_hal_buffer_view_shape_dim(view, dim_index);
+  return iree_ok_status();
+}
+
+// Prints each buffer view in the variadic argument list to stderr under an
+// "=== key ===" header. Debugging aid only; compiles to a no-op when
+// IREE_HAL_MODULE_STRING_UTIL_ENABLE is 0.
+IREE_VM_ABI_EXPORT(iree_hal_module_buffer_view_trace,  //
+                   iree_hal_module_state_t,            //
+                   rCrD, v) {
+#if IREE_HAL_MODULE_STRING_UTIL_ENABLE
+
+  iree_vm_buffer_t* key = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r0, &key));
+  iree_string_view_t key_str = iree_vm_buffer_as_string(key);
+
+  fprintf(stderr, "=== %.*s ===\n", (int)key_str.size, key_str.data);
+  for (iree_host_size_t i = 0; i < args->a1_count; ++i) {
+    iree_hal_buffer_view_t* buffer_view = NULL;
+    IREE_RETURN_IF_ERROR(
+        iree_hal_buffer_view_check_deref(args->a1[i].r0, &buffer_view));
+
+    // NOTE: this export is for debugging only and a no-op in min-size builds.
+    // We heap-alloc here because at the point this export is used performance
+    // is not a concern.
+
+    // Query total length (excluding NUL terminator).
+    // The sizing query is expected to fail with OUT_OF_RANGE while reporting
+    // the required length; any other status (including OK) is returned as-is.
+    iree_host_size_t result_length = 0;
+    iree_status_t status = iree_hal_buffer_view_format(buffer_view, SIZE_MAX, 0,
+                                                       NULL, &result_length);
+    if (!iree_status_is_out_of_range(status)) {
+      return status;
+    }
+    ++result_length;  // include NUL
+
+    // Allocate scratch heap memory to contain the result and format into it.
+    char* result_str = NULL;
+    IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+        state->host_allocator, result_length, (void**)&result_str));
+    status = iree_hal_buffer_view_format(buffer_view, SIZE_MAX, result_length,
+                                         result_str, &result_length);
+    if (iree_status_is_ok(status)) {
+      fprintf(stderr, "%.*s\n", (int)result_length, result_str);
+    }
+    // Free the scratch buffer before propagating any formatting failure.
+    iree_allocator_free(state->host_allocator, result_str);
+    IREE_RETURN_IF_ERROR(status);
+  }
+  fprintf(stderr, "\n");
+
+#endif  // IREE_HAL_MODULE_STRING_UTIL_ENABLE
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_command_buffer_t
+//===----------------------------------------------------------------------===//
+
+// Creates a command buffer on |device| for recording commands.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_create,  //
+                   iree_hal_module_state_t,                //
+                   rii, r) {
+  iree_hal_device_t* device = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_create(
+      device, (iree_hal_command_buffer_mode_t)args->i1,
+      (iree_hal_command_category_t)args->i2, IREE_HAL_QUEUE_AFFINITY_ANY,
+      &command_buffer));
+  rets->r0 = iree_hal_command_buffer_move_ref(command_buffer);
+  return iree_ok_status();
+}
+
+// Begins recording into a command buffer.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_begin,  //
+                   iree_hal_module_state_t,               //
+                   r, v) {
+  iree_hal_command_buffer_t* cmdbuf = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_check_deref(args->r0, &cmdbuf));
+  return iree_hal_command_buffer_begin(cmdbuf);
+}
+
+// Ends recording into a command buffer.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_end,  //
+                   iree_hal_module_state_t,             //
+                   r, v) {
+  iree_hal_command_buffer_t* cmdbuf = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_check_deref(args->r0, &cmdbuf));
+  return iree_hal_command_buffer_end(cmdbuf);
+}
+
+// Pushes a labeled debug group onto the command buffer for tooling.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_begin_debug_group,  //
+                   iree_hal_module_state_t,                           //
+                   rr, v) {
+  iree_hal_command_buffer_t* cmdbuf = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_check_deref(args->r0, &cmdbuf));
+  iree_vm_buffer_t* label = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r1, &label));
+  // TODO(benvanik): query from VM.
+  iree_hal_label_location_t location = {
+      .file = iree_string_view_empty(),
+      .line = 0,
+  };
+  iree_hal_command_buffer_begin_debug_group(
+      cmdbuf, iree_vm_buffer_as_string(label),
+      iree_hal_label_color_unspecified(), &location);
+  return iree_ok_status();
+}
+
+// Pops the most recently pushed debug group from the command buffer.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_end_debug_group,  //
+                   iree_hal_module_state_t,                         //
+                   r, v) {
+  iree_hal_command_buffer_t* cmdbuf = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_check_deref(args->r0, &cmdbuf));
+  iree_hal_command_buffer_end_debug_group(cmdbuf);
+  return iree_ok_status();
+}
+
+// Records an execution barrier between |source_stage_mask| and
+// |target_stage_mask|. The wire-encoded barrier list is not yet decoded;
+// a single conservative dispatch-write -> dispatch-read global memory
+// barrier is recorded instead (see TODO below).
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_execution_barrier,  //
+                   iree_hal_module_state_t,                           //
+                   riii, v) {
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_command_buffer_check_deref(args->r0, &command_buffer));
+  iree_hal_execution_stage_t source_stage_mask =
+      (iree_hal_execution_stage_t)args->i1;
+  iree_hal_execution_stage_t target_stage_mask =
+      (iree_hal_execution_stage_t)args->i2;
+  iree_hal_execution_barrier_flags_t flags =
+      (iree_hal_execution_barrier_flags_t)args->i3;
+
+  // TODO(benvanik): decode barriers.
+  iree_hal_memory_barrier_t global_barrier;
+  global_barrier.source_scope = IREE_HAL_ACCESS_SCOPE_DISPATCH_WRITE;
+  global_barrier.target_scope = IREE_HAL_ACCESS_SCOPE_DISPATCH_READ;
+
+  return iree_hal_command_buffer_execution_barrier(
+      command_buffer, source_stage_mask, target_stage_mask, flags, 1,
+      &global_barrier, 0, NULL);
+}
+
+// Records a fill of |length| bytes at |target_offset| in the target buffer
+// with a repeating byte pattern of |pattern_length| bytes.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_fill_buffer,  //
+                   iree_hal_module_state_t,                     //
+                   rriiii, v) {
+  iree_hal_command_buffer_t* cmdbuf = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_check_deref(args->r0, &cmdbuf));
+  iree_hal_buffer_t* target_buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r1, &target_buffer));
+  uint32_t pattern = (uint32_t)args->i4;
+  return iree_hal_command_buffer_fill_buffer(
+      cmdbuf, target_buffer, (iree_vm_size_t)args->i2,
+      (iree_vm_size_t)args->i3, &pattern, (uint32_t)args->i5);
+}
+
+// Records a buffer-to-buffer copy of |length| bytes between the given
+// source and target ranges.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_copy_buffer,  //
+                   iree_hal_module_state_t,                     //
+                   rririi, v) {
+  iree_hal_command_buffer_t* cmdbuf = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_check_deref(args->r0, &cmdbuf));
+  iree_hal_buffer_t* src = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r1, &src));
+  iree_hal_buffer_t* dst = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r3, &dst));
+  return iree_hal_command_buffer_copy_buffer(
+      cmdbuf, src, (iree_vm_size_t)args->i2, dst, (iree_vm_size_t)args->i4,
+      (iree_vm_size_t)args->i5);
+}
+
+// Records a push constant update of the variadic i32 values starting at
+// i32 register |offset| within |executable_layout|.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_push_constants,  //
+                   iree_hal_module_state_t,                        //
+                   rriCiD, v) {
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_command_buffer_check_deref(args->r0, &command_buffer));
+  iree_hal_executable_layout_t* executable_layout = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_executable_layout_check_deref(args->r1, &executable_layout));
+  iree_vm_size_t offset = (iree_vm_size_t)args->i2;
+  iree_host_size_t value_count = args->a3_count;
+  // NOTE(review): reinterprets the variadic storage as a packed uint32_t
+  // array in place; assumes the ABI lays out a3 elements contiguously with
+  // no padding - confirm against the IREE_VM_ABI declaration.
+  const uint32_t* values = (const uint32_t*)&args->a3[0].i0;
+
+  // The HAL call takes byte offsets/lengths; scale from i32 element units.
+  return iree_hal_command_buffer_push_constants(
+      command_buffer, executable_layout, offset * sizeof(uint32_t), values,
+      value_count * sizeof(uint32_t));
+}
+
+// Records an inline descriptor set update of variadic
+// (binding, buffer, offset, length) tuples for |set| in |executable_layout|.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_push_descriptor_set,  //
+                   iree_hal_module_state_t,                             //
+                   rriCiriiD, v) {
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_command_buffer_check_deref(args->r0, &command_buffer));
+  iree_hal_executable_layout_t* executable_layout = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_executable_layout_check_deref(args->r1, &executable_layout));
+  iree_vm_size_t set = args->i2;
+
+  iree_host_size_t binding_count = args->a3_count;
+  if (IREE_UNLIKELY(binding_count >
+                    IREE_HAL_MODULE_MAX_DESCRIPTOR_BINDING_COUNT)) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE, "binding count %zu > %zu",
+                            binding_count,
+                            IREE_HAL_MODULE_MAX_DESCRIPTOR_BINDING_COUNT);
+  }
+  // Stack allocation is bounded by the count check above.
+  iree_hal_descriptor_set_binding_t* bindings =
+      (iree_hal_descriptor_set_binding_t*)iree_alloca(
+          binding_count * sizeof(iree_hal_descriptor_set_binding_t));
+  // Unpack each (binding ordinal, buffer ref, offset, length) tuple.
+  for (iree_host_size_t i = 0; i < binding_count; ++i) {
+    IREE_RETURN_IF_ERROR(
+        iree_hal_buffer_check_deref(args->a3[i].r1, &bindings[i].buffer));
+    bindings[i].binding = (uint32_t)args->a3[i].i0;
+    bindings[i].offset = (iree_device_size_t)args->a3[i].i2;
+    bindings[i].length = (iree_device_size_t)args->a3[i].i3;
+  }
+
+  return iree_hal_command_buffer_push_descriptor_set(
+      command_buffer, executable_layout, set, binding_count, bindings);
+}
+
+// Records a bind of a previously created descriptor set at |set| in
+// |executable_layout|, with optional variadic dynamic byte offsets.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_bind_descriptor_set,  //
+                   iree_hal_module_state_t,                             //
+                   rrirCiD, v) {
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_command_buffer_check_deref(args->r0, &command_buffer));
+  iree_hal_executable_layout_t* executable_layout = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_executable_layout_check_deref(args->r1, &executable_layout));
+  int32_t set = args->i2;
+  iree_hal_descriptor_set_t* descriptor_set = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_descriptor_set_check_deref(args->r3, &descriptor_set));
+  // Copy the variadic dynamic offsets to the stack (bounded at 64).
+  iree_host_size_t dynamic_offset_count = 0;
+  iree_device_size_t* dynamic_offsets = NULL;
+  IREE_VM_ABI_VLA_STACK_CAST(args, a4_count, a4, iree_device_size_t, 64,
+                             &dynamic_offset_count, &dynamic_offsets);
+  return iree_hal_command_buffer_bind_descriptor_set(
+      command_buffer, executable_layout, set, descriptor_set,
+      dynamic_offset_count, dynamic_offsets);
+}
+
+// Records a dispatch of an executable entry point with a static
+// workgroup count in x/y/z.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_dispatch,  //
+                   iree_hal_module_state_t,                  //
+                   rriiii, v) {
+  iree_hal_command_buffer_t* cmdbuf = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_check_deref(args->r0, &cmdbuf));
+  iree_hal_executable_t* executable = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_executable_check_deref(args->r1, &executable));
+  return iree_hal_command_buffer_dispatch(
+      cmdbuf, executable, (uint32_t)args->i2, (uint32_t)args->i3,
+      (uint32_t)args->i4, (uint32_t)args->i5);
+}
+
+// Records a dispatch whose workgroup count is read from |workgroups_buffer|
+// at |workgroups_offset| when the command executes.
+IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_dispatch_indirect,  //
+                   iree_hal_module_state_t,                           //
+                   rriri, v) {
+  iree_hal_command_buffer_t* cmdbuf = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_check_deref(args->r0, &cmdbuf));
+  iree_hal_executable_t* executable = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_executable_check_deref(args->r1, &executable));
+  iree_hal_buffer_t* workgroups_buffer = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_buffer_check_deref(args->r3, &workgroups_buffer));
+  return iree_hal_command_buffer_dispatch_indirect(
+      cmdbuf, executable, (uint32_t)args->i2, workgroups_buffer,
+      (iree_vm_size_t)args->i4);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_descriptor_set_t
+//===----------------------------------------------------------------------===//
+
+// Creates a descriptor set on |device| from variadic
+// (binding, buffer, offset, length) tuples against |set_layout|.
+IREE_VM_ABI_EXPORT(iree_hal_module_descriptor_set_create,  //
+                   iree_hal_module_state_t,                //
+                   rrCiriiD, r) {
+  iree_hal_device_t* device = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+  iree_hal_descriptor_set_layout_t* set_layout = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_hal_descriptor_set_layout_check_deref(args->r1, &set_layout));
+
+  iree_host_size_t binding_count = args->a2_count;
+  if (IREE_UNLIKELY(binding_count >
+                    IREE_HAL_MODULE_MAX_DESCRIPTOR_BINDING_COUNT)) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE, "binding count %zu > %zu",
+                            binding_count,
+                            IREE_HAL_MODULE_MAX_DESCRIPTOR_BINDING_COUNT);
+  }
+  // Stack allocation is bounded by the count check above.
+  iree_hal_descriptor_set_binding_t* bindings =
+      (iree_hal_descriptor_set_binding_t*)iree_alloca(
+          binding_count * sizeof(iree_hal_descriptor_set_binding_t));
+  // Unpack each (binding ordinal, buffer ref, offset, length) tuple.
+  for (iree_host_size_t i = 0; i < binding_count; ++i) {
+    IREE_RETURN_IF_ERROR(
+        iree_hal_buffer_check_deref(args->a2[i].r1, &bindings[i].buffer));
+    bindings[i].binding = (uint32_t)args->a2[i].i0;
+    bindings[i].offset = (iree_device_size_t)args->a2[i].i2;
+    bindings[i].length = (iree_device_size_t)args->a2[i].i3;
+  }
+
+  iree_hal_descriptor_set_t* descriptor_set = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_descriptor_set_create(
+      device, set_layout, binding_count, bindings, &descriptor_set));
+  rets->r0 = iree_hal_descriptor_set_move_ref(descriptor_set);
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_descriptor_set_layout
+//===----------------------------------------------------------------------===//
+
+// VM shim for hal.descriptor_set_layout.create: builds a descriptor set
+// layout on |device| (r0) with |usage_type| (i1) and a variadic list of
+// (binding ordinal, descriptor type) pairs. Returns the layout ref in r0.
+IREE_VM_ABI_EXPORT(iree_hal_module_descriptor_set_layout_create, //
+ iree_hal_module_state_t, //
+ riCiiD, r) {
+ iree_hal_device_t* device = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+ iree_hal_descriptor_set_layout_usage_type_t usage_type =
+ (iree_hal_descriptor_set_layout_usage_type_t)args->i1;
+
+ // Bound before the alloca below to keep stack usage capped regardless of
+ // what the module requests.
+ iree_host_size_t binding_count = args->a2_count;
+ if (IREE_UNLIKELY(binding_count >
+ IREE_HAL_MODULE_MAX_DESCRIPTOR_BINDING_COUNT)) {
+ return iree_make_status(IREE_STATUS_OUT_OF_RANGE, "binding count %zu > %zu",
+ binding_count,
+ IREE_HAL_MODULE_MAX_DESCRIPTOR_BINDING_COUNT);
+ }
+ iree_hal_descriptor_set_layout_binding_t* bindings =
+ (iree_hal_descriptor_set_layout_binding_t*)iree_alloca(
+ binding_count * sizeof(iree_hal_descriptor_set_layout_binding_t));
+ for (iree_host_size_t i = 0; i < binding_count; ++i) {
+ bindings[i].binding = (uint32_t)args->a2[i].i0;
+ bindings[i].type = (iree_hal_descriptor_type_t)args->a2[i].i1;
+ }
+
+ iree_hal_descriptor_set_layout_t* descriptor_set_layout = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_descriptor_set_layout_create(
+ device, usage_type, binding_count, bindings, &descriptor_set_layout));
+ rets->r0 = iree_hal_descriptor_set_layout_move_ref(descriptor_set_layout);
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_device_t
+//===----------------------------------------------------------------------===//
+
+// VM shim for hal.device.allocator: returns a retained ref to the allocator
+// owned by |device| (r0).
+IREE_VM_ABI_EXPORT(iree_hal_module_device_allocator, //
+ iree_hal_module_state_t, //
+ r, r) {
+ iree_hal_device_t* device = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+ rets->r0 = iree_hal_allocator_retain_ref(iree_hal_device_allocator(device));
+ return iree_ok_status();
+}
+
+// VM shim for hal.device.query.i32: queries |device| (r0) for the i32 value
+// of |category|/|key| (r1/r2 utf-8 buffers). Returns i0=1 if the query
+// succeeded and i1=value; a failed query yields i0=0 rather than an error so
+// modules can probe optional capabilities.
+IREE_VM_ABI_EXPORT(iree_hal_module_device_query_i32, //
+ iree_hal_module_state_t, //
+ rrr, ii) {
+ iree_hal_device_t* device = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+ iree_vm_buffer_t* category = NULL;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r1, &category));
+ iree_string_view_t category_str = iree_vm_buffer_as_string(category);
+ iree_vm_buffer_t* key = NULL;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r2, &key));
+ iree_string_view_t key_str = iree_vm_buffer_as_string(key);
+
+ int32_t value = 0;
+ iree_status_t query_status =
+ iree_hal_device_query_i32(device, category_str, key_str, &value);
+ // consume_code frees any allocated status storage while returning the code.
+ rets->i0 = iree_status_consume_code(query_status) == IREE_STATUS_OK ? 1 : 0;
+ rets->i1 = (int32_t)value;
+ return iree_ok_status();
+}
+
+//===--------------------------------------------------------------------===//
+// iree_hal_executable_t
+//===--------------------------------------------------------------------===//
+
+// VM shim for hal.executable.create: prepares an executable from
+// |executable_format| (r1) and |executable_data| (r2) via the module state's
+// executable cache, with optional 4-byte constants (r3, may be a null ref)
+// and a variadic list of executable layouts (a4). Returns the executable ref
+// in r0.
+IREE_VM_ABI_EXPORT(iree_hal_module_executable_create, //
+ iree_hal_module_state_t, //
+ rrrrCrD, r) {
+ iree_hal_device_t* device = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+ iree_vm_buffer_t* executable_format = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_vm_buffer_check_deref(args->r1, &executable_format));
+ iree_string_view_t executable_format_str =
+ iree_vm_buffer_as_string(executable_format);
+ iree_vm_buffer_t* executable_data = NULL;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_check_deref(args->r2, &executable_data));
+ // Constants are optional: r3 is only dereferenced when it is actually a
+ // vm.buffer; a null/other ref leaves constant_count at 0.
+ iree_host_size_t constant_count = 0;
+ const uint32_t* constants = NULL;
+ if (iree_vm_buffer_isa(args->r3)) {
+ iree_vm_buffer_t* constant_buffer = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_vm_buffer_check_deref(args->r3, &constant_buffer));
+ if (constant_buffer->data.data_length % 4 != 0) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "constant buffer data must contain 4-byte "
+ "elements but data length is %" PRIhsz,
+ constant_buffer->data.data_length);
+ }
+ constant_count = constant_buffer->data.data_length / sizeof(uint32_t);
+ constants = (const uint32_t*)constant_buffer->data.data;
+ }
+ // Heap-allocated (not alloca) as the count is module-controlled; freed
+ // unconditionally below on both success and failure paths.
+ iree_host_size_t executable_layout_count = args->a4_count;
+ iree_hal_executable_layout_t** executable_layouts = NULL;
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+ state->host_allocator,
+ executable_layout_count * sizeof(executable_layouts[0]),
+ (void**)&executable_layouts));
+ iree_status_t status = iree_ok_status();
+ for (iree_host_size_t i = 0; i < executable_layout_count; ++i) {
+ status = iree_hal_executable_layout_check_deref(args->a4[i].r0,
+ &executable_layouts[i]);
+ if (!iree_status_is_ok(status)) break;
+ }
+
+ iree_hal_executable_t* executable = NULL;
+ if (iree_status_is_ok(status)) {
+ iree_hal_executable_params_t executable_params;
+ iree_hal_executable_params_initialize(&executable_params);
+ // Data owned by the module itself outlives the executable and may be
+ // aliased instead of copied.
+ executable_params.caching_mode |=
+ executable_data->access == IREE_VM_BUFFER_ACCESS_ORIGIN_MODULE
+ ? IREE_HAL_EXECUTABLE_CACHING_MODE_ALIAS_PROVIDED_DATA
+ : 0;
+ executable_params.executable_format = executable_format_str;
+ executable_params.executable_data = iree_make_const_byte_span(
+ executable_data->data.data, executable_data->data.data_length);
+ executable_params.executable_layout_count = executable_layout_count;
+ executable_params.executable_layouts = executable_layouts;
+ executable_params.constant_count = constant_count;
+ executable_params.constants = constants;
+ status = iree_hal_executable_cache_prepare_executable(
+ state->executable_cache, &executable_params, &executable);
+ }
+
+ iree_allocator_free(state->host_allocator, executable_layouts);
+ // On failure |executable| is NULL and r0 receives a null ref.
+ rets->r0 = iree_hal_executable_move_ref(executable);
+ return status;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_layout_t
+//===----------------------------------------------------------------------===//
+
+// VM shim for hal.executable_layout.create: builds an executable layout on
+// |device| (r0) with |push_constants| (i1) and a variadic list of descriptor
+// set layout refs (a2, stack-dereferenced with an inline capacity of 32).
+// Returns the layout ref in r0.
+IREE_VM_ABI_EXPORT(iree_hal_module_executable_layout_create, //
+ iree_hal_module_state_t, //
+ riCrD, r) {
+ iree_hal_device_t* device = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+ int32_t push_constants = (int32_t)args->i1;
+ iree_host_size_t set_layout_count = 0;
+ iree_hal_descriptor_set_layout_t** set_layouts = NULL;
+ IREE_VM_ABI_VLA_STACK_DEREF(args, a2_count, a2,
+ iree_hal_descriptor_set_layout, 32,
+ &set_layout_count, &set_layouts);
+
+ iree_hal_executable_layout_t* executable_layout = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_executable_layout_create(
+ device, push_constants, set_layout_count, set_layouts,
+ &executable_layout));
+ rets->r0 = iree_hal_executable_layout_move_ref(executable_layout);
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_semaphore_t
+//===----------------------------------------------------------------------===//
+
+// VM shim for hal.semaphore.create: creates a semaphore on |device| (r0)
+// starting at |initial_value| (i1). Returns the semaphore ref in r0.
+IREE_VM_ABI_EXPORT(iree_hal_module_semaphore_create, //
+ iree_hal_module_state_t, //
+ ri, r) {
+ iree_hal_device_t* device = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
+ uint32_t initial_value = (uint32_t)args->i1;
+
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_hal_semaphore_create(device, initial_value, &semaphore));
+ rets->r0 = iree_hal_semaphore_move_ref(semaphore);
+ return iree_ok_status();
+}
+
+// VM shim for hal.semaphore.query: returns i0=status code of the query and
+// i1=current payload value of |semaphore| (r0). Query failures are reported
+// through i0 rather than as a call error.
+IREE_VM_ABI_EXPORT(iree_hal_module_semaphore_query, //
+ iree_hal_module_state_t, //
+ r, ii) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_semaphore_check_deref(args->r0, &semaphore));
+
+ uint64_t value = 0;
+ iree_status_t query_status = iree_hal_semaphore_query(semaphore, &value);
+ rets->i0 = iree_status_consume_code(query_status);
+ // NOTE(review): the 64-bit payload is truncated to 32 bits here — presumably
+ // matching the VM's i32 ABI; confirm callers only use 32-bit timelines.
+ rets->i1 = (uint32_t)value;
+ return iree_ok_status();
+}
+
+// VM shim for hal.semaphore.signal: signals |semaphore| (r0) to |new_value|
+// (i1), returning the signal status directly.
+IREE_VM_ABI_EXPORT(iree_hal_module_semaphore_signal, //
+ iree_hal_module_state_t, //
+ ri, v) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_semaphore_check_deref(args->r0, &semaphore));
+ uint32_t new_value = (uint32_t)args->i1;
+
+ return iree_hal_semaphore_signal(semaphore, new_value);
+}
+
+// VM shim for hal.semaphore.fail: moves |semaphore| (r0) into a failure state
+// carrying the status code from i1 (masked to valid code bits). Always
+// succeeds from the VM's perspective.
+IREE_VM_ABI_EXPORT(iree_hal_module_semaphore_fail, //
+ iree_hal_module_state_t, //
+ ri, v) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_semaphore_check_deref(args->r0, &semaphore));
+ iree_status_code_t status_code =
+ (iree_status_code_t)(args->i1 & IREE_STATUS_CODE_MASK);
+
+ iree_hal_semaphore_fail(semaphore, iree_make_status(status_code));
+ return iree_ok_status();
+}
+
+// VM shim for hal.semaphore.await: blocks until |semaphore| (r0) reaches
+// |new_value| (i1). Returns i0=0 on success or i0=deadline-exceeded code;
+// other failures propagate as the call status.
+IREE_VM_ABI_EXPORT(iree_hal_module_semaphore_await, //
+ iree_hal_module_state_t, //
+ ri, i) {
+ iree_hal_semaphore_t* semaphore = NULL;
+ IREE_RETURN_IF_ERROR(iree_hal_semaphore_check_deref(args->r0, &semaphore));
+ // Cast through uint32_t so the i32 arg widens zero-extended (not
+ // sign-extended) into the 64-bit timeline value.
+ uint64_t new_value = (uint32_t)args->i1;
+
+ // TODO(benvanik): coroutine magic.
+ iree_status_t status =
+ iree_hal_semaphore_wait(semaphore, new_value, iree_infinite_timeout());
+ if (iree_status_is_ok(status)) {
+ rets->i0 = 0;
+ } else if (iree_status_is_deadline_exceeded(status)) {
+ // Propagate deadline exceeded back to the VM.
+ rets->i0 = (int32_t)iree_status_consume_code(status);
+ }
+ return status;
+}
+
+//===----------------------------------------------------------------------===//
+// VM module interface implementation
+//===----------------------------------------------------------------------===//
+
+// NOTE: this must match the ordering of the iree_hal_module_exports_ table.
+// Function pointer table built by expanding EXPORT_FN over exports.inl; the
+// include is X-macro style so the same list drives both tables below.
+static const iree_vm_native_function_ptr_t iree_hal_module_funcs_[] = {
+#define EXPORT_FN(name, target_fn, arg_types, ret_types) \
+ { \
+ .shim = (iree_vm_native_function_shim_t) \
+ iree_vm_shim_##arg_types##_##ret_types, \
+ .target = (iree_vm_native_function_target_t)(target_fn), \
+ },
+#include "iree/modules/hal/exports.inl" // IWYU pragma: keep
+#undef EXPORT_FN
+};
+
+// NOTE: 0 length, but can't express that in C.
+static const iree_vm_native_import_descriptor_t iree_hal_module_imports_[1];
+
+// Export descriptor table; calling_convention is derived from the same
+// arg/ret type codes used to pick the shim above, keeping the two in sync.
+static const iree_vm_native_export_descriptor_t iree_hal_module_exports_[] = {
+#define EXPORT_FN(name, target_fn, arg_types, ret_types) \
+ { \
+ .local_name = iree_string_view_literal(name), \
+ .calling_convention = \
+ iree_string_view_literal("0" #arg_types "_" #ret_types), \
+ .reflection_attr_count = 0, \
+ .reflection_attrs = NULL, \
+ },
+#include "iree/modules/hal/exports.inl" // IWYU pragma: keep
+#undef EXPORT_FN
+};
+static_assert(IREE_ARRAYSIZE(iree_hal_module_funcs_) ==
+ IREE_ARRAYSIZE(iree_hal_module_exports_),
+ "function pointer table must be 1:1 with exports");
+
+// Static module descriptor registered with the VM in iree_hal_module_create.
+static const iree_vm_native_module_descriptor_t iree_hal_module_descriptor_ = {
+ .module_name = iree_string_view_literal("hal"),
+ .import_count = 0, // workaround for 0-length C struct
+ .imports = iree_hal_module_imports_,
+ .export_count = IREE_ARRAYSIZE(iree_hal_module_exports_),
+ .exports = iree_hal_module_exports_,
+ .function_count = IREE_ARRAYSIZE(iree_hal_module_funcs_),
+ .functions = iree_hal_module_funcs_,
+ .reflection_attr_count = 0,
+ .reflection_attrs = NULL,
+};
+
+// Creates the HAL VM module bound to |device|, which is retained for the
+// module's lifetime. The module struct is allocated as a single block:
+// [base native module storage][iree_hal_module_t], with the custom part
+// reached via IREE_HAL_MODULE_CAST. On success *out_module owns the module.
+IREE_API_EXPORT iree_status_t
+iree_hal_module_create(iree_hal_device_t* device, iree_allocator_t allocator,
+ iree_vm_module_t** out_module) {
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(out_module);
+ *out_module = NULL;
+
+ // Setup the interface with the functions we implement ourselves. Any function
+ // we omit will be handled by the base native module.
+ static const iree_vm_module_t interface = {
+ .destroy = iree_hal_module_destroy,
+ .alloc_state = iree_hal_module_alloc_state,
+ .free_state = iree_hal_module_free_state,
+ .notify = iree_hal_module_notify,
+ };
+
+ // Allocate shared module state.
+ iree_host_size_t total_size =
+ iree_vm_native_module_size() + sizeof(iree_hal_module_t);
+ iree_vm_module_t* base_module = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_allocator_malloc(allocator, total_size, (void**)&base_module));
+ memset(base_module, 0, total_size);
+ iree_status_t status = iree_vm_native_module_initialize(
+ &interface, &iree_hal_module_descriptor_, allocator, base_module);
+ if (!iree_status_is_ok(status)) {
+ // Initialization failed before any state was attached; safe to free.
+ iree_allocator_free(allocator, base_module);
+ return status;
+ }
+
+ iree_hal_module_t* module = IREE_HAL_MODULE_CAST(base_module);
+ module->host_allocator = allocator;
+ module->shared_device = device;
+ iree_hal_device_retain(module->shared_device);
+
+ *out_module = base_module;
+ return iree_ok_status();
+}
+
+// Returns the (unretained) device shared by all contexts using the HAL
+// module whose per-context state is |module_state|.
+IREE_API_EXPORT iree_hal_device_t* iree_hal_module_state_device(
+ iree_vm_module_state_t* module_state) {
+ iree_hal_module_state_t* state = (iree_hal_module_state_t*)module_state;
+ return state->shared_device;
+}
+
+//===--------------------------------------------------------------------===//
+// Utilities
+//===--------------------------------------------------------------------===//
+
+// Returns the buffer view at list index |i| without adding a reference;
+// the list retains ownership. NULL if the element is not a buffer view.
+IREE_API_EXPORT iree_hal_buffer_view_t* iree_vm_list_get_buffer_view_assign(
+ const iree_vm_list_t* list, iree_host_size_t i) {
+ return (iree_hal_buffer_view_t*)iree_vm_list_get_ref_deref(
+ list, i, iree_hal_buffer_view_get_descriptor());
+}
+
+// As iree_vm_list_get_buffer_view_assign but retains the returned view for
+// the caller (retain on NULL is a no-op, so a missing element stays NULL).
+IREE_API_EXPORT iree_hal_buffer_view_t* iree_vm_list_get_buffer_view_retain(
+ const iree_vm_list_t* list, iree_host_size_t i) {
+ iree_hal_buffer_view_t* value = iree_vm_list_get_buffer_view_assign(list, i);
+ iree_hal_buffer_view_retain(value);
+ return value;
+}
+
+// Stores |value| at list index |i|; the list retains its own reference while
+// the caller keeps theirs (wrap_assign does not transfer ownership).
+IREE_API_EXPORT iree_status_t iree_vm_list_set_buffer_view_retain(
+ iree_vm_list_t* list, iree_host_size_t i, iree_hal_buffer_view_t* value) {
+ iree_vm_ref_t value_ref;
+ IREE_RETURN_IF_ERROR(iree_vm_ref_wrap_assign(
+ value, iree_hal_buffer_view_type_id(), &value_ref));
+ return iree_vm_list_set_ref_retain(list, i, &value_ref);
+}
diff --git a/runtime/src/iree/modules/hal/module.h b/runtime/src/iree/modules/hal/module.h
new file mode 100644
index 0000000..4a66ccd
--- /dev/null
+++ b/runtime/src/iree/modules/hal/module.h
@@ -0,0 +1,69 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_MODULES_HAL_MODULE_H_
+#define IREE_MODULES_HAL_MODULE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/vm/api.h"
+
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_allocator, iree_hal_allocator_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_buffer, iree_hal_buffer_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_buffer_view, iree_hal_buffer_view_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_command_buffer,
+ iree_hal_command_buffer_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_descriptor_set,
+ iree_hal_descriptor_set_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_descriptor_set_layout,
+ iree_hal_descriptor_set_layout_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_device, iree_hal_device_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_event, iree_hal_event_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_executable, iree_hal_executable_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_executable_cache,
+ iree_hal_executable_cache_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_executable_layout,
+ iree_hal_executable_layout_t);
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_hal_semaphore, iree_hal_semaphore_t);
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Registers the custom types used by the HAL module.
+// WARNING: not thread-safe; call at startup before using.
+IREE_API_EXPORT iree_status_t iree_hal_module_register_types(void);
+
+// Creates the HAL module initialized to use a specific |device|.
+// Each context using this module will share the device and have compatible
+// allocations.
+IREE_API_EXPORT iree_status_t
+iree_hal_module_create(iree_hal_device_t* device, iree_allocator_t allocator,
+ iree_vm_module_t** out_module);
+
+// Returns the device currently in use by the HAL module.
+// Returns NULL if no device has been initialized yet.
+IREE_API_EXPORT iree_hal_device_t* iree_hal_module_state_device(
+ iree_vm_module_state_t* module_state);
+
+// TODO(benvanik): generate these list helpers:
+
+IREE_API_EXPORT iree_hal_buffer_view_t* iree_vm_list_get_buffer_view_assign(
+ const iree_vm_list_t* list, iree_host_size_t i);
+
+IREE_API_EXPORT iree_hal_buffer_view_t* iree_vm_list_get_buffer_view_retain(
+ const iree_vm_list_t* list, iree_host_size_t i);
+
+IREE_API_EXPORT iree_status_t iree_vm_list_set_buffer_view_retain(
+ iree_vm_list_t* list, iree_host_size_t i, iree_hal_buffer_view_t* value);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_MODULES_HAL_MODULE_H_
diff --git a/runtime/src/iree/modules/vmvx/BUILD b/runtime/src/iree/modules/vmvx/BUILD
new file mode 100644
index 0000000..1a49b24
--- /dev/null
+++ b/runtime/src/iree/modules/vmvx/BUILD
@@ -0,0 +1,31 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_runtime_cc_library(
+ name = "vmvx",
+ srcs = [
+ "module.c",
+ ],
+ hdrs = [
+ "module.h",
+ ],
+ textual_hdrs = [
+ "exports.inl",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/vm",
+ ],
+)
diff --git a/runtime/src/iree/modules/vmvx/CMakeLists.txt b/runtime/src/iree/modules/vmvx/CMakeLists.txt
new file mode 100644
index 0000000..5b6bcf9
--- /dev/null
+++ b/runtime/src/iree/modules/vmvx/CMakeLists.txt
@@ -0,0 +1,29 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/modules/vmvx/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ vmvx
+ HDRS
+ "module.h"
+ TEXTUAL_HDRS
+ "exports.inl"
+ SRCS
+ "module.c"
+ DEPS
+ iree::base
+ iree::base::tracing
+ iree::vm
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/modules/vmvx/exports.inl b/runtime/src/iree/modules/vmvx/exports.inl
new file mode 100644
index 0000000..70b3ef0
--- /dev/null
+++ b/runtime/src/iree/modules/vmvx/exports.inl
@@ -0,0 +1,28 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===----------------------------------------------------------------------===//
+//
+// ██ ██ █████ ██████ ███ ██ ██ ███ ██ ██████
+// ██ ██ ██ ██ ██ ██ ████ ██ ██ ████ ██ ██
+// ██ █ ██ ███████ ██████ ██ ██ ██ ██ ██ ██ ██ ██ ███
+// ██ ███ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
+// ███ ███ ██ ██ ██ ██ ██ ████ ██ ██ ████ ██████
+//
+//===----------------------------------------------------------------------===//
+//
+// This file matches the vmvx.imports.mlir in the compiler. It'd be nice to
+// autogenerate this as the order of these functions must be sorted ascending by
+// name in a way compatible with iree_string_view_compare.
+//
+// Users are meant to `#define EXPORT_FN` to be able to access the information.
+// #define EXPORT_FN(name, target_fn, arg_types, ret_types)
+
+// clang-format off
+
+EXPORT_FN("_placeholder", iree_vmvx_module_placeholder, v, v)
+
+// clang-format on
diff --git a/runtime/src/iree/modules/vmvx/module.c b/runtime/src/iree/modules/vmvx/module.c
new file mode 100644
index 0000000..2133f67
--- /dev/null
+++ b/runtime/src/iree/modules/vmvx/module.c
@@ -0,0 +1,183 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/modules/vmvx/module.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/vm/api.h"
+
+//===----------------------------------------------------------------------===//
+// Type registration
+//===----------------------------------------------------------------------===//
+
+// NOTE: we aren't exporting any types yet; this is just the empty boilerplate.
+
+// static iree_vm_ref_type_descriptor_t iree_vmvx_interface_descriptor = {0};
+
+#define IREE_VM_REGISTER_VMVX_C_TYPE(type, name, destroy_fn, descriptor) \
+ descriptor.type_name = iree_make_cstring_view(name); \
+ descriptor.offsetof_counter = offsetof(type, ref_object); \
+ descriptor.destroy = (iree_vm_ref_destroy_t)destroy_fn; \
+ IREE_RETURN_IF_ERROR(iree_vm_ref_register_type(&descriptor));
+
+// Registers VMVX ref types with the global VM type registry. Currently a
+// no-op placeholder (no types exported yet); guarded so repeat calls are
+// harmless, but the flag itself is not thread-safe — call at startup.
+IREE_API_EXPORT iree_status_t iree_vmvx_module_register_types() {
+ static bool has_registered = false;
+ if (has_registered) return iree_ok_status();
+
+ // IREE_VM_REGISTER_VMVX_C_TYPE(iree_vmvx_interface_t, "vmvx.interface",
+ // iree_vmvx_interface_destroy,
+ // iree_vmvx_interface_descriptor);
+
+ has_registered = true;
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// Type wrappers
+//===----------------------------------------------------------------------===//
+
+// IREE_VM_DEFINE_TYPE_ADAPTERS(iree_vmvx_interface, iree_vmvx_interface_t);
+
+//===----------------------------------------------------------------------===//
+// Module type definitions
+//===----------------------------------------------------------------------===//
+
+// Shared (per-module, not per-context) VMVX module storage. Lives in the
+// same allocation as the base native module, directly after it.
+typedef struct iree_vmvx_module_t {
+ iree_allocator_t host_allocator;
+ // TODO(benvanik): types when we are not registering them globally.
+} iree_vmvx_module_t;
+
+// Recovers the iree_vmvx_module_t trailing the base native module storage
+// in the combined allocation made by iree_vmvx_module_create.
+#define IREE_VMVX_MODULE_CAST(module) \
+ (iree_vmvx_module_t*)((uint8_t*)(module) + iree_vm_native_module_size());
+
+// Per-context module state allocated by iree_vmvx_module_alloc_state.
+typedef struct iree_vmvx_module_state_t {
+ iree_allocator_t host_allocator;
+
+ // If we have any external libraries we want to interact with that are
+ // stateful we could store their state here. Note that VMVX invocations may
+ // happen from any thread and concurrently and if the state is not thread-safe
+ // we'll have to perform the synchronization ourselves here.
+} iree_vmvx_module_state_t;
+
+// Module destroy hook; the allocation itself is released by the base native
+// module, so there is nothing to do here.
+static void IREE_API_PTR iree_vmvx_module_destroy(void* base_module) {
+ // No state to clean up (yet).
+}
+
+// Allocates zero-initialized per-context state using |host_allocator|,
+// which is also stored so free_state can release it symmetrically.
+static iree_status_t IREE_API_PTR
+iree_vmvx_module_alloc_state(void* self, iree_allocator_t host_allocator,
+ iree_vm_module_state_t** out_module_state) {
+ iree_vmvx_module_state_t* state = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_allocator_malloc(host_allocator, sizeof(*state), (void**)&state));
+ memset(state, 0, sizeof(*state));
+ state->host_allocator = host_allocator;
+ *out_module_state = (iree_vm_module_state_t*)state;
+ return iree_ok_status();
+}
+
+// Frees per-context state with the same allocator it was created with.
+static void IREE_API_PTR
+iree_vmvx_module_free_state(void* self, iree_vm_module_state_t* module_state) {
+ iree_vmvx_module_state_t* state = (iree_vmvx_module_state_t*)module_state;
+ iree_allocator_free(state->host_allocator, state);
+}
+
+//===----------------------------------------------------------------------===//
+// TODO
+//===----------------------------------------------------------------------===//
+
+// Placeholder to make the function pointer arrays happy (they can't be empty).
+// Placeholder to make the function pointer arrays happy (they can't be empty).
+// Exported as "_placeholder" via exports.inl; takes and returns nothing.
+IREE_VM_ABI_EXPORT(iree_vmvx_module_placeholder, //
+ iree_vmvx_module_state_t, //
+ v, v) {
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// VM module interface implementation
+//===----------------------------------------------------------------------===//
+
+// NOTE: this must match the ordering of the iree_vmvx_module_exports_ table.
+static const iree_vm_native_function_ptr_t iree_vmvx_module_funcs_[] = {
+#define EXPORT_FN(name, target_fn, arg_types, ret_types) \
+ { \
+ .shim = (iree_vm_native_function_shim_t) \
+ iree_vm_shim_##arg_types##_##ret_types, \
+ .target = (iree_vm_native_function_target_t)(target_fn), \
+ },
+#include "iree/modules/vmvx/exports.inl" // IWYU pragma: keep
+#undef EXPORT_FN
+};
+
+// NOTE: 0 length, but can't express that in C.
+static const iree_vm_native_import_descriptor_t iree_vmvx_module_imports_[1];
+
+static const iree_vm_native_export_descriptor_t iree_vmvx_module_exports_[] = {
+#define EXPORT_FN(name, target_fn, arg_types, ret_types) \
+ { \
+ .local_name = iree_string_view_literal(name), \
+ .calling_convention = \
+ iree_string_view_literal("0" #arg_types "_" #ret_types), \
+ .reflection_attr_count = 0, \
+ .reflection_attrs = NULL, \
+ },
+#include "iree/modules/vmvx/exports.inl" // IWYU pragma: keep
+#undef EXPORT_FN
+};
+static_assert(IREE_ARRAYSIZE(iree_vmvx_module_funcs_) ==
+ IREE_ARRAYSIZE(iree_vmvx_module_exports_),
+ "function pointer table must be 1:1 with exports");
+
+static const iree_vm_native_module_descriptor_t iree_vmvx_module_descriptor_ = {
+ .module_name = iree_string_view_literal("vmvx"),
+ .import_count = 0, // workaround for 0-length C struct
+ .imports = iree_vmvx_module_imports_,
+ .export_count = IREE_ARRAYSIZE(iree_vmvx_module_exports_),
+ .exports = iree_vmvx_module_exports_,
+ .function_count = IREE_ARRAYSIZE(iree_vmvx_module_funcs_),
+ .functions = iree_vmvx_module_funcs_,
+ .reflection_attr_count = 0,
+ .reflection_attrs = NULL,
+};
+
+// Creates the VMVX VM module. Mirrors iree_hal_module_create: one combined
+// allocation of [base native module][iree_vmvx_module_t], with the custom
+// tail reached via IREE_VMVX_MODULE_CAST. On success *out_module owns it.
+IREE_API_EXPORT iree_status_t iree_vmvx_module_create(
+ iree_allocator_t allocator, iree_vm_module_t** out_module) {
+ IREE_ASSERT_ARGUMENT(out_module);
+ *out_module = NULL;
+
+ // Setup the interface with the functions we implement ourselves. Any function
+ // we omit will be handled by the base native module.
+ static const iree_vm_module_t interface = {
+ .destroy = iree_vmvx_module_destroy,
+ .alloc_state = iree_vmvx_module_alloc_state,
+ .free_state = iree_vmvx_module_free_state,
+ };
+
+ // Allocate shared module state.
+ iree_host_size_t total_size =
+ iree_vm_native_module_size() + sizeof(iree_vmvx_module_t);
+ iree_vm_module_t* base_module = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_allocator_malloc(allocator, total_size, (void**)&base_module));
+ memset(base_module, 0, total_size);
+ iree_status_t status = iree_vm_native_module_initialize(
+ &interface, &iree_vmvx_module_descriptor_, allocator, base_module);
+ if (!iree_status_is_ok(status)) {
+ // Initialization failed before any state was attached; safe to free.
+ iree_allocator_free(allocator, base_module);
+ return status;
+ }
+
+ iree_vmvx_module_t* module = IREE_VMVX_MODULE_CAST(base_module);
+ module->host_allocator = allocator;
+
+ *out_module = base_module;
+ return iree_ok_status();
+}
diff --git a/runtime/src/iree/modules/vmvx/module.h b/runtime/src/iree/modules/vmvx/module.h
new file mode 100644
index 0000000..61ec691
--- /dev/null
+++ b/runtime/src/iree/modules/vmvx/module.h
@@ -0,0 +1,31 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_MODULES_VMVX_MODULE_H_
+#define IREE_MODULES_VMVX_MODULE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Registers the custom types used by the VMVX module.
+// WARNING: not thread-safe; call at startup before using.
+IREE_API_EXPORT iree_status_t iree_vmvx_module_register_types();
+
+// Creates the VMVX module with a default configuration.
+IREE_API_EXPORT iree_status_t iree_vmvx_module_create(
+ iree_allocator_t allocator, iree_vm_module_t** out_module);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_MODULES_VMVX_MODULE_H_
diff --git a/runtime/src/iree/runtime/BUILD.bazel b/runtime/src/iree/runtime/BUILD.bazel
new file mode 100644
index 0000000..2c435fb
--- /dev/null
+++ b/runtime/src/iree/runtime/BUILD.bazel
@@ -0,0 +1,58 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+#===------------------------------------------------------------------------===#
+# Public API
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "runtime",
+ hdrs = [
+ "api.h",
+ ],
+ deps = [
+ ":impl",
+ "//runtime/src/iree/base",
+ ],
+)
+
+#===------------------------------------------------------------------------===#
+# Implementation
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+ name = "impl",
+ srcs = [
+ "call.c",
+ "instance.c",
+ "session.c",
+ ],
+ hdrs = [
+ "call.h",
+ "instance.h",
+ "session.h",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:file_io",
+ "//runtime/src/iree/hal",
+ "//runtime/src/iree/hal/drivers",
+ "//runtime/src/iree/modules/hal",
+ "//runtime/src/iree/vm",
+ "//runtime/src/iree/vm:bytecode_module",
+ ],
+)
diff --git a/runtime/src/iree/runtime/CMakeLists.txt b/runtime/src/iree/runtime/CMakeLists.txt
new file mode 100644
index 0000000..143fd67
--- /dev/null
+++ b/runtime/src/iree/runtime/CMakeLists.txt
@@ -0,0 +1,56 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/runtime/BUILD                                               #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ runtime
+ HDRS
+ "api.h"
+ DEPS
+ ::impl
+ iree::base
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ impl
+ HDRS
+ "call.h"
+ "instance.h"
+ "session.h"
+ SRCS
+ "call.c"
+ "instance.c"
+ "session.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::file_io
+ iree::base::tracing
+ iree::hal
+ iree::hal::drivers
+ iree::modules::hal
+ iree::vm
+ iree::vm::bytecode_module
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
+
+iree_cc_unified_library(
+ NAME
+ unified
+ ROOT
+ ::impl
+)
diff --git a/runtime/src/iree/runtime/README.md b/runtime/src/iree/runtime/README.md
new file mode 100644
index 0000000..45dd876
--- /dev/null
+++ b/runtime/src/iree/runtime/README.md
@@ -0,0 +1,11 @@
+# IREE Higher-Level Runtime API
+
+This directory implements a higher-level runtime API on top of the low level
+APIs split across `iree/base/api.h`, `iree/hal/api.h`, and `iree/vm/api.h`.
+
+Using this higher level API may pull in additional dependencies and perform
+additional allocations compared to what you can get by directly going to the
+lower levels. For the most part, the higher-level and lower-level APIs may be
+mixed.
+
+See [the demo directory](./demo/) for sample usage.
diff --git a/runtime/src/iree/runtime/api.h b/runtime/src/iree/runtime/api.h
new file mode 100644
index 0000000..850ac52
--- /dev/null
+++ b/runtime/src/iree/runtime/api.h
@@ -0,0 +1,20 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_RUNTIME_API_H_
+#define IREE_RUNTIME_API_H_
+
+// Lower-level APIs:
+#include "iree/base/api.h" // IWYU pragma: export
+#include "iree/hal/api.h" // IWYU pragma: export
+#include "iree/vm/api.h" // IWYU pragma: export
+
+// Runtime API:
+#include "iree/runtime/call.h" // IWYU pragma: export
+#include "iree/runtime/instance.h" // IWYU pragma: export
+#include "iree/runtime/session.h" // IWYU pragma: export
+
+#endif // IREE_RUNTIME_API_H_
diff --git a/runtime/src/iree/runtime/call.c b/runtime/src/iree/runtime/call.c
new file mode 100644
index 0000000..764668a
--- /dev/null
+++ b/runtime/src/iree/runtime/call.c
@@ -0,0 +1,124 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/runtime/call.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/modules/hal/module.h"
+#include "iree/runtime/session.h"
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_call_t
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT iree_status_t iree_runtime_call_initialize(
+ iree_runtime_session_t* session, iree_vm_function_t function,
+ iree_runtime_call_t* out_call) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_ASSERT_ARGUMENT(out_call);
+ memset(out_call, 0, sizeof(*out_call));
+
+ // Query the signature of the function to determine the sizes of the lists.
+ iree_vm_function_signature_t signature =
+ iree_vm_function_signature(&function);
+ iree_string_view_t arguments;
+ iree_string_view_t results;
+ IREE_RETURN_IF_ERROR(iree_vm_function_call_get_cconv_fragments(
+ &signature, &arguments, &results));
+
+ out_call->session = session;
+ iree_runtime_session_retain(session);
+ out_call->function = function;
+
+ // Allocate the input and output lists with the required capacity.
+ // A user wanting to avoid dynamic allocations could instead create on-stack
+ // storage for these and use iree_vm_list_initialize instead. This high-level
+ // API keeps things simple, though, and for the frequency of calls through
+ // this interface a few small pooled malloc calls should be fine.
+ iree_allocator_t host_allocator =
+ iree_runtime_session_host_allocator(session);
+ iree_status_t status = iree_vm_list_create(
+ /*element_type=*/NULL, arguments.size, host_allocator, &out_call->inputs);
+ if (iree_status_is_ok(status)) {
+ status = iree_vm_list_create(
+ /*element_type=*/NULL, results.size, host_allocator,
+ &out_call->outputs);
+ }
+
+ if (!iree_status_is_ok(status)) {
+ iree_runtime_call_deinitialize(out_call);
+ }
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_runtime_call_initialize_by_name(
+ iree_runtime_session_t* session, iree_string_view_t full_name,
+ iree_runtime_call_t* out_call) {
+ iree_vm_function_t function;
+ IREE_RETURN_IF_ERROR(
+ iree_runtime_session_lookup_function(session, full_name, &function));
+ return iree_runtime_call_initialize(session, function, out_call);
+}
+
+IREE_API_EXPORT void iree_runtime_call_deinitialize(iree_runtime_call_t* call) {
+ IREE_ASSERT_ARGUMENT(call);
+ iree_vm_list_release(call->inputs);
+ iree_vm_list_release(call->outputs);
+ iree_runtime_session_release(call->session);
+}
+
+IREE_API_EXPORT void iree_runtime_call_reset(iree_runtime_call_t* call) {
+ IREE_ASSERT_ARGUMENT(call);
+ iree_status_ignore(iree_vm_list_resize(call->inputs, 0));
+ iree_status_ignore(iree_vm_list_resize(call->outputs, 0));
+}
+
+IREE_API_EXPORT iree_vm_list_t* iree_runtime_call_inputs(
+ const iree_runtime_call_t* call) {
+ IREE_ASSERT_ARGUMENT(call);
+ return call->inputs;
+}
+
+IREE_API_EXPORT iree_vm_list_t* iree_runtime_call_outputs(
+ const iree_runtime_call_t* call) {
+ IREE_ASSERT_ARGUMENT(call);
+ return call->outputs;
+}
+
+IREE_API_EXPORT iree_status_t iree_runtime_call_invoke(
+ iree_runtime_call_t* call, iree_runtime_call_flags_t flags) {
+ return iree_runtime_session_call(call->session, &call->function, call->inputs,
+ call->outputs);
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining call I/O
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT iree_status_t iree_runtime_call_inputs_push_back_buffer_view(
+ iree_runtime_call_t* call, iree_hal_buffer_view_t* buffer_view) {
+ IREE_ASSERT_ARGUMENT(call);
+ IREE_ASSERT_ARGUMENT(buffer_view);
+ iree_vm_ref_t value = {0};
+ IREE_RETURN_IF_ERROR(iree_vm_ref_wrap_assign(
+ buffer_view, iree_hal_buffer_view_type_id(), &value));
+ return iree_vm_list_push_ref_retain(call->inputs, &value);
+}
+
+// Pops a buffer view from the front of the call outputs list.
+// Ownership of the buffer view transfers to the caller.
+IREE_API_EXPORT iree_status_t iree_runtime_call_outputs_pop_front_buffer_view(
+ iree_runtime_call_t* call, iree_hal_buffer_view_t** out_buffer_view) {
+ IREE_ASSERT_ARGUMENT(call);
+ IREE_ASSERT_ARGUMENT(out_buffer_view);
+ *out_buffer_view = NULL;
+ iree_vm_ref_t value = {0};
+ IREE_RETURN_IF_ERROR(iree_vm_list_pop_front_ref_move(call->outputs, &value));
+ return iree_hal_buffer_view_check_deref(value, out_buffer_view);
+}
diff --git a/runtime/src/iree/runtime/call.h b/runtime/src/iree/runtime/call.h
new file mode 100644
index 0000000..69d0540
--- /dev/null
+++ b/runtime/src/iree/runtime/call.h
@@ -0,0 +1,118 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_RUNTIME_CALL_H_
+#define IREE_RUNTIME_CALL_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/vm/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_runtime_session_t iree_runtime_session_t;
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_call_t
+//===----------------------------------------------------------------------===//
+
+// TODO(benvanik): determine if we want to control behavior like non-blocking
+// or whether to consume inputs like this or by having separate call types.
+// For example, an async_call may make things more clear when using semaphores
+// without having to pollute this interface.
+enum iree_runtime_call_flag_bits_t {
+ IREE_RUNTIME_CALL_FLAG_RESERVED = 0u,
+};
+typedef uint32_t iree_runtime_call_flags_t;
+
+// A stateful VM function call builder.
+//
+// Applications that will be calling the same function repeatedly can reuse the
+// call to avoid having to construct the inputs lists each time. Outputs of
+// prior calls will be retained unless iree_runtime_call_reset is used and will
+// be provided to the VM on subsequent calls to reuse (if able): when reusing a
+// call like this callers are required to either reset the call, copy their
+// data out, or reset the particular output they are consuming.
+//
+// Thread-compatible; these are designed to be stack-local or embedded in a user
+// data structure that can provide synchronization when required.
+typedef struct iree_runtime_call_t {
+ iree_runtime_session_t* session;
+ iree_vm_function_t function;
+ iree_vm_list_t* inputs;
+ iree_vm_list_t* outputs;
+} iree_runtime_call_t;
+
+// Initializes call state for a call to |function| within |session|.
+IREE_API_EXPORT iree_status_t iree_runtime_call_initialize(
+ iree_runtime_session_t* session, iree_vm_function_t function,
+ iree_runtime_call_t* out_call);
+
+// Initializes call state for a call to |full_name| within |session|.
+//
+// The function name matches the original MLIR module and function symbols.
+// Example:
+// module @foo {
+// func.func @bar()
+// }
+// The full name of '@bar' is 'foo.bar'.
+// By default modules have the name 'module'.
+IREE_API_EXPORT iree_status_t iree_runtime_call_initialize_by_name(
+ iree_runtime_session_t* session, iree_string_view_t full_name,
+ iree_runtime_call_t* out_call);
+
+// Deinitializes a call by releasing its input and output lists.
+IREE_API_EXPORT void iree_runtime_call_deinitialize(iree_runtime_call_t* call);
+
+// Resets the input and output lists back to 0-length in preparation for
+// construction of another call.
+IREE_API_EXPORT void iree_runtime_call_reset(iree_runtime_call_t* call);
+
+// Returns an initially-empty variant list for passing in function inputs.
+// The list must be fully populated based on the required arguments of the
+// function.
+IREE_API_EXPORT iree_vm_list_t* iree_runtime_call_inputs(
+ const iree_runtime_call_t* call);
+
+// Returns an initially-empty variant list for passing in function outputs or
+// for reading back the results of a call.
+IREE_API_EXPORT iree_vm_list_t* iree_runtime_call_outputs(
+ const iree_runtime_call_t* call);
+
+// Synchronously invokes the call and returns the status.
+// The inputs list will remain unchanged to allow for subsequent reuse and the
+// output list will be populated with the results of the call.
+IREE_API_EXPORT iree_status_t iree_runtime_call_invoke(
+ iree_runtime_call_t* call, iree_runtime_call_flags_t flags);
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining call I/O
+//===----------------------------------------------------------------------===//
+// NOTE: these are mostly useful for one-shot tests and samples. Applications
+// that will be reusing the same inputs and outputs should prefer to track them
+// themselves. If applications are able it's strongly recommended that they
+// produce and consume the iree_hal_buffer_ts directly to avoid additional
+// copies and allocations.
+
+// Pushes |buffer_view| to the call inputs list.
+// The value will be retained by the list.
+IREE_API_EXPORT iree_status_t iree_runtime_call_inputs_push_back_buffer_view(
+ iree_runtime_call_t* call, iree_hal_buffer_view_t* buffer_view);
+
+// Pops a buffer view from the front of the call outputs list.
+// Ownership of the buffer view transfers to the caller.
+IREE_API_EXPORT iree_status_t iree_runtime_call_outputs_pop_front_buffer_view(
+ iree_runtime_call_t* call, iree_hal_buffer_view_t** out_buffer_view);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_RUNTIME_CALL_H_
diff --git a/runtime/src/iree/runtime/demo/BUILD b/runtime/src/iree/runtime/demo/BUILD
new file mode 100644
index 0000000..84a5f73
--- /dev/null
+++ b/runtime/src/iree/runtime/demo/BUILD
@@ -0,0 +1,75 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content")
+load("//build_tools/bazel:native_binary.bzl", "native_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+#===------------------------------------------------------------------------===#
+# Hello World!
+#===------------------------------------------------------------------------===#
+
+cc_binary(
+ name = "hello_world_file",
+ srcs = ["hello_world_explained.c"],
+ defines = [
+ # Load data from a file passed on the command line.
+ "IREE_RUNTIME_DEMO_LOAD_FILE_FROM_COMMAND_LINE_ARG",
+ ],
+ deps = [
+ "//runtime/src/iree/runtime",
+ ],
+)
+
+# TODO(benvanik): native_test that passes the file as a flag. Right now we
+# can't specify data through native_test, though, so this isn't possible to
+# automate.
+
+iree_cmake_extra_content(
+ content = """
+if (NOT ${IREE_HAL_DRIVER_VMVX} OR NOT ${IREE_TARGET_BACKEND_VMVX})
+ return()
+endif()
+""",
+ inline = True,
+)
+
+cc_binary(
+ name = "hello_world_embedded",
+ srcs = ["hello_world_explained.c"],
+ defines = [
+ # Load data directly from memory.
+ "IREE_RUNTIME_DEMO_LOAD_FILE_FROM_EMBEDDED_DATA",
+ ],
+ deps = [
+ "//runtime/src/iree/runtime",
+ "//runtime/src/iree/runtime/testdata:simple_mul_module_c",
+ ],
+)
+
+native_test(
+ name = "hello_world_embedded_test",
+ src = ":hello_world_embedded",
+)
+
+cc_binary(
+ name = "hello_world_terse",
+ srcs = ["hello_world_terse.c"],
+ deps = [
+ "//runtime/src/iree/runtime",
+ "//runtime/src/iree/runtime/testdata:simple_mul_module_c",
+ ],
+)
+
+native_test(
+ name = "hello_world_terse_test",
+ src = ":hello_world_terse",
+)
diff --git a/runtime/src/iree/runtime/demo/CMakeLists.txt b/runtime/src/iree/runtime/demo/CMakeLists.txt
new file mode 100644
index 0000000..c06f15a
--- /dev/null
+++ b/runtime/src/iree/runtime/demo/CMakeLists.txt
@@ -0,0 +1,53 @@
+# NOTE: not using bazel-to-cmake here because of the runtime unified rule.
+# We should figure out how to make bazel/cmake consistent with that.
+
+iree_cc_binary(
+ NAME
+ hello_world_file
+ SRCS
+ "hello_world_explained.c"
+ DEFINES
+ "IREE_RUNTIME_DEMO_LOAD_FILE_FROM_COMMAND_LINE_ARG"
+ DEPS
+ iree::runtime::unified
+)
+
+if (NOT ${IREE_HAL_DRIVER_VMVX} OR NOT ${IREE_TARGET_BACKEND_VMVX})
+ return()
+endif()
+
+iree_cc_binary(
+ NAME
+ hello_world_embedded
+ SRCS
+ "hello_world_explained.c"
+ DEFINES
+ "IREE_RUNTIME_DEMO_LOAD_FILE_FROM_EMBEDDED_DATA"
+ DEPS
+ iree::runtime::unified
+ iree::runtime::testdata::simple_mul_module_c
+)
+
+iree_native_test(
+ NAME
+ "hello_world_embedded_test"
+ SRC
+ ::hello_world_embedded
+)
+
+iree_cc_binary(
+ NAME
+ hello_world_terse
+ SRCS
+ "hello_world_terse.c"
+ DEPS
+ iree::runtime::unified
+ iree::runtime::testdata::simple_mul_module_c
+)
+
+iree_native_test(
+ NAME
+ "hello_world_terse_test"
+ SRC
+ ::hello_world_terse
+)
diff --git a/runtime/src/iree/runtime/demo/README.md b/runtime/src/iree/runtime/demo/README.md
new file mode 100644
index 0000000..b4b0f02
--- /dev/null
+++ b/runtime/src/iree/runtime/demo/README.md
@@ -0,0 +1,33 @@
+# IREE C Runtime API Demo
+
+This demonstrates how to use the higher-level IREE C API to load a compiled
+module and call the functions within it.
+
+The module used has a single exported function `@simple_mul` that multiplies two
+tensors and returns the result:
+
+```mlir
+func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
+ {
+ %0 = "mhlo.multiply"(%arg0, %arg1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+ return %0 : tensor<4xf32>
+}
+```
+
+The demo here sets up the shared `iree_runtime_instance_t`, loads the module
+into an `iree_runtime_session_t`, and makes a call via `iree_runtime_call_t`.
+
+[`hello_world_terse.c`](hello_world_terse.c) highlights the steps while
+[`hello_world_explained.c`](hello_world_explained.c) has more discussion over
+what is happening and things to watch out for.
+
+Modules can be loaded from the file system or into memory by the application.
+The `iree_runtime_demo_hello_world_file` target shows loading from a file
+passed in as a command line argument and
+`iree_runtime_demo_hello_world_embedded` shows loading from a blob of memory
+where the test file has been built directly into the binary.
+
+NOTE: for brevity the `_terse.c` example uses `IREE_CHECK_OK` to abort the
+program on errors. Real applications - especially ones hosting IREE such as
+Android apps - would want to follow the patterns in `_explained.c` for how to
+propagate errors and clean up allocated resources.
diff --git a/runtime/src/iree/runtime/demo/hello_world_explained.c b/runtime/src/iree/runtime/demo/hello_world_explained.c
new file mode 100644
index 0000000..3abaca0
--- /dev/null
+++ b/runtime/src/iree/runtime/demo/hello_world_explained.c
@@ -0,0 +1,277 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdio.h>
+
+#include "iree/runtime/api.h"
+
+static int iree_runtime_demo_main(void);
+static iree_status_t iree_runtime_demo_run_session(
+ iree_runtime_instance_t* instance);
+static iree_status_t iree_runtime_demo_perform_mul(
+ iree_runtime_session_t* session);
+
+#if defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_COMMAND_LINE_ARG)
+
+static const char* demo_file_path = NULL;
+
+// Takes the first argument on the command line as a file path and loads it.
+int main(int argc, char** argv) {
+ if (argc < 2) {
+ fprintf(stderr, "usage: session_demo module_file.vmfb\n");
+ return 1;
+ }
+ demo_file_path = argv[1];
+ return iree_runtime_demo_main();
+}
+
+// Loads a compiled IREE module from the file system.
+static iree_status_t iree_runtime_demo_load_module(
+ iree_runtime_session_t* session) {
+ return iree_runtime_session_append_bytecode_module_from_file(session,
+ demo_file_path);
+}
+
+#elif defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_EMBEDDED_DATA)
+
+#include "iree/runtime/testdata/simple_mul_module_c.h"
+
+int main(int argc, char** argv) { return iree_runtime_demo_main(); }
+
+// Loads the bytecode module directly from memory.
+//
+// Embedding the compiled output into your binary is not always possible (or
+// recommended) but is a fairly painless way to get things working on a variety
+// of targets without worrying about how to deploy files or pass flags.
+//
+// In cases like this the module file is in .rodata and does not need to be
+// freed; if the memory needs to be released when the module is unloaded then a
+// custom allocator can be provided to get a callback instead.
+static iree_status_t iree_runtime_demo_load_module(
+ iree_runtime_session_t* session) {
+ const iree_file_toc_t* module_file =
+ iree_runtime_testdata_simple_mul_module_create();
+ return iree_runtime_session_append_bytecode_module_from_memory(
+ session, iree_make_const_byte_span(module_file->data, module_file->size),
+ iree_allocator_null());
+}
+
+#else
+#error "must specify a way to load the module data"
+#endif // IREE_RUNTIME_DEMO_LOAD_FILE_FROM_*
+
+//===----------------------------------------------------------------------===//
+// 1. Entry point / shared iree_runtime_instance_t setup
+//===----------------------------------------------------------------------===//
+// Applications should create and share a single instance across all sessions.
+
+// This would live in your application startup/shutdown code or scoped to the
+// usage of IREE. Creating and destroying instances is expensive and should be
+// avoided.
+static int iree_runtime_demo_main(void) {
+ // Set up the shared runtime instance.
+ // An application should usually only have one of these and share it across
+ // all of the sessions it has. The instance is thread-safe, while the
+ // sessions are only thread-compatible (you need to lock if it's required).
+ iree_runtime_instance_options_t instance_options;
+ iree_runtime_instance_options_initialize(IREE_API_VERSION_LATEST,
+ &instance_options);
+ iree_runtime_instance_options_use_all_available_drivers(&instance_options);
+ iree_runtime_instance_t* instance = NULL;
+ iree_status_t status = iree_runtime_instance_create(
+ &instance_options, iree_allocator_system(), &instance);
+
+ // Run the demo.
+ // A real application would load its models (at startup, on-demand, etc) and
+ // retain them somewhere to be reused. Startup time and likelihood of failure
+ // varies across different HAL backends; the synchronous CPU backend is nearly
+ // instantaneous and will never fail (unless out of memory) while the Vulkan
+ // backend may take significantly longer and fail if there are not supported
+ // devices.
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_demo_run_session(instance);
+ }
+
+ // Release the shared instance - it will be deallocated when all sessions
+ // using it have been released (here it is deallocated immediately).
+ iree_runtime_instance_release(instance);
+
+ int ret = (int)iree_status_code(status);
+ if (!iree_status_is_ok(status)) {
+ // Dump nice status messages to stderr on failure.
+ // An application can route these through its own logging infrastructure as
+ // needed. Note that the status is a handle and must be freed!
+ iree_status_fprint(stderr, status);
+ iree_status_ignore(status);
+ }
+ return ret;
+}
+
+//===----------------------------------------------------------------------===//
+// 2. Load modules and initialize state in iree_runtime_session_t
+//===----------------------------------------------------------------------===//
+// Each instantiation of a module will live in its own session. Module state
+// like variables will be retained across calls within the same session.
+
+// Loads the demo module and uses it to perform some math.
+// In a real application you'd want to hang on to the iree_runtime_session_t
+// and reuse it for future calls - especially if it holds state internally.
+static iree_status_t iree_runtime_demo_run_session(
+ iree_runtime_instance_t* instance) {
+ // TODO(#5724): move device selection into the compiled modules.
+ iree_hal_device_t* device = NULL;
+ IREE_RETURN_IF_ERROR(iree_runtime_instance_try_create_default_device(
+ instance, iree_make_cstring_view("vmvx"), &device));
+
+ // Set up the session to run the demo module.
+ // Sessions are like OS processes and are used to isolate modules from each
+ // other and hold runtime state such as the variables used within the module.
+ // The same module loaded into two sessions will see their own private state.
+ iree_runtime_session_options_t session_options;
+ iree_runtime_session_options_initialize(&session_options);
+ iree_runtime_session_t* session = NULL;
+ iree_status_t status = iree_runtime_session_create_with_device(
+ instance, &session_options, device,
+ iree_runtime_instance_host_allocator(instance), &session);
+ iree_hal_device_release(device);
+
+ // Load the compiled user module in a demo-specific way.
+ // Applications could specify files, embed the outputs directly in their
+ // binaries, fetch them over the network, etc.
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_demo_load_module(session);
+ }
+
+ // Build and issue the call.
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_demo_perform_mul(session);
+ }
+
+ // Release the session and free all resources.
+ iree_runtime_session_release(session);
+ return status;
+}
+
+//===----------------------------------------------------------------------===//
+// 3. Call a function within a module with buffer views
+//===----------------------------------------------------------------------===//
+// The inputs and outputs of a call are reusable across calls (and possibly
+// across sessions depending on device compatibility) and can be setup by the
+// application as needed. For example, an application could perform
+// multi-threaded buffer view creation and then issue the call from a single
+// thread when all inputs are ready. This simple demo just allocates them
+// per-call and throws them away.
+
+// Sets up and calls the simple_mul function and dumps the results:
+// func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) ->
+// tensor<4xf32>
+//
+// NOTE: this is a demo and as such this performs no memoization; a real
+// application could reuse a lot of these structures and cache lookups of
+// iree_vm_function_t to reduce the amount of per-call overhead.
+static iree_status_t iree_runtime_demo_perform_mul(
+ iree_runtime_session_t* session) {
+ // Initialize the call to the function.
+ iree_runtime_call_t call;
+ IREE_RETURN_IF_ERROR(iree_runtime_call_initialize_by_name(
+ session, iree_make_cstring_view("module.simple_mul"), &call));
+
+ // Append the function inputs with the HAL device allocator in use by the
+ // session. The buffers will be usable within the session and _may_ be usable
+ // in other sessions depending on whether they share a compatible device.
+ iree_hal_allocator_t* device_allocator =
+ iree_runtime_session_device_allocator(session);
+ iree_allocator_t host_allocator =
+ iree_runtime_session_host_allocator(session);
+ iree_status_t status = iree_ok_status();
+ {
+ // %arg0: tensor<4xf32>
+ iree_hal_buffer_view_t* arg0 = NULL;
+ if (iree_status_is_ok(status)) {
+ static const iree_hal_dim_t arg0_shape[1] = {4};
+ static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
+ status = iree_hal_buffer_view_allocate_buffer(
+ device_allocator,
+ // Shape dimensions and rank:
+ arg0_shape, IREE_ARRAYSIZE(arg0_shape),
+ // Element type:
+ IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ // Encoding type:
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+ (iree_hal_buffer_params_t){
+ // Where to allocate (host or device):
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ // Access to allow to this memory (this is .rodata so READ only):
+ .access = IREE_HAL_MEMORY_ACCESS_READ,
+ // Intended usage of the buffer (transfers, dispatches, etc):
+ .usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+ IREE_HAL_BUFFER_USAGE_TRANSFER,
+ },
+ // The actual heap buffer to wrap or clone and its allocator:
+ iree_make_const_byte_span(arg0_data, sizeof(arg0_data)),
+ // Buffer view + storage are returned and owned by the caller:
+ &arg0);
+ }
+ if (iree_status_is_ok(status)) {
+ IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(
+ stdout, arg0, /*max_element_count=*/4096, host_allocator));
+ // Add to the call inputs list (which retains the buffer view).
+ status = iree_runtime_call_inputs_push_back_buffer_view(&call, arg0);
+ }
+ // Since the call retains the buffer view we can release it here.
+ iree_hal_buffer_view_release(arg0);
+
+ fprintf(stdout, "\n * \n");
+
+ // %arg1: tensor<4xf32>
+ iree_hal_buffer_view_t* arg1 = NULL;
+ if (iree_status_is_ok(status)) {
+ static const iree_hal_dim_t arg1_shape[1] = {4};
+ static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
+ status = iree_hal_buffer_view_allocate_buffer(
+ device_allocator, arg1_shape, IREE_ARRAYSIZE(arg1_shape),
+ IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+ (iree_hal_buffer_params_t){
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ .access = IREE_HAL_MEMORY_ACCESS_READ,
+ .usage = IREE_HAL_BUFFER_USAGE_DISPATCH |
+ IREE_HAL_BUFFER_USAGE_TRANSFER,
+ },
+ iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &arg1);
+ }
+ if (iree_status_is_ok(status)) {
+ IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(
+ stdout, arg1, /*max_element_count=*/4096, host_allocator));
+ status = iree_runtime_call_inputs_push_back_buffer_view(&call, arg1);
+ }
+ iree_hal_buffer_view_release(arg1);
+ }
+
+ // Synchronously perform the call.
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_call_invoke(&call, /*flags=*/0);
+ }
+
+ fprintf(stdout, "\n = \n");
+
+ // Dump the function outputs.
+ iree_hal_buffer_view_t* ret0 = NULL;
+ if (iree_status_is_ok(status)) {
+ // Try to get the first call result as a buffer view.
+ status = iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret0);
+ }
+ if (iree_status_is_ok(status)) {
+ // This prints the buffer view out but an application could read its
+ // contents, pass it to another call, etc.
+ status = iree_hal_buffer_view_fprint(
+ stdout, ret0, /*max_element_count=*/4096, host_allocator);
+ }
+ iree_hal_buffer_view_release(ret0);
+
+ iree_runtime_call_deinitialize(&call);
+ return status;
+}
diff --git a/runtime/src/iree/runtime/demo/hello_world_terse.c b/runtime/src/iree/runtime/demo/hello_world_terse.c
new file mode 100644
index 0000000..35ca476
--- /dev/null
+++ b/runtime/src/iree/runtime/demo/hello_world_terse.c
@@ -0,0 +1,136 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdio.h>
+
+#include "iree/runtime/api.h"
+#include "iree/runtime/testdata/simple_mul_module_c.h"
+
+static void iree_runtime_demo_run_session(iree_runtime_instance_t* instance);
+static void iree_runtime_demo_perform_mul(iree_runtime_session_t* session);
+
+//===----------------------------------------------------------------------===//
+// 1. Entry point / shared iree_runtime_instance_t setup
+//===----------------------------------------------------------------------===//
+
+int main(int argc, char** argv) {
+ // Create and configure the instance shared across all sessions.
+ iree_runtime_instance_options_t instance_options;
+ iree_runtime_instance_options_initialize(IREE_API_VERSION_LATEST,
+ &instance_options);
+ iree_runtime_instance_options_use_all_available_drivers(&instance_options);
+ iree_runtime_instance_t* instance = NULL;
+ IREE_CHECK_OK(iree_runtime_instance_create(
+ &instance_options, iree_allocator_system(), &instance));
+
+ // All sessions should share the same instance.
+ iree_runtime_demo_run_session(instance);
+
+ iree_runtime_instance_release(instance);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// 2. Load modules and initialize state in iree_runtime_session_t
+//===----------------------------------------------------------------------===//
+
+static void iree_runtime_demo_run_session(iree_runtime_instance_t* instance) {
+ // TODO(#5724): move device selection into the compiled modules.
+ iree_hal_device_t* device = NULL;
+ IREE_CHECK_OK(iree_runtime_instance_try_create_default_device(
+ instance, iree_make_cstring_view("vmvx"), &device));
+
+ // Create one session per loaded module to hold the module state.
+ iree_runtime_session_options_t session_options;
+ iree_runtime_session_options_initialize(&session_options);
+ iree_runtime_session_t* session = NULL;
+ IREE_CHECK_OK(iree_runtime_session_create_with_device(
+ instance, &session_options, device,
+ iree_runtime_instance_host_allocator(instance), &session));
+ iree_hal_device_release(device);
+
+ // Load your user module into the session (from memory, from file, etc).
+ const iree_file_toc_t* module_file =
+ iree_runtime_testdata_simple_mul_module_create();
+ IREE_CHECK_OK(iree_runtime_session_append_bytecode_module_from_memory(
+ session, iree_make_const_byte_span(module_file->data, module_file->size),
+ iree_allocator_null()));
+
+ // Run your functions; you should reuse the session to make multiple calls.
+ iree_runtime_demo_perform_mul(session);
+
+ iree_runtime_session_release(session);
+}
+
+//===----------------------------------------------------------------------===//
+// 3. Call a function within a module with buffer views
+//===----------------------------------------------------------------------===//
+
+// func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) ->
+// tensor<4xf32>
+static void iree_runtime_demo_perform_mul(iree_runtime_session_t* session) {
+ iree_runtime_call_t call;
+ IREE_CHECK_OK(iree_runtime_call_initialize_by_name(
+ session, iree_make_cstring_view("module.simple_mul"), &call));
+
+ // %arg0: tensor<4xf32>
+ iree_hal_buffer_view_t* arg0 = NULL;
+ static const iree_hal_dim_t arg0_shape[1] = {4};
+ static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
+ IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
+ iree_runtime_session_device_allocator(session), arg0_shape,
+ IREE_ARRAYSIZE(arg0_shape), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+ (iree_hal_buffer_params_t){
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ .access = IREE_HAL_MEMORY_ACCESS_READ,
+ .usage =
+ IREE_HAL_BUFFER_USAGE_DISPATCH | IREE_HAL_BUFFER_USAGE_TRANSFER,
+ },
+ iree_make_const_byte_span(arg0_data, sizeof(arg0_data)), &arg0));
+ IREE_CHECK_OK(iree_hal_buffer_view_fprint(
+ stdout, arg0, /*max_element_count=*/4096,
+ iree_runtime_session_host_allocator(session)));
+ IREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&call, arg0));
+ iree_hal_buffer_view_release(arg0);
+
+ fprintf(stdout, "\n * \n");
+
+ // %arg1: tensor<4xf32>
+ iree_hal_buffer_view_t* arg1 = NULL;
+ static const iree_hal_dim_t arg1_shape[1] = {4};
+ static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
+ IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
+ iree_runtime_session_device_allocator(session), arg1_shape,
+ IREE_ARRAYSIZE(arg1_shape), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+ (iree_hal_buffer_params_t){
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ .access = IREE_HAL_MEMORY_ACCESS_READ,
+ .usage =
+ IREE_HAL_BUFFER_USAGE_DISPATCH | IREE_HAL_BUFFER_USAGE_TRANSFER,
+ },
+ iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &arg1));
+ IREE_CHECK_OK(iree_hal_buffer_view_fprint(
+ stdout, arg1, /*max_element_count=*/4096,
+ iree_runtime_session_host_allocator(session)));
+ IREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&call, arg1));
+ iree_hal_buffer_view_release(arg1);
+
+ IREE_CHECK_OK(iree_runtime_call_invoke(&call, /*flags=*/0));
+
+ fprintf(stdout, "\n = \n");
+
+ // -> tensor<4xf32>
+ iree_hal_buffer_view_t* ret0 = NULL;
+ IREE_CHECK_OK(iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret0));
+ IREE_CHECK_OK(iree_hal_buffer_view_fprint(
+ stdout, ret0, /*max_element_count=*/4096,
+ iree_runtime_session_host_allocator(session)));
+ iree_hal_buffer_view_release(ret0);
+
+ iree_runtime_call_deinitialize(&call);
+}
diff --git a/runtime/src/iree/runtime/instance.c b/runtime/src/iree/runtime/instance.c
new file mode 100644
index 0000000..352bfc5
--- /dev/null
+++ b/runtime/src/iree/runtime/instance.c
@@ -0,0 +1,166 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/runtime/instance.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/hal/drivers/init.h"
+#include "iree/modules/hal/module.h"
+#include "iree/vm/api.h"
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_instance_options_t
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT void iree_runtime_instance_options_initialize(
+ iree_api_version_t api_version,
+ iree_runtime_instance_options_t* out_options) {
+ memset(out_options, 0, sizeof(*out_options));
+ out_options->api_version = api_version;
+}
+
+IREE_API_EXPORT void iree_runtime_instance_options_use_all_available_drivers(
+ iree_runtime_instance_options_t* options) {
+ options->driver_registry = iree_hal_driver_registry_default();
+ // TODO(benvanik): remove status result from this; it can't (meaningfully)
+ // fail and is just extra bookkeeping.
+ iree_status_ignore(
+ iree_hal_register_all_available_drivers(options->driver_registry));
+}
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_instance_t
+//===----------------------------------------------------------------------===//
+
+struct iree_runtime_instance_t {
+ iree_atomic_ref_count_t ref_count;
+
+ // Allocator used to allocate the instance and all of its resources.
+ iree_allocator_t host_allocator;
+
+ // An optional driver registry used to enumerate and create HAL devices.
+ iree_hal_driver_registry_t* driver_registry;
+
+ // TODO(#5724): we should have a device cache here so that multiple sessions
+ // can find the same devices. This may mean a new HAL type like
+ // iree_hal_device_pool_t to prevent too much coupling and make weak
+ // references easier.
+};
+
+IREE_API_EXPORT iree_status_t iree_runtime_instance_create(
+ const iree_runtime_instance_options_t* options,
+ iree_allocator_t host_allocator, iree_runtime_instance_t** out_instance) {
+ IREE_ASSERT_ARGUMENT(options);
+ IREE_ASSERT_ARGUMENT(out_instance);
+ *out_instance = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Check that the API version matches what the runtime expects. The check here
+ // should always succeed when the runtime and the underlying system are linked
+ // together into the same binary.
+ iree_api_version_t actual_version = IREE_API_VERSION_0;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_api_version_check(options->api_version, &actual_version));
+
+ // Register builtin types.
+ // TODO(benvanik): change to per-instance type registries to avoid these
+ // global (UNSAFE!) calls. For now hosting applications should really only
+ // be using a single instance anyway.
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(z0, iree_vm_register_builtin_types());
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(z0, iree_hal_module_register_types());
+
+ // Allocate the instance state.
+ iree_runtime_instance_t* instance = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc(host_allocator, sizeof(*instance),
+ (void**)&instance));
+ instance->host_allocator = host_allocator;
+ iree_atomic_ref_count_init(&instance->ref_count);
+
+ instance->driver_registry = options->driver_registry;
+ // TODO(benvanik): driver registry ref counting.
+
+ *out_instance = instance;
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+static void iree_runtime_instance_destroy(iree_runtime_instance_t* instance) {
+ IREE_ASSERT_ARGUMENT(instance);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_free(instance->host_allocator, instance);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+IREE_API_EXPORT void iree_runtime_instance_retain(
+ iree_runtime_instance_t* instance) {
+ if (instance) {
+ iree_atomic_ref_count_inc(&instance->ref_count);
+ }
+}
+
+IREE_API_EXPORT void iree_runtime_instance_release(
+ iree_runtime_instance_t* instance) {
+ if (instance && iree_atomic_ref_count_dec(&instance->ref_count) == 1) {
+ iree_runtime_instance_destroy(instance);
+ }
+}
+
+IREE_API_EXPORT iree_allocator_t
+iree_runtime_instance_host_allocator(const iree_runtime_instance_t* instance) {
+ IREE_ASSERT_ARGUMENT(instance);
+ return instance->host_allocator;
+}
+
+IREE_API_EXPORT iree_hal_driver_registry_t*
+iree_runtime_instance_driver_registry(const iree_runtime_instance_t* instance) {
+ IREE_ASSERT_ARGUMENT(instance);
+ return instance->driver_registry;
+}
+
+IREE_API_EXPORT iree_status_t iree_runtime_instance_try_create_default_device(
+ iree_runtime_instance_t* instance, iree_string_view_t driver_name,
+ iree_hal_device_t** out_device) {
+ IREE_ASSERT_ARGUMENT(instance);
+ IREE_ASSERT_ARGUMENT(out_device);
+ *out_device = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, driver_name.data, driver_name.size);
+
+ // This is only supported when we have a driver registry we can use to create
+ // the drivers.
+ iree_hal_driver_registry_t* driver_registry =
+ iree_runtime_instance_driver_registry(instance);
+ if (!driver_registry) {
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "instance was created without a driver registry "
+ "and cannot perform enumeration");
+ }
+
+ // Create a driver with the given name (if one exists).
+ iree_allocator_t host_allocator =
+ iree_runtime_instance_host_allocator(instance);
+ iree_hal_driver_t* driver = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_driver_registry_try_create_by_name(
+ driver_registry, driver_name, host_allocator, &driver));
+
+ // Create the default device on that driver.
+ iree_status_t status =
+ iree_hal_driver_create_default_device(driver, host_allocator, out_device);
+
+ iree_hal_driver_release(driver);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/runtime/instance.h b/runtime/src/iree/runtime/instance.h
new file mode 100644
index 0000000..6bf5423
--- /dev/null
+++ b/runtime/src/iree/runtime/instance.h
@@ -0,0 +1,122 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_RUNTIME_INSTANCE_H_
+#define IREE_RUNTIME_INSTANCE_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Shared runtime instance responsible for isolating runtime usage, enumerating
+// and creating hardware device interfaces, and managing device resource pools.
+//
+// A single runtime instance can service multiple sessions and hosting
+// applications should try to reuse instances as much as possible. This ensures
+// that resource allocation across contexts is handled and extraneous device
+// interaction is avoided. For devices that may have exclusive access
+// restrictions it is mandatory to share instances, so plan accordingly.
+//
+// In multi-tenant systems separate instances can be used to isolate each tenant
+// in cases where the underlying devices do not cleanly support isolation
+// themselves and otherwise multiple tenants can share the same instance.
+// Consider an instance as isolating IREE from itself rather than being the only
+// mechanism that can be used to isolate individual tenants or sessions.
+//
+// Caches and allocator pools are associated with an instance and resources may
+// be reused among any sessions sharing the same instance. In multi-tenant
+// environments where all tenants are trusted (and here "tenant" may just mean
+// "a single session" where there are many sessions) then they can often receive
+// large benefits in terms of peak memory consumption, startup time, and
+// interoperation by sharing an instance. If two tenants must never share any
+// data (PII) then they should be placed in different instances.
+//
+// As with all of iree/runtime/ this API is a higher-level wrapper for the
+// low-level IREE HAL and VM. Using this may pull in additional dependencies and
+// perform additional allocations compared to what you can get by directly going
+// to the lower levels.
+//
+// Thread-safe.
+typedef struct iree_runtime_instance_t iree_runtime_instance_t;
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_instance_options_t
+//===----------------------------------------------------------------------===//
+
+// Options used to configure instance creation.
+typedef struct iree_runtime_instance_options_t {
+ // Should be set to IREE_API_VERSION_LATEST.
+ iree_api_version_t api_version;
+
+ // TODO(benvanik): inject logging hooks.
+
+ // A driver registry used to enumerate and create HAL devices.
+ // When not provided a device must be specified when creating sessions via
+ // iree_runtime_session_create_with_device.
+ iree_hal_driver_registry_t* driver_registry;
+} iree_runtime_instance_options_t;
+
+// Initializes |out_options| to its default values.
+IREE_API_EXPORT void iree_runtime_instance_options_initialize(
+ iree_api_version_t api_version,
+ iree_runtime_instance_options_t* out_options);
+
+// Sets the instance to use all available drivers registered in the current
+// binary. This allows for control over driver selection from the build system
+// using the IREE_HAL_DRIVER_* CMake options.
+// Sessions may query for the driver listing and select one(s) that are
+// appropriate.
+IREE_API_EXPORT void iree_runtime_instance_options_use_all_available_drivers(
+ iree_runtime_instance_options_t* options);
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_instance_t
+//===----------------------------------------------------------------------===//
+
+// Creates a new instance with the given |options|.
+// Instances should be shared with as many sessions in an application as is
+// reasonable to ensure that resources are tracked properly and threads are
+// managed correctly.
+//
+// |host_allocator| will be used to allocate the instance and any associated
+// resources. |out_instance| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_runtime_instance_create(
+ const iree_runtime_instance_options_t* options,
+ iree_allocator_t host_allocator, iree_runtime_instance_t** out_instance);
+
+// Retains the given |instance| for the caller.
+IREE_API_EXPORT void iree_runtime_instance_retain(
+ iree_runtime_instance_t* instance);
+
+// Releases the given |instance| from the caller.
+IREE_API_EXPORT void iree_runtime_instance_release(
+ iree_runtime_instance_t* instance);
+
+// Returns the host allocator used to allocate the instance and its resources.
+// Callers should use this to allocate resources so that any memory tracking
+// being performed correctly attributes the allocations to the instance.
+IREE_API_EXPORT iree_allocator_t
+iree_runtime_instance_host_allocator(const iree_runtime_instance_t* instance);
+
+// Returns the optional driver registry used to enumerate drivers and devices.
+// If not provided then iree_runtime_session_create_with_device must be used
+// to specify the device that a session should use.
+IREE_API_EXPORT iree_hal_driver_registry_t*
+iree_runtime_instance_driver_registry(const iree_runtime_instance_t* instance);
+
+// TODO(#5724): remove this once user modules query devices themselves.
+IREE_API_EXPORT iree_status_t iree_runtime_instance_try_create_default_device(
+ iree_runtime_instance_t* instance, iree_string_view_t driver_name,
+ iree_hal_device_t** out_device);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_RUNTIME_INSTANCE_H_
diff --git a/runtime/src/iree/runtime/session.c b/runtime/src/iree/runtime/session.c
new file mode 100644
index 0000000..2394a39
--- /dev/null
+++ b/runtime/src/iree/runtime/session.c
@@ -0,0 +1,309 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/runtime/session.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/file_io.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/api.h"
+#include "iree/modules/hal/module.h"
+#include "iree/runtime/instance.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode_module.h"
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_session_options_t
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT void iree_runtime_session_options_initialize(
+ iree_runtime_session_options_t* out_options) {
+ memset(out_options, 0, sizeof(*out_options));
+ out_options->context_flags = IREE_VM_CONTEXT_FLAG_NONE;
+ out_options->builtin_modules = IREE_RUNTIME_SESSION_BUILTIN_ALL;
+}
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_session_t
+//===----------------------------------------------------------------------===//
+
+struct iree_runtime_session_t {
+ iree_atomic_ref_count_t ref_count;
+
+ // Allocator used to allocate the session and all of its resources.
+ // Independent sessions within the same instance can have unique allocators to
+ // enable session-level tagging of allocations and pooling.
+ iree_allocator_t host_allocator;
+
+ // The instance this session is a part of; may be shared across many sessions.
+ // Devices and pools are stored on the instance so that multiple sessions can
+ // share resources. The session will keep the instance retained for its
+ // lifetime to ensure that these resources remain available.
+ iree_runtime_instance_t* instance;
+
+ // VM context containing the loaded modules (both builtins and user).
+ // Thread-compatible; a context carries state that must be externally
+ // synchronized.
+ iree_vm_context_t* context;
+
+ // The HAL module state bound to the target devices.
+ // This is used internally by the loaded modules to interact with the devices
+ // but can also be used by the caller to perform allocation and custom device
+ // execution.
+ //
+ // The state is owned by the context and we have it cached here for faster
+ // lookup. An application directly using the API may never need this, or could
+ // perform VM calls into HAL module exports to gain more portability.
+ iree_vm_module_state_t* hal_module_state;
+};
+
+IREE_API_EXPORT iree_status_t iree_runtime_session_create_with_device(
+ iree_runtime_instance_t* instance,
+ const iree_runtime_session_options_t* options, iree_hal_device_t* device,
+ iree_allocator_t host_allocator, iree_runtime_session_t** out_session) {
+ IREE_ASSERT_ARGUMENT(instance);
+ IREE_ASSERT_ARGUMENT(options);
+ IREE_ASSERT_ARGUMENT(device);
+ IREE_ASSERT_ARGUMENT(out_session);
+ *out_session = NULL;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Allocate the session state.
+ iree_runtime_session_t* session = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc(host_allocator, sizeof(*session),
+ (void**)&session));
+ session->host_allocator = host_allocator;
+ iree_atomic_ref_count_init(&session->ref_count);
+
+ session->instance = instance;
+ iree_runtime_instance_retain(session->instance);
+
+ // Create the context empty so that we can add our modules to it.
+ iree_status_t status = iree_vm_context_create(
+ /*instance=*/NULL, options->context_flags, host_allocator,
+ &session->context);
+
+ // Add the HAL module; it is always required when using the runtime API.
+ // Lower-level usage of the VM can avoid the HAL if it's not required.
+ iree_vm_module_t* hal_module = NULL;
+ if (iree_status_is_ok(status)) {
+ status = iree_hal_module_create(device, host_allocator, &hal_module);
+ }
+ if (iree_status_is_ok(status)) {
+ status = iree_vm_context_register_modules(session->context, &hal_module, 1);
+ }
+ if (iree_status_is_ok(status)) {
+ status = iree_vm_context_resolve_module_state(session->context, hal_module,
+ &session->hal_module_state);
+ }
+ iree_vm_module_release(hal_module);
+
+ if (iree_status_is_ok(status)) {
+ *out_session = session;
+ } else {
+ iree_runtime_session_release(session);
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+static void iree_runtime_session_destroy(iree_runtime_session_t* session) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_vm_context_release(session->context);
+ iree_runtime_instance_release(session->instance);
+
+ iree_allocator_free(session->host_allocator, session);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+IREE_API_EXPORT void iree_runtime_session_retain(
+ iree_runtime_session_t* session) {
+ if (session) {
+ iree_atomic_ref_count_inc(&session->ref_count);
+ }
+}
+
+IREE_API_EXPORT void iree_runtime_session_release(
+ iree_runtime_session_t* session) {
+ if (session && iree_atomic_ref_count_dec(&session->ref_count) == 1) {
+ iree_runtime_session_destroy(session);
+ }
+}
+
+IREE_API_EXPORT iree_allocator_t
+iree_runtime_session_host_allocator(const iree_runtime_session_t* session) {
+ IREE_ASSERT_ARGUMENT(session);
+ return session->host_allocator;
+}
+
+IREE_API_EXPORT iree_runtime_instance_t* iree_runtime_session_instance(
+ const iree_runtime_session_t* session) {
+ IREE_ASSERT_ARGUMENT(session);
+ return session->instance;
+}
+
+IREE_API_EXPORT iree_vm_context_t* iree_runtime_session_context(
+ const iree_runtime_session_t* session) {
+ IREE_ASSERT_ARGUMENT(session);
+ return session->context;
+}
+
+IREE_API_EXPORT iree_hal_device_t* iree_runtime_session_device(
+ const iree_runtime_session_t* session) {
+ IREE_ASSERT_ARGUMENT(session);
+ return iree_hal_module_state_device(session->hal_module_state);
+}
+
+IREE_API_EXPORT iree_hal_allocator_t* iree_runtime_session_device_allocator(
+ const iree_runtime_session_t* session) {
+ iree_hal_device_t* device = iree_runtime_session_device(session);
+ if (!device) return NULL;
+ return iree_hal_device_allocator(device);
+}
+
+IREE_API_EXPORT iree_status_t
+iree_runtime_session_trim(iree_runtime_session_t* session) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = iree_vm_context_notify(
+ iree_runtime_session_context(session), IREE_VM_SIGNAL_LOW_MEMORY);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_runtime_session_append_module(
+ iree_runtime_session_t* session, iree_vm_module_t* module) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_ASSERT_ARGUMENT(module);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, iree_vm_module_name(module).data,
+ iree_vm_module_name(module).size);
+
+ iree_status_t status = iree_vm_context_register_modules(
+ iree_runtime_session_context(session), &module, 1);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t
+iree_runtime_session_append_bytecode_module_from_memory(
+ iree_runtime_session_t* session, iree_const_byte_span_t flatbuffer_data,
+ iree_allocator_t flatbuffer_allocator) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_vm_module_t* module = NULL;
+ iree_status_t status = iree_vm_bytecode_module_create(
+ flatbuffer_data, flatbuffer_allocator,
+ iree_runtime_session_host_allocator(session), &module);
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_session_append_module(session, module);
+ }
+ iree_vm_module_release(module);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t
+iree_runtime_session_append_bytecode_module_from_file(
+ iree_runtime_session_t* session, const char* file_path) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, file_path);
+
+ // TODO(#3909): actually map the memory here. For now we just load the
+ // contents.
+ iree_file_contents_t* flatbuffer_contents = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_file_read_contents(file_path,
+ iree_runtime_session_host_allocator(session),
+ &flatbuffer_contents));
+
+ iree_status_t status =
+ iree_runtime_session_append_bytecode_module_from_memory(
+ session, flatbuffer_contents->const_buffer,
+ iree_file_contents_deallocator(flatbuffer_contents));
+ if (!iree_status_is_ok(status)) {
+ iree_file_contents_free(flatbuffer_contents);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_runtime_session_lookup_function(
+ const iree_runtime_session_t* session, iree_string_view_t full_name,
+ iree_vm_function_t* out_function) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_ASSERT_ARGUMENT(out_function);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status = iree_vm_context_resolve_function(
+ iree_runtime_session_context(session), full_name, out_function);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_runtime_session_call(
+ iree_runtime_session_t* session, const iree_vm_function_t* function,
+ iree_vm_list_t* input_list, iree_vm_list_t* output_list) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_ASSERT_ARGUMENT(function);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status =
+ iree_vm_invoke(iree_runtime_session_context(session), *function,
+ IREE_VM_INVOCATION_FLAG_NONE,
+ /*policy=*/NULL, input_list, output_list,
+ iree_runtime_session_host_allocator(session));
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT iree_status_t iree_runtime_session_call_by_name(
+ iree_runtime_session_t* session, iree_string_view_t full_name,
+ iree_vm_list_t* input_list, iree_vm_list_t* output_list) {
+ IREE_ASSERT_ARGUMENT(session);
+ iree_vm_function_t function;
+ IREE_RETURN_IF_ERROR(
+ iree_runtime_session_lookup_function(session, full_name, &function));
+ return iree_runtime_session_call(session, &function, input_list, output_list);
+}
+
+IREE_API_EXPORT iree_status_t iree_runtime_session_call_direct(
+ iree_runtime_session_t* session, const iree_vm_function_call_t* call) {
+ IREE_ASSERT_ARGUMENT(session);
+ IREE_ASSERT_ARGUMENT(call);
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Allocate a VM stack on the host stack and initialize it.
+ IREE_VM_INLINE_STACK_INITIALIZE(
+ stack, IREE_VM_INVOCATION_FLAG_NONE,
+ iree_vm_context_state_resolver(iree_runtime_session_context(session)),
+ iree_runtime_session_host_allocator(session));
+
+ // Issue the call.
+ iree_vm_execution_result_t result;
+ iree_status_t status = call->function.module->begin_call(
+ call->function.module->self, stack, call, &result);
+
+ // Cleanup the stack.
+ iree_vm_stack_deinitialize(stack);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/runtime/session.h b/runtime/src/iree/runtime/session.h
new file mode 100644
index 0000000..b96a497
--- /dev/null
+++ b/runtime/src/iree/runtime/session.h
@@ -0,0 +1,226 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_RUNTIME_SESSION_H_
+#define IREE_RUNTIME_SESSION_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/vm/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_runtime_instance_t iree_runtime_instance_t;
+
+// A session containing a set of loaded VM modules and their runtime state.
+// Each session has its own isolated module state and though multiple sessions
+// may share the same device they will all see their own individual timelines.
+// Think of a session like a process in an operating system: able to communicate
+// and share syscalls but with a strict separation.
+//
+// Only sessions that share an instance may directly share resources as
+// different instances may have different HAL devices and have incompatible
+// memory. Import and export APIs must be used to transfer the resources across
+// instances or incompatible devices within the same instance.
+//
+// As with all of iree/runtime/ this API is a higher-level wrapper for the
+// low-level IREE HAL and VM. Using this may pull in additional dependencies and
+// perform additional allocations compared to what you can get by directly going
+// to the lower levels.
+//
+// Thread-compatible; only a single thread may use the session at any time and
+// the caller must use external synchronization if they will be using it or any
+// resource derived from it concurrently. Any two sessions may be executed
+// concurrently without interference.
+typedef struct iree_runtime_session_t iree_runtime_session_t;
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_session_options_t
+//===----------------------------------------------------------------------===//
+
+// Builtin modules that are provided by the runtime.
+enum iree_runtime_session_builtins_bits_t {
+ // All built-in modules that are compiled into the runtime will be available.
+ IREE_RUNTIME_SESSION_BUILTIN_ALL = UINT64_MAX,
+};
+typedef uint64_t iree_runtime_session_builtins_t;
+
+// Options used to configure session creation.
+typedef struct iree_runtime_session_options_t {
+ // Flags controlling the execution environment.
+ iree_vm_context_flags_t context_flags;
+
+ // A bitmask identifying which IREE builtin modules should be enabled.
+ // Session creation will fail if a requested module is not built into the
+ // runtime binary.
+ iree_runtime_session_builtins_t builtin_modules;
+} iree_runtime_session_options_t;
+
+// Initializes |out_options| to its default values.
+IREE_API_EXPORT void iree_runtime_session_options_initialize(
+ iree_runtime_session_options_t* out_options);
+
+//===----------------------------------------------------------------------===//
+// iree_runtime_session_t
+//===----------------------------------------------------------------------===//
+
+// Creates a new session forced to use the given |device|.
+// This bypasses any device enumeration performed by the loaded modules but
+// the loaded modules will still verify that the device matches their
+// requirements.
+//
+// A base set of modules may be added by the runtime during creation based on
+// |options| and users may load additional modules - such as the one containing
+// their user code - by using the iree_vm_context_t provided by
+// iree_runtime_session_context.
+//
+// |host_allocator| will be used to allocate the session and any associated
+// resources. |out_session| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_runtime_session_create_with_device(
+ iree_runtime_instance_t* instance,
+ const iree_runtime_session_options_t* options, iree_hal_device_t* device,
+ iree_allocator_t host_allocator, iree_runtime_session_t** out_session);
+
+// Retains the given |session| for the caller.
+IREE_API_EXPORT void iree_runtime_session_retain(
+ iree_runtime_session_t* session);
+
+// Releases the given |session| from the caller.
+IREE_API_EXPORT void iree_runtime_session_release(
+ iree_runtime_session_t* session);
+
+// Returns the host allocator used to allocate the session and its resources.
+// Callers should use this to allocate resources so that any memory tracking
+// being performed correctly attributes the allocations to the session.
+IREE_API_EXPORT iree_allocator_t
+iree_runtime_session_host_allocator(const iree_runtime_session_t* session);
+
+// Returns the instance the session uses for shared resources.
+IREE_API_EXPORT iree_runtime_instance_t* iree_runtime_session_instance(
+ const iree_runtime_session_t* session);
+
+// Returns the VM context used to load and link modules.
+// The context can be used to perform additional reflection over the loaded
+// modules or load additional modules (if supported).
+IREE_API_EXPORT iree_vm_context_t* iree_runtime_session_context(
+ const iree_runtime_session_t* session);
+
+// Returns the HAL device being used for execution.
+//
+// NOTE: this device will not be available until initialized by a user module
+// and will return NULL if queried prior.
+IREE_API_EXPORT iree_hal_device_t* iree_runtime_session_device(
+ const iree_runtime_session_t* session);
+
+// Returns the device allocator used to allocate compatible buffers.
+// Buffers from other allocators may not be compatible and require importing
+// prior to being usable by the session.
+//
+// NOTE: this device allocator will not be available until initialized by a
+// user module and will return NULL if queried prior.
+IREE_API_EXPORT iree_hal_allocator_t* iree_runtime_session_device_allocator(
+ const iree_runtime_session_t* session);
+
+// Trims transient/cached resources used by the session.
+// Upon resuming these resources may be expensive to rematerialize/reload and
+// as such this should only be called when it is known the resources will not
+// be needed soon.
+IREE_API_EXPORT iree_status_t
+iree_runtime_session_trim(iree_runtime_session_t* session);
+
+// Appends the given |module| to the context.
+// The module will be retained by the context.
+//
+// NOTE: only valid if the context is not yet frozen; see
+// iree_vm_context_freeze for more information.
+IREE_API_EXPORT iree_status_t iree_runtime_session_append_module(
+ iree_runtime_session_t* session, iree_vm_module_t* module);
+
+// Appends a bytecode module to the context loaded from the given memory blob.
+// If a |flatbuffer_allocator| is provided then it will be used to free the
+// |flatbuffer_data| when the module is destroyed and otherwise the ownership of
+// the |flatbuffer_data| remains with the caller. The data must remain valid for
+// the lifetime of the session.
+//
+// If the module exists as a file prefer instead to use
+// iree_runtime_session_append_bytecode_module_from_file to use memory mapped
+// I/O and reduce total memory consumption.
+//
+// NOTE: only valid if the context is not yet frozen; see
+// iree_vm_context_freeze for more information.
+IREE_API_EXPORT iree_status_t
+iree_runtime_session_append_bytecode_module_from_memory(
+ iree_runtime_session_t* session, iree_const_byte_span_t flatbuffer_data,
+ iree_allocator_t flatbuffer_allocator);
+
+// Appends a bytecode module to the context loaded from the given |file_path|.
+//
+// NOTE: only valid if the context is not yet frozen; see
+// iree_vm_context_freeze for more information.
+IREE_API_EXPORT iree_status_t
+iree_runtime_session_append_bytecode_module_from_file(
+ iree_runtime_session_t* session, const char* file_path);
+
+// Sets |out_function| to an exported function with the fully-qualified name
+// of |full_name| or returns IREE_STATUS_NOT_FOUND. The function reference is
+// valid for the lifetime of |session|.
+//
+// The function name matches the original MLIR module and function symbols.
+// Example:
+// module @foo {
+// func.func @bar()
+// }
+// The full name of '@bar' is 'foo.bar'.
+// By default modules have the name 'module'.
+IREE_API_EXPORT iree_status_t iree_runtime_session_lookup_function(
+ const iree_runtime_session_t* session, iree_string_view_t full_name,
+ iree_vm_function_t* out_function);
+
+// Synchronously issues a generic function call.
+//
+// |input_list| is used to pass values and objects into the target function and
+// must match the signature defined by the compiled function. List ownership
+// remains with the caller.
+//
+// |output_list| is populated after the function completes execution with the
+// output values and objects of the function. List ownership remains with the
+// caller.
+//
+// Functions with either no inputs or outputs may provide NULL for the
+// respective list.
+IREE_API_EXPORT iree_status_t iree_runtime_session_call(
+ iree_runtime_session_t* session, const iree_vm_function_t* function,
+ iree_vm_list_t* input_list, iree_vm_list_t* output_list);
+
+// Synchronously issues a generic function call by fully-qualified name.
+// This is equivalent to performing a iree_runtime_session_lookup_function
+// followed by a iree_runtime_session_call. When calling the same function
+// repeatedly callers should perform the lookup and cache the resulting function
+// handle to avoid repeated lookups.
+IREE_API_EXPORT iree_status_t iree_runtime_session_call_by_name(
+ iree_runtime_session_t* session, iree_string_view_t full_name,
+ iree_vm_list_t* input_list, iree_vm_list_t* output_list);
+
+// Synchronously issues a direct function call.
+// This bypasses signature verification and directly calls through the VM ABI.
+// Though still safe(ish) the errors reported on a signature mismatch will be
+// much less useful than a call performed via the more generic methods. Treat
+// this as a low-level technique only to be used when the calling host code and
+// callee modules are known to be compatible.
+//
+// See iree_vm_function_call_t for more information.
+IREE_API_EXPORT iree_status_t iree_runtime_session_call_direct(
+ iree_runtime_session_t* session, const iree_vm_function_call_t* call);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_RUNTIME_SESSION_H_
diff --git a/runtime/src/iree/runtime/testdata/BUILD b/runtime/src/iree/runtime/testdata/BUILD
new file mode 100644
index 0000000..974ed60
--- /dev/null
+++ b/runtime/src/iree/runtime/testdata/BUILD
@@ -0,0 +1,34 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content")
+load("//build_tools/bazel:iree_bytecode_module.bzl", "iree_bytecode_module")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_cmake_extra_content(
+ content = """
+if (NOT ${IREE_HAL_DRIVER_VMVX} OR NOT ${IREE_TARGET_BACKEND_VMVX})
+ return()
+endif()
+""",
+ inline = True,
+)
+
+iree_bytecode_module(
+ name = "simple_mul_module",
+ src = "simple_mul.mlir",
+ c_identifier = "iree_runtime_testdata_simple_mul_module",
+ flags = [
+ "-iree-input-type=mhlo",
+ "-iree-mlir-to-vm-bytecode-module",
+ "-iree-hal-target-backends=vmvx",
+ ],
+)
diff --git a/runtime/src/iree/runtime/testdata/CMakeLists.txt b/runtime/src/iree/runtime/testdata/CMakeLists.txt
new file mode 100644
index 0000000..8278eea
--- /dev/null
+++ b/runtime/src/iree/runtime/testdata/CMakeLists.txt
@@ -0,0 +1,31 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/runtime/testdata/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+if (NOT ${IREE_HAL_DRIVER_VMVX} OR NOT ${IREE_TARGET_BACKEND_VMVX})
+ return()
+endif()
+
+iree_bytecode_module(
+ NAME
+ simple_mul_module
+ SRC
+ "simple_mul.mlir"
+ C_IDENTIFIER
+ "iree_runtime_testdata_simple_mul_module"
+ FLAGS
+ "-iree-input-type=mhlo"
+ "-iree-mlir-to-vm-bytecode-module"
+ "-iree-hal-target-backends=vmvx"
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/runtime/testdata/simple_mul.mlir b/runtime/src/iree/runtime/testdata/simple_mul.mlir
new file mode 100644
index 0000000..a7369d2
--- /dev/null
+++ b/runtime/src/iree/runtime/testdata/simple_mul.mlir
@@ -0,0 +1,4 @@
+func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
+ %0 = "mhlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+ return %0 : tensor<4xf32>
+}
diff --git a/runtime/src/iree/schemas/BUILD b/runtime/src/iree/schemas/BUILD
new file mode 100644
index 0000000..80acb67
--- /dev/null
+++ b/runtime/src/iree/schemas/BUILD
@@ -0,0 +1,67 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_build_test")
+load("//build_tools/bazel:iree_flatcc.bzl", "iree_flatbuffer_c_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+FLATCC_ARGS = [
+ "--reader",
+ "--builder",
+ "--verifier",
+ "--json",
+]
+
+iree_flatbuffer_c_library(
+ name = "bytecode_module_def_c_fbs",
+ srcs = ["bytecode_module_def.fbs"],
+ flatcc_args = FLATCC_ARGS,
+)
+
+iree_flatbuffer_c_library(
+ name = "cuda_executable_def_c_fbs",
+ srcs = ["cuda_executable_def.fbs"],
+ flatcc_args = FLATCC_ARGS,
+)
+
+iree_flatbuffer_c_library(
+ name = "rocm_executable_def_c_fbs",
+ srcs = ["rocm_executable_def.fbs"],
+ flatcc_args = FLATCC_ARGS,
+)
+
+iree_flatbuffer_c_library(
+ name = "metal_executable_def_c_fbs",
+ srcs = ["metal_executable_def.fbs"],
+ flatcc_args = FLATCC_ARGS,
+)
+
+iree_flatbuffer_c_library(
+ name = "spirv_executable_def_c_fbs",
+ srcs = ["spirv_executable_def.fbs"],
+ flatcc_args = FLATCC_ARGS,
+)
+
+iree_flatbuffer_c_library(
+ name = "wgsl_executable_def_c_fbs",
+ srcs = ["wgsl_executable_def.fbs"],
+ flatcc_args = FLATCC_ARGS,
+)
+
+iree_build_test(
+ name = "schema_build_test",
+ targets = [
+ ":bytecode_module_def_c_fbs",
+ ":metal_executable_def_c_fbs",
+ ":spirv_executable_def_c_fbs",
+ ":wgsl_executable_def_c_fbs",
+ ],
+)
diff --git a/runtime/src/iree/schemas/CMakeLists.txt b/runtime/src/iree/schemas/CMakeLists.txt
new file mode 100644
index 0000000..bc0b4f5
--- /dev/null
+++ b/runtime/src/iree/schemas/CMakeLists.txt
@@ -0,0 +1,91 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/schemas/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+flatbuffer_c_library(
+ NAME
+ bytecode_module_def_c_fbs
+ SRCS
+ "bytecode_module_def.fbs"
+ FLATCC_ARGS
+ "--reader"
+ "--builder"
+ "--verifier"
+ "--json"
+ PUBLIC
+)
+
+flatbuffer_c_library(
+ NAME
+ cuda_executable_def_c_fbs
+ SRCS
+ "cuda_executable_def.fbs"
+ FLATCC_ARGS
+ "--reader"
+ "--builder"
+ "--verifier"
+ "--json"
+ PUBLIC
+)
+
+flatbuffer_c_library(
+ NAME
+ rocm_executable_def_c_fbs
+ SRCS
+ "rocm_executable_def.fbs"
+ FLATCC_ARGS
+ "--reader"
+ "--builder"
+ "--verifier"
+ "--json"
+ PUBLIC
+)
+
+flatbuffer_c_library(
+ NAME
+ metal_executable_def_c_fbs
+ SRCS
+ "metal_executable_def.fbs"
+ FLATCC_ARGS
+ "--reader"
+ "--builder"
+ "--verifier"
+ "--json"
+ PUBLIC
+)
+
+flatbuffer_c_library(
+ NAME
+ spirv_executable_def_c_fbs
+ SRCS
+ "spirv_executable_def.fbs"
+ FLATCC_ARGS
+ "--reader"
+ "--builder"
+ "--verifier"
+ "--json"
+ PUBLIC
+)
+
+flatbuffer_c_library(
+ NAME
+ wgsl_executable_def_c_fbs
+ SRCS
+ "wgsl_executable_def.fbs"
+ FLATCC_ARGS
+ "--reader"
+ "--builder"
+ "--verifier"
+ "--json"
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/schemas/bytecode_module_def.fbs b/runtime/src/iree/schemas/bytecode_module_def.fbs
new file mode 100644
index 0000000..df15e9e
--- /dev/null
+++ b/runtime/src/iree/schemas/bytecode_module_def.fbs
@@ -0,0 +1,246 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+namespace iree.vm;
+
+// IREE bytecode module.
+file_identifier "IREE";
+file_extension "vmfb";
+
+// Arbitrary key/value reflection attribute.
+table ReflectionAttrDef {
+ key:string;
+ value:string;
+}
+
+// Defines a type within the type table.
+table TypeDef {
+ // Fully-qualified name of the type, such as `hal.buffer`.
+ full_name:string;
+}
+
+// Defines a function signature.
+table FunctionSignatureDef {
+ // Arguments, in order, as described in the FunctionSignatureDef.
+ // Maps to an entry in the module type table.
+ argument_types:[int32];
+
+ // Results, in order, as described in the FunctionSignatureDef.
+ // Maps to an entry in the module type table.
+ result_types:[int32];
+
+ // The VM calling convention declaration used to marshal arguments and
+ // results into and out of the function.
+ // Optional for imports and internal functions but required for exports.
+ //
+ // See iree/vm/module.h for more information.
+ calling_convention:string;
+
+ // Function level reflection attributes.
+ // These are typically used to communicate additional ABI metadata needed
+ // for dynamic invocation and host language mapping.
+ // See: docs/developers/design_docs/function_abi.md
+ reflection_attrs:[ReflectionAttrDef];
+}
+
+enum ImportFlagBits:uint32 (bit_flags) {
+ REQUIRED = 0, // 1u << 0
+ OPTIONAL = 1, // 1u << 1
+}
+
+// Defines a runtime-resolved import function.
+table ImportFunctionDef {
+ // Fully-qualified name of the function (including the module namespace).
+ full_name:string;
+
+ // Signature of the function, used for verifying that imports match.
+ signature:FunctionSignatureDef;
+
+ // Version flags controlling the behavior of import resolution.
+ flags:ImportFlagBits = REQUIRED;
+}
+
+// Defines a runtime-resolved export function.
+table ExportFunctionDef {
+ // Local name of the function (excluding the module namespace).
+ local_name:string;
+
+ // Signature of the function, used for verifying that imports match.
+ signature:FunctionSignatureDef;
+
+ // Ordinal in the internal_functions table that implements this function.
+ internal_ordinal:int32;
+}
+
+// Defines a bytecode function.
+table InternalFunctionDef {
+ // Local name of the function or empty if the names have been stripped.
+ // The full name of the function when referenced from external modules will
+ // include the BytecodeModuleDef.name prefix.
+ local_name:string;
+
+ // Signature of the function used for reflection.
+ signature:FunctionSignatureDef;
+}
+
+table UncompressedDataDef {
+}
+
+union CompressionTypeDef {
+ UncompressedDataDef,
+}
+
+// Read-only data segment.
+table RodataSegmentDef {
+ // The compression format used for the data, including required decompression
+ // arguments. Omitted if the data is uncompressed.
+ compression_type:CompressionTypeDef;
+
+ // Contents in a format defined by CompressionTypeDef.
+ data:[uint8];
+}
+
+// Read-write data segment.
+table RwdataSegmentDef {
+ // Total byte capacity.
+ byte_size:int32;
+}
+
+// Defines the per-instance module state.
+table ModuleStateDef {
+ // Bytes used for global primitive value storage. All are initialized to zero
+ // on module state allocation.
+ global_bytes_capacity:int32;
+
+ // Total number of global ref values.
+ global_ref_count:int32;
+}
+
+// Static function descriptor used for stack frame allocation.
+struct FunctionDescriptor {
+ // Offset and length within the larger bytecode data block.
+ bytecode_offset:int32;
+ bytecode_length:int32;
+
+ // TODO(benvanik): remove counts and embed directly in bytecode.
+ // Total number of i32 registers used by the function.
+ i32_register_count:int16;
+ // Total number of ref registers used by the function.
+ ref_register_count:int16;
+}
+
+// mlir/IR/BuiltinLocationAttributes.td : CallSiteLoc
+table CallSiteLocDef {
+ callee:int32;
+ caller:int32;
+}
+
+// mlir/IR/BuiltinLocationAttributes.td : FileLineColLoc
+table FileLineColLocDef {
+ filename:string;
+ line:int32;
+ column:int32;
+}
+
+// mlir/IR/BuiltinLocationAttributes.td : FusedLoc
+table FusedLocDef {
+ metadata:string;
+ locations:[int32];
+}
+
+// mlir/IR/BuiltinLocationAttributes.td : NameLoc
+table NameLocDef {
+ name:string;
+ child_location:int32;
+}
+
+// A location - possibly nested.
+union LocationTypeDef {
+ CallSiteLocDef,
+ FileLineColLocDef,
+ FusedLocDef,
+ NameLocDef,
+}
+
+// Maps a relative bytecode offset within a function to a source location.
+struct BytecodeLocationDef {
+ // Bytecode offset of the start of the operation.
+ bytecode_offset:int32;
+ // Index into the debug database location_table.
+ location:int32;
+}
+
+// Debug data for a single function mapping back into source IR.
+table FunctionSourceMapDef {
+ // Operation locations for all ops within the function.
+ locations:[BytecodeLocationDef];
+}
+
+// VM debug information database.
+table DebugDatabaseDef {
+ // Location table. Source maps reference this table.
+ location_table:[LocationTypeDef];
+
+ // Internal function source maps; 1:1 with the module function_descriptors.
+ functions:[FunctionSourceMapDef];
+}
+
+// Defines a bytecode module containing the information required to serve the
+// iree_vm_module_interface_t interface.
+//
+// Modules are similar to shared objects in that they provide a set of exported
+// functions that can be queried and called externally as well as any number of
+// internal function definitions. Imports can also be used to have the loader
+// dynamically link in exports of other modules upon loading.
+//
+// Modules can contain read-only segments containing (optionally) compressed
+// data that is used by the module. Read-write segments define uninitialized
+// reservations and are similar to .bss, and custom initializer functions can
+// be embedded to treat them as if they were .data segments.
+//
+// State can be defined per active runtime context (effectively like
+// thread-local storage) using ModuleStateDef. The runtime will prepare this
+// state and maintain it for the lifetime of contexts and ensure that ops that
+// use it (such as vm.global.load.*) are always associated with the appropriate
+// state.
+table BytecodeModuleDef {
+ // Module namespace used for fully-qualified function lookups.
+ name:string (required);
+
+ // Type table mapping type IDs used within the module to type signatures.
+ types:[TypeDef];
+
+ // Imported function definitions used to resolve imports.
+ imported_functions:[ImportFunctionDef];
+
+ // Exported function definitions used to resolve exports.
+ exported_functions:[ExportFunctionDef];
+
+ // Read-only data segments (like non-code .text).
+ // May optionally be compressed and decompressed by the loader.
+ rodata_segments:[RodataSegmentDef];
+
+ // Read-write data segments of uninitialized memory (like .bss).
+ rwdata_segments:[RwdataSegmentDef];
+
+ // Global module state information (like TLS).
+ module_state:ModuleStateDef;
+
+ // References to ranges in the bytecode contents buffer where each internal
+ // function is located. This table is kept unnested within InternalFunctionDef
+ // to avoid the need to walk the FlatBuffer hierarchy at runtime when
+ // resolving call targets. Multiple functions may alias the same ranges in
+ // bytecode_data.
+ function_descriptors:[FunctionDescriptor];
+
+ // Bytecode contents. One large buffer containing all of the function op data.
+ bytecode_data:[uint8];
+
+ // Optional module debug database.
+ debug_database:DebugDatabaseDef;
+}
+
+root_type BytecodeModuleDef;
diff --git a/runtime/src/iree/schemas/cuda_executable_def.fbs b/runtime/src/iree/schemas/cuda_executable_def.fbs
new file mode 100644
index 0000000..77e5290
--- /dev/null
+++ b/runtime/src/iree/schemas/cuda_executable_def.fbs
@@ -0,0 +1,39 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+namespace iree;
+
+// 'CUDA Executable'.
+file_identifier "CUDA";
+file_extension "cuda";
+
+// A struct for the kernel block size along each dimension.
+struct CUDABlockSizeDef {
+ x:uint32;
+ y:uint32;
+ z:uint32;
+}
+
+table CUDAExecutableDef {
+ // A map of entry point ordinals to string names as used in the shader
+ // library.
+ entry_points:[string];
+
+ // Block sizes for each entry point.
+ //
+ // Currently the thread group size/block size is decided during code gen but
+ // in CUDA it is set by the runtime.
+ block_sizes:[CUDABlockSizeDef];
+ // Size of dynamic shared memory.
+ shared_memory_size:[uint32];
+
+ // PTX string of the module.
+ ptx_image:string;
+
+ // TODO(thomasraoux): Add potential cuBin binary specialized for some targets.
+}
+
+root_type CUDAExecutableDef;
diff --git a/runtime/src/iree/schemas/metal_executable_def.fbs b/runtime/src/iree/schemas/metal_executable_def.fbs
new file mode 100644
index 0000000..19a81ab
--- /dev/null
+++ b/runtime/src/iree/schemas/metal_executable_def.fbs
@@ -0,0 +1,47 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+namespace iree;
+
+// 'Metal Executable'.
+file_identifier "MTLE";
+file_extension "mtle";
+
+// A struct for Metal threadgroup size along each dimension.
+struct MetalThreadgroupSize {
+ x:uint32;
+ y:uint32;
+ z:uint32;
+}
+
+// A Metal shader library and runtime pipeline state description.
+// This information is used to create MTLLibrary, MTLFunction and pipeline
+// state objects.
+table MetalExecutableDef {
+ // A map of entry point ordinals to string names as used in the shader
+ // library.
+ entry_points:[string];
+
+ // Threadgroup sizes for each entry point.
+ //
+ // We need this because workgroup size is directly baked inside SPIR-V code,
+ // but in Metal it's specified when dispatching workload. So when cross
+ // compiling SPIR-V to MSL, we need to persist the information here so that
+ // later it can be used for dispatching.
+ // TODO(antiagainst): support SPIR-V specialization constant.
+ threadgroup_sizes:[MetalThreadgroupSize];
+
+ // Shader content can be provided as either a serialized library or in the
+ // form of source code strings.
+
+ // Serialized Metal shader library.
+ shader_library:[uint8];
+ // Original Metal shader source code.
+ shader_sources:[string];
+}
+
+root_type MetalExecutableDef;
+
diff --git a/runtime/src/iree/schemas/rocm_executable_def.fbs b/runtime/src/iree/schemas/rocm_executable_def.fbs
new file mode 100644
index 0000000..e88d8ed
--- /dev/null
+++ b/runtime/src/iree/schemas/rocm_executable_def.fbs
@@ -0,0 +1,33 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+namespace iree;
+
+// 'ROCM Executable'.
+file_identifier "ROCM";
+file_extension "rocm";
+
+// A struct for the kernel block size along each dimension.
+struct ROCMBlockSizeDef {
+ x:uint32;
+ y:uint32;
+ z:uint32;
+}
+
+table ROCMExecutableDef {
+ // A map of entry point ordinals to string names as used in the shader
+ // library.
+ entry_points:[string];
+
+ // Block sizes for each entry point.
+ //
+ block_sizes:[ROCMBlockSizeDef];
+
+ // HSACO string of the module.
+ hsaco_image:string;
+}
+
+root_type ROCMExecutableDef;
diff --git a/runtime/src/iree/schemas/spirv_executable_def.fbs b/runtime/src/iree/schemas/spirv_executable_def.fbs
new file mode 100644
index 0000000..4dec3a4
--- /dev/null
+++ b/runtime/src/iree/schemas/spirv_executable_def.fbs
@@ -0,0 +1,24 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+namespace iree;
+
+// 'SPIR-V Executable'.
+file_identifier "SPVE";
+file_extension "spve";
+
+// A SPIR-V shader module and runtime pipeline layout description.
+// This information is used to create the VkShaderModule, VkPipelineLayout, and
+// any required VkDescriptorSetLayouts.
+table SpirVExecutableDef {
+ // A map of entry point ordinals to string names as used in the shader module.
+ entry_points:[string];
+
+ // SPIR-V code words.
+ code:[uint32];
+}
+
+root_type SpirVExecutableDef;
diff --git a/runtime/src/iree/schemas/wgsl_executable_def.fbs b/runtime/src/iree/schemas/wgsl_executable_def.fbs
new file mode 100644
index 0000000..c3ac1f6
--- /dev/null
+++ b/runtime/src/iree/schemas/wgsl_executable_def.fbs
@@ -0,0 +1,33 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+namespace iree;
+
+// 'WGSL Executable'.
+file_identifier "WGSL";
+file_extension "wgsl";
+
+// Contents of one WGPUShaderModule, possibly with multiple entry points.
+// Entry points have the name "dN" where N is the executable-wide entry point
+// ordinal.
+table WGSLShaderModuleDef {
+ // WGSL source code.
+ code:string;
+
+ // Optional `source-map-v3` format source map.
+ source_map:string;
+}
+
+table WGSLExecutableDef {
+ // An ordered list of shader modules, each containing 1+ entry points.
+ shader_modules:[WGSLShaderModuleDef];
+
+ // A mapping of executable entry point ordinals to the shader module in which
+ // they reside.
+ entry_points:[int];
+}
+
+root_type WGSLExecutableDef;
diff --git a/runtime/src/iree/task/BUILD b/runtime/src/iree/task/BUILD
new file mode 100644
index 0000000..6ef13e3
--- /dev/null
+++ b/runtime/src/iree/task/BUILD
@@ -0,0 +1,199 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library", "iree_runtime_cc_test")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_cmake_extra_content(
+ content = """
+# Task-based executor requires threading support.
+if(NOT ${IREE_ENABLE_THREADING})
+ return()
+endif()
+
+# cpuinfo can be conditionally disabled when it is not supported.
+# If disabled then by default the task system will use 1 thread.
+set(IREE_CPUINFO_TARGET)
+if(IREE_ENABLE_CPUINFO)
+ set(IREE_CPUINFO_TARGET cpuinfo)
+endif()
+""",
+ inline = True,
+)
+
+iree_runtime_cc_library(
+ name = "api",
+ srcs = ["api.c"],
+ hdrs = ["api.h"],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal:flags",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "task",
+ srcs = [
+ "executor.c",
+ "executor_impl.h",
+ "list.c",
+ "poller.c",
+ "pool.c",
+ "post_batch.c",
+ "post_batch.h",
+ "queue.c",
+ "scope.c",
+ "submission.c",
+ "task.c",
+ "task_impl.h",
+ "topology.c",
+ "topology_cpuinfo.c",
+ "worker.c",
+ "worker.h",
+ ],
+ hdrs = [
+ "affinity_set.h",
+ "executor.h",
+ "list.h",
+ "poller.h",
+ "pool.h",
+ "queue.h",
+ "scope.h",
+ "submission.h",
+ "task.h",
+ "topology.h",
+ "tuning.h",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:core_headers",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal",
+ "//runtime/src/iree/base/internal:atomic_slist",
+ "//runtime/src/iree/base/internal:cpu",
+ "//runtime/src/iree/base/internal:event_pool",
+ "//runtime/src/iree/base/internal:fpu_state",
+ "//runtime/src/iree/base/internal:prng",
+ "//runtime/src/iree/base/internal:synchronization",
+ "//runtime/src/iree/base/internal:threading",
+ "//runtime/src/iree/base/internal:wait_handle",
+ "@cpuinfo",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "executor_demo",
+ srcs = ["executor_demo.cc"],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "//runtime/src/iree/base/internal:prng",
+ "//runtime/src/iree/task/testing:test_util",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "executor_test",
+ srcs = ["executor_test.cc"],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/task/testing:test_util",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "list_test",
+ srcs = ["list_test.cc"],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/task/testing:test_util",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "pool_test",
+ srcs = ["pool_test.cc"],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/task/testing:test_util",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "queue_test",
+ srcs = ["queue_test.cc"],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/task/testing:test_util",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "scope_test",
+ srcs = [
+ "scope_test.cc",
+ "task_impl.h",
+ ],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/task/testing:test_util",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "task_tests",
+ srcs = [
+ "task_test_barrier.cc",
+ "task_test_call.cc",
+ "task_test_dispatch.cc",
+ "task_test_fence.cc",
+ "task_test_nop.cc",
+ "task_test_wait.cc",
+ ],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/task/testing:task_test",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
+
+iree_runtime_cc_test(
+ name = "topology_test",
+ srcs = ["topology_test.cc"],
+ tags = [
+ "noasan", # TODO(8469): Does not work on machines with large numbers of cores.
+ ],
+ deps = [
+ ":task",
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/testing:gtest",
+ "//runtime/src/iree/testing:gtest_main",
+ ],
+)
diff --git a/runtime/src/iree/task/CMakeLists.txt b/runtime/src/iree/task/CMakeLists.txt
new file mode 100644
index 0000000..0e55722
--- /dev/null
+++ b/runtime/src/iree/task/CMakeLists.txt
@@ -0,0 +1,206 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/task/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+# Task-based executor requires threading support.
+if(NOT ${IREE_ENABLE_THREADING})
+ return()
+endif()
+
+# cpuinfo can be conditionally disabled when it is not supported.
+# If disabled then by default the task system will use 1 thread.
+set(IREE_CPUINFO_TARGET)
+if(IREE_ENABLE_CPUINFO)
+ set(IREE_CPUINFO_TARGET cpuinfo)
+endif()
+
+iree_cc_library(
+ NAME
+ api
+ HDRS
+ "api.h"
+ SRCS
+ "api.c"
+ DEPS
+ ::task
+ iree::base::internal::flags
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ task
+ HDRS
+ "affinity_set.h"
+ "executor.h"
+ "list.h"
+ "poller.h"
+ "pool.h"
+ "queue.h"
+ "scope.h"
+ "submission.h"
+ "task.h"
+ "topology.h"
+ "tuning.h"
+ SRCS
+ "executor.c"
+ "executor_impl.h"
+ "list.c"
+ "poller.c"
+ "pool.c"
+ "post_batch.c"
+ "post_batch.h"
+ "queue.c"
+ "scope.c"
+ "submission.c"
+ "task.c"
+ "task_impl.h"
+ "topology.c"
+ "topology_cpuinfo.c"
+ "worker.c"
+ "worker.h"
+ DEPS
+ ${IREE_CPUINFO_TARGET}
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::atomic_slist
+ iree::base::internal::cpu
+ iree::base::internal::event_pool
+ iree::base::internal::fpu_state
+ iree::base::internal::prng
+ iree::base::internal::synchronization
+ iree::base::internal::threading
+ iree::base::internal::wait_handle
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ executor_demo
+ SRCS
+ "executor_demo.cc"
+ DEPS
+ ::task
+ iree::base
+ iree::base::internal::prng
+ iree::base::tracing
+ iree::task::testing::test_util
+)
+
+iree_cc_test(
+ NAME
+ executor_test
+ SRCS
+ "executor_test.cc"
+ DEPS
+ ::task
+ iree::base
+ iree::task::testing::test_util
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ list_test
+ SRCS
+ "list_test.cc"
+ DEPS
+ ::task
+ iree::base
+ iree::task::testing::test_util
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ pool_test
+ SRCS
+ "pool_test.cc"
+ DEPS
+ ::task
+ iree::base
+ iree::task::testing::test_util
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ queue_test
+ SRCS
+ "queue_test.cc"
+ DEPS
+ ::task
+ iree::base
+ iree::task::testing::test_util
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ scope_test
+ SRCS
+ "scope_test.cc"
+ "task_impl.h"
+ DEPS
+ ::task
+ iree::base
+ iree::task::testing::test_util
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ task_tests
+ SRCS
+ "task_test_barrier.cc"
+ "task_test_call.cc"
+ "task_test_dispatch.cc"
+ "task_test_fence.cc"
+ "task_test_nop.cc"
+ "task_test_wait.cc"
+ DEPS
+ ::task
+ iree::base
+ iree::task::testing::task_test
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ topology_test
+ SRCS
+ "topology_test.cc"
+ DEPS
+ ::task
+ iree::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+ LABELS
+ "noasan"
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
+
+if(NOT IREE_ENABLE_CPUINFO)
+ target_compile_definitions(iree_task_task
+ PUBLIC
+ "IREE_TASK_CPUINFO_DISABLED=1"
+ )
+endif()
diff --git a/runtime/src/iree/task/affinity_set.h b/runtime/src/iree/task/affinity_set.h
new file mode 100644
index 0000000..e81e549
--- /dev/null
+++ b/runtime/src/iree/task/affinity_set.h
@@ -0,0 +1,85 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_AFFINITY_SET_H_
+#define IREE_TASK_AFFINITY_SET_H_
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/math.h"
+#include "iree/task/tuning.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// TODO(benvanik): if IREE_TASK_EXECUTOR_MAX_WORKER_COUNT <= 32 then switch
+// these to using the 32-bit primitives. No real effect on larger 64-bit systems
+// but if we were on a smaller 32-bit system with 2 cores it's kind of silly to
+// be doing expensive 64-bit atomics on a 32-bit bus all for just 2 bits of
+// data :)
+
+//===----------------------------------------------------------------------===//
+// iree_task_affinity_set_t
+//===----------------------------------------------------------------------===//
+
+typedef uint64_t iree_task_affinity_set_t;
+
+// Allows for only a specific worker to be selected.
+static inline iree_task_affinity_set_t iree_task_affinity_for_worker(
+ uint8_t worker_index) {
+ return 1ull << worker_index;
+}
+
+// Allows for a range of workers to be selected.
+static inline iree_task_affinity_set_t iree_task_affinity_for_worker_range(
+ uint8_t worker_start, uint8_t worker_end) {
+ return ((1ull << (worker_start - 1)) - 1) ^ ((1ull << worker_end) - 1);
+}
+
+// Allows for any worker to be selected.
+static inline iree_task_affinity_set_t iree_task_affinity_for_any_worker(void) {
+ return UINT64_MAX;
+}
+
+#define iree_task_affinity_set_count_trailing_zeros \
+ iree_math_count_trailing_zeros_u64
+#define iree_task_affinity_set_count_ones iree_math_count_ones_u64
+#define iree_task_affinity_set_rotr iree_math_rotr_u64
+
+//===----------------------------------------------------------------------===//
+// iree_atomic_task_affinity_set_t
+//===----------------------------------------------------------------------===//
+
+typedef iree_atomic_int64_t iree_atomic_task_affinity_set_t;
+
+static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_load(
+ iree_atomic_task_affinity_set_t* set, iree_memory_order_t order) {
+ return iree_atomic_load_int64(set, order);
+}
+
+static inline void iree_atomic_task_affinity_set_store(
+ iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value,
+ iree_memory_order_t order) {
+ iree_atomic_store_int64(set, value, order);
+}
+
+static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_fetch_and(
+ iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value,
+ iree_memory_order_t order) {
+ return iree_atomic_fetch_and_int64(set, value, order);
+}
+
+static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_fetch_or(
+ iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value,
+ iree_memory_order_t order) {
+ return iree_atomic_fetch_or_int64(set, value, order);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_AFFINITY_SET_H_
diff --git a/runtime/src/iree/task/api.c b/runtime/src/iree/task/api.c
new file mode 100644
index 0000000..d09e5ed
--- /dev/null
+++ b/runtime/src/iree/task/api.c
@@ -0,0 +1,113 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/api.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "iree/base/internal/flags.h"
+#include "iree/base/tracing.h"
+#include "iree/task/topology.h"
+
+//===----------------------------------------------------------------------===//
+// Executor configuration
+//===----------------------------------------------------------------------===//
+
// Maps to IREE_TASK_SCHEDULING_MODE_DEFER_WORKER_STARTUP in
// iree_task_executor_create_from_flags below.
IREE_FLAG(
    bool, task_scheduling_defer_worker_startup, false,
    "Creates all workers suspended and waits until work is first scheduled to\n"
    "them to resume. This trades off initial blocking startup time waking the\n"
    "threads for potential latency additions later on as threads take longer\n"
    "to wake on their first use.");

// Passed through as the executor's per-worker local memory reservation.
// TODO(benvanik): enable this when we use it - though hopefully we don't!
IREE_FLAG(
    int32_t, task_worker_local_memory, 0,  // 64 * 1024,
    "Specifies the bytes of per-worker local memory allocated for use by\n"
    "dispatched tiles. Tiles may use less than this but will fail to dispatch\n"
    "if they require more. Conceptually it is like a stack reservation and\n"
    "should be treated the same way: the source programs must be built to\n"
    "only use a specific maximum amount of local memory and the runtime must\n"
    "be configured to make at least that amount of local memory available.");
+
+//===----------------------------------------------------------------------===//
+// Topology configuration
+//===----------------------------------------------------------------------===//
+
// Selects how the worker topology is derived when no explicit group count is
// given.
IREE_FLAG(
    string, task_topology_mode, "physical_cores",
    "Available modes:\n"
    " --task_topology_group_count=non-zero:\n"
    " Uses whatever the specified group count is and ignores the set mode.\n"
    " 'physical_cores':\n"
    " Creates one group per physical core in the machine up to\n"
    " the value specified by --task_topology_max_group_count.\n");

// When non-zero this wins over --task_topology_mode.
IREE_FLAG(
    int32_t, task_topology_group_count, 0,
    "Defines the total number of task system workers that will be created.\n"
    "Workers will be distributed across cores. Specifying 0 will use a\n"
    "heuristic defined by --task_topology_mode= to automatically select the\n"
    "worker count and distribution.");

// Clamp applied to automatic detection; unused when an explicit group count
// is specified.
IREE_FLAG(
    int32_t, task_topology_max_group_count, 8,
    "Sets a maximum value on the worker count that can be automatically\n"
    "detected and used when --task_topology_group_count=0 and is ignored\n"
    "otherwise.\n");
+
+// TODO(benvanik): add --task_topology_dump to dump out the current machine
+// configuration as seen by the topology utilities.
+
+//===----------------------------------------------------------------------===//
+// Task system factory functions
+//===----------------------------------------------------------------------===//
+
// Builds a topology from the --task_topology_* flags and creates an executor
// configured by the --task_* scheduling flags. On success the caller owns the
// reference returned in |out_executor|.
iree_status_t iree_task_executor_create_from_flags(
    iree_allocator_t host_allocator, iree_task_executor_t** out_executor) {
  IREE_ASSERT_ARGUMENT(out_executor);
  *out_executor = NULL;
  IREE_TRACE_ZONE_BEGIN(z0);

  // Translate flag values into executor scheduling options.
  iree_task_scheduling_mode_t scheduling_mode = 0;
  if (FLAG_task_scheduling_defer_worker_startup) {
    scheduling_mode |= IREE_TASK_SCHEDULING_MODE_DEFER_WORKER_STARTUP;
  }

  iree_host_size_t worker_local_memory =
      (iree_host_size_t)FLAG_task_worker_local_memory;

  iree_status_t status = iree_ok_status();

  iree_task_topology_t topology;
  iree_task_topology_initialize(&topology);

  // An explicit group count overrides whatever --task_topology_mode= is set.
  if (FLAG_task_topology_group_count != 0) {
    iree_task_topology_initialize_from_group_count(
        FLAG_task_topology_group_count, &topology);
  } else if (strcmp(FLAG_task_topology_mode, "physical_cores") == 0) {
    iree_task_topology_initialize_from_physical_cores(
        FLAG_task_topology_max_group_count, &topology);
  } else {
    status = iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "one of --task_topology_group_count or --task_topology_mode must be "
        "specified and be a valid value; have --task_topology_mode=%s.",
        FLAG_task_topology_mode);
  }

  if (iree_status_is_ok(status)) {
    status = iree_task_executor_create(scheduling_mode, &topology,
                                       worker_local_memory, host_allocator,
                                       out_executor);
  }

  // The topology is only needed while creating the executor; release it
  // regardless of status.
  iree_task_topology_deinitialize(&topology);

  IREE_TRACE_ZONE_END(z0);
  return status;
}
diff --git a/runtime/src/iree/task/api.h b/runtime/src/iree/task/api.h
new file mode 100644
index 0000000..bebaf57
--- /dev/null
+++ b/runtime/src/iree/task/api.h
@@ -0,0 +1,46 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_API_H_
+#define IREE_TASK_API_H_
+
+#include "iree/base/api.h"
+#include "iree/task/executor.h" // IWYU pragma: export
+#include "iree/task/topology.h" // IWYU pragma: export
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
//===----------------------------------------------------------------------===//
// Task system factory functions
//===----------------------------------------------------------------------===//

// Creates a task system executor from the current command line flags.
// This configures a topology and all of the executor parameters and returns
// a newly created instance in |out_executor| that must be released by the
// caller.
//
// This utility method is useful when only a single executor exists within a
// process as the flags are globals. When multiple executors may exist or
// programmatic configuration is needed use the iree_task_executor_create method
// directly.
//
// NOTE(review): reads global flag state; confirm flag parsing has completed
// before this is called.
iree_status_t iree_task_executor_create_from_flags(
    iree_allocator_t host_allocator, iree_task_executor_t** out_executor);
+
+//===----------------------------------------------------------------------===//
+// Task system simple invocation utilities
+//===----------------------------------------------------------------------===//
+
+// TODO(benvanik): simple IO completion event callback.
+// TODO(benvanik): simple async function call dispatch.
+// TODO(benvanik): simple parallel-for grid-style function call dispatch.
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_API_H_
diff --git a/runtime/src/iree/task/executor.c b/runtime/src/iree/task/executor.c
new file mode 100644
index 0000000..9c09ee6
--- /dev/null
+++ b/runtime/src/iree/task/executor.c
@@ -0,0 +1,590 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/executor.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/internal/math.h"
+#include "iree/base/tracing.h"
+#include "iree/task/affinity_set.h"
+#include "iree/task/executor_impl.h"
+#include "iree/task/list.h"
+#include "iree/task/pool.h"
+#include "iree/task/post_batch.h"
+#include "iree/task/queue.h"
+#include "iree/task/task_impl.h"
+#include "iree/task/tuning.h"
+#include "iree/task/worker.h"
+
+static void iree_task_executor_destroy(iree_task_executor_t* executor);
+
// Creates an executor with one worker per topology group plus a dedicated
// poller thread. The executor struct, the worker array, and all per-worker
// local memory live in a single allocation sized and aligned below. Returns
// the executor with an initial reference in |out_executor|.
iree_status_t iree_task_executor_create(
    iree_task_scheduling_mode_t scheduling_mode,
    const iree_task_topology_t* topology,
    iree_host_size_t worker_local_memory_size, iree_allocator_t allocator,
    iree_task_executor_t** out_executor) {
  iree_host_size_t worker_count = iree_task_topology_group_count(topology);
  if (worker_count > IREE_TASK_EXECUTOR_MAX_WORKER_COUNT) {
    return iree_make_status(
        IREE_STATUS_RESOURCE_EXHAUSTED,
        "requested %zu workers but a maximum of %d is allowed", worker_count,
        IREE_TASK_EXECUTOR_MAX_WORKER_COUNT);
  }

  // TODO(benvanik): support a threadless mode where we have one dummy worker
  // that just holds the lists but is pumped from donate_caller.
  if (worker_count == 0) {
    return iree_make_status(
        IREE_STATUS_UNIMPLEMENTED,
        "threadless donate-only executor mode not yet implemented");
  }

  IREE_TRACE_ZONE_BEGIN(z0);
  IREE_ASSERT_ARGUMENT(out_executor);
  *out_executor = NULL;

  // The executor is followed in memory by worker[] + worker_local_memory[].
  // The whole point is that we don't want destructive sharing between workers
  // so ensure we are aligned to at least the destructive interference size.
  worker_local_memory_size = iree_host_align(
      worker_local_memory_size, iree_hardware_destructive_interference_size);
  IREE_TRACE_ZONE_APPEND_VALUE(z0, (int64_t)worker_local_memory_size);
  iree_host_size_t executor_base_size =
      iree_host_align(sizeof(iree_task_executor_t),
                      iree_hardware_destructive_interference_size);
  iree_host_size_t worker_list_size =
      iree_host_align(worker_count * sizeof(iree_task_worker_t),
                      iree_hardware_destructive_interference_size);
  iree_host_size_t executor_size = executor_base_size + worker_list_size +
                                   worker_count * worker_local_memory_size;

  iree_task_executor_t* executor = NULL;
  IREE_RETURN_AND_END_ZONE_IF_ERROR(
      z0, iree_allocator_malloc(allocator, executor_size, (void**)&executor));
  // Zero the whole allocation so that destroy() below can safely run against
  // a partially-initialized executor if anything fails mid-way.
  memset(executor, 0, executor_size);
  iree_atomic_ref_count_init(&executor->ref_count);
  executor->allocator = allocator;
  executor->scheduling_mode = scheduling_mode;
  iree_atomic_task_slist_initialize(&executor->incoming_ready_slist);
  iree_slim_mutex_initialize(&executor->coordinator_mutex);

  // Simple PRNG used to generate seeds for the per-worker PRNGs used to
  // distribute work. This isn't strong (and doesn't need to be); it's just
  // enough to ensure each worker gets a sufficiently random seed for itself to
  // then generate entropy with. As a hack we use out_executor's address, as
  // that should live on the caller stack and with ASLR that's likely pretty
  // random itself. I'm sure somewhere a mathemetician just cringed :)
  iree_prng_splitmix64_state_t seed_prng;
  iree_prng_splitmix64_initialize(/*seed=*/(uint64_t)(out_executor),
                                  &seed_prng);
  iree_prng_minilcg128_initialize(iree_prng_splitmix64_next(&seed_prng),
                                  &executor->donation_theft_prng);

  iree_status_t status = iree_ok_status();

  // Pool used for system events; exposed to users of the task system to ensure
  // we minimize the number of live events and reduce overheads in
  // high-frequency transient parking operations.
  if (iree_status_is_ok(status)) {
    status = iree_event_pool_allocate(IREE_TASK_EXECUTOR_EVENT_POOL_CAPACITY,
                                      allocator, &executor->event_pool);
  }

  // Pool used for all fanout tasks. These only live within the executor and
  // since we know the precise lifetime of them we can keep them entirely within
  // the system here.
  if (iree_status_is_ok(status)) {
    status = iree_task_pool_initialize(
        allocator,
        iree_max(sizeof(iree_task_fence_t), sizeof(iree_task_dispatch_shard_t)),
        worker_count * IREE_TASK_EXECUTOR_INITIAL_SHARD_RESERVATION_PER_WORKER,
        &executor->transient_task_pool);
  }

  // Wait handling polling and waiting use a dedicated thread to ensure that
  // blocking syscalls stay off the workers.
  if (iree_status_is_ok(status)) {
    // For now we allow the poller to run anywhere - we should allow callers to
    // specify it via the topology (or something).
    iree_thread_affinity_t poller_thread_affinity;
    iree_thread_affinity_set_any(&poller_thread_affinity);
    status = iree_task_poller_initialize(executor, poller_thread_affinity,
                                         &executor->poller);
  }

  // Bring up the workers; the threads will be created here but be suspended
  // (if the platform supports it) awaiting the first tasks getting scheduled.
  if (iree_status_is_ok(status)) {
    executor->worker_count = worker_count;
    // Workers live immediately after the (aligned) executor struct; their
    // local memory slabs follow the worker array.
    executor->workers =
        (iree_task_worker_t*)((uint8_t*)executor + executor_base_size);
    uint8_t* worker_local_memory =
        (uint8_t*)executor->workers + worker_list_size;

    iree_task_affinity_set_t worker_idle_mask = 0;
    iree_task_affinity_set_t worker_live_mask = 0;
    iree_task_affinity_set_t worker_suspend_mask = 0;
    for (iree_host_size_t i = 0; i < worker_count; ++i) {
      iree_task_affinity_set_t worker_bit = iree_task_affinity_for_worker(i);
      worker_idle_mask |= worker_bit;
      worker_live_mask |= worker_bit;
      if (executor->scheduling_mode &
          IREE_TASK_SCHEDULING_MODE_DEFER_WORKER_STARTUP) {
        worker_suspend_mask |= worker_bit;
      }

      iree_task_worker_t* worker = &executor->workers[i];
      status = iree_task_worker_initialize(
          executor, i, iree_task_topology_get_group(topology, i),
          iree_make_byte_span(worker_local_memory, worker_local_memory_size),
          &seed_prng, worker);
      worker_local_memory += worker_local_memory_size;
      if (!iree_status_is_ok(status)) break;
    }
    // Publish the masks; the release store on worker_live_mask is last so
    // that observers acquiring it see the other masks as well.
    iree_atomic_task_affinity_set_store(&executor->worker_suspend_mask,
                                        worker_suspend_mask,
                                        iree_memory_order_relaxed);
    iree_atomic_task_affinity_set_store(&executor->worker_idle_mask,
                                        worker_idle_mask,
                                        iree_memory_order_relaxed);
    iree_atomic_task_affinity_set_store(&executor->worker_live_mask,
                                        worker_live_mask,
                                        iree_memory_order_release);
  }

  if (!iree_status_is_ok(status)) {
    // NOTE: destroy will ensure that any workers we have initialized are
    // properly cleaned up.
    iree_task_executor_destroy(executor);
    IREE_TRACE_ZONE_END(z0);
    return status;
  }

  *out_executor = executor;
  IREE_TRACE_ZONE_END(z0);
  return iree_ok_status();
}
+
// Tears down the executor in three phases: request all workers and the poller
// to exit, join them, then deinitialize their state and free the single
// backing allocation. Safe to call with NULL and on a partially-created
// executor (see the failure path in iree_task_executor_create).
static void iree_task_executor_destroy(iree_task_executor_t* executor) {
  if (!executor) return;
  IREE_TRACE_ZONE_BEGIN(z0);

  // First ask all workers to exit. We do this prior to waiting on them to exit
  // so that we parallelize the shutdown logic (which may flush pending tasks).
  for (iree_host_size_t i = 0; i < executor->worker_count; ++i) {
    iree_task_worker_t* worker = &executor->workers[i];
    iree_task_worker_request_exit(worker);
  }

  // Also ask the poller to exit - it'll wake from any system waits it's in and
  // abort all the remaining waits.
  iree_task_poller_request_exit(&executor->poller);

  // Now that all workers and the poller should be in the process of exiting we
  // can join with them. Some may take longer than others to exit but that's
  // fine as we can't return from here until they exit anyway.
  for (iree_host_size_t i = 0; i < executor->worker_count; ++i) {
    iree_task_worker_t* worker = &executor->workers[i];
    iree_task_worker_await_exit(worker);
  }
  iree_task_poller_await_exit(&executor->poller);

  // Tear down all workers and the poller now that no more threads are live.
  // Any live threads may still be touching their own data structures or those
  // of others (for example when trying to steal work).
  for (iree_host_size_t i = 0; i < executor->worker_count; ++i) {
    iree_task_worker_t* worker = &executor->workers[i];
    iree_task_worker_deinitialize(worker);
  }
  iree_task_poller_deinitialize(&executor->poller);

  iree_event_pool_free(executor->event_pool);
  iree_slim_mutex_deinitialize(&executor->coordinator_mutex);
  iree_atomic_task_slist_deinitialize(&executor->incoming_ready_slist);
  iree_task_pool_deinitialize(&executor->transient_task_pool);
  iree_allocator_free(executor->allocator, executor);

  IREE_TRACE_ZONE_END(z0);
}
+
+void iree_task_executor_retain(iree_task_executor_t* executor) {
+ if (executor) {
+ iree_atomic_ref_count_inc(&executor->ref_count);
+ }
+}
+
+void iree_task_executor_release(iree_task_executor_t* executor) {
+ if (executor && iree_atomic_ref_count_dec(&executor->ref_count) == 1) {
+ iree_task_executor_destroy(executor);
+ }
+}
+
// Intended to return pooled resources to the system; currently a no-op (see
// the TODO below for why the pools cannot yet be trimmed safely).
void iree_task_executor_trim(iree_task_executor_t* executor) {
  // TODO(benvanik): figure out a good way to do this; the pools require that
  // no tasks are in-flight to trim but our caller can't reliably make that
  // guarantee. We'd need some global executor lock that we did here and
  // on submit - or rework pools to not have this limitation.
  // iree_task_pool_trim(&executor->fence_task_pool);
  // iree_task_pool_trim(&executor->transient_task_pool);
}
+
// Returns the number of workers the executor was created with.
iree_host_size_t iree_task_executor_worker_count(
    iree_task_executor_t* executor) {
  return executor->worker_count;
}
+
// Returns the executor-wide system event pool allocated during creation.
// Ownership remains with the executor.
iree_event_pool_t* iree_task_executor_event_pool(
    iree_task_executor_t* executor) {
  return executor->event_pool;
}
+
// Acquires a fence task from the executor's transient task pool and
// initializes it against |scope|. The task's pool back-pointer is recorded,
// presumably so retiring the fence returns it to the pool — confirm against
// the pool/retire implementation.
iree_status_t iree_task_executor_acquire_fence(iree_task_executor_t* executor,
                                               iree_task_scope_t* scope,
                                               iree_task_fence_t** out_fence) {
  *out_fence = NULL;

  iree_task_fence_t* fence = NULL;
  IREE_RETURN_IF_ERROR(iree_task_pool_acquire(&executor->transient_task_pool,
                                              (iree_task_t**)&fence));
  iree_task_fence_initialize(scope, iree_wait_primitive_immediate(), fence);
  fence->header.pool = &executor->transient_task_pool;

  *out_fence = fence;
  return iree_ok_status();
}
+
+// Schedules a generic task to a worker matching its affinity.
+// The task will be posted to the worker mailbox and available for the worker to
+// begin processing as soon as the |post_batch| is submitted.
+//
+// Only called during coordination and expects the coordinator lock to be held.
+static void iree_task_executor_relay_to_worker(
+ iree_task_executor_t* executor, iree_task_post_batch_t* post_batch,
+ iree_task_t* task) {
+ iree_host_size_t worker_index =
+ iree_task_post_batch_select_worker(post_batch, task->affinity_set);
+ iree_task_post_batch_enqueue(post_batch, worker_index, task);
+}
+
// Schedules all ready tasks in the |pending_submission| list.
// Task may enqueue zero or more new tasks (or newly-ready/waiting tasks) to
// |pending_submission| or queue work for posting to workers via the
// |post_batch|.
//
// NOTE: the pending submission list we walk here is in FIFO order and the
// post batch we are building is in LIFO; this means that as we pop off the
// least recently added tasks from the submission (nice in-order traversal) we
// are pushing them as what will become the least recent tasks in the batch.
//
// Only called during coordination and expects the coordinator lock to be held.
void iree_task_executor_schedule_ready_tasks(
    iree_task_executor_t* executor, iree_task_submission_t* pending_submission,
    iree_task_post_batch_t* post_batch) {
  IREE_TRACE_ZONE_BEGIN(z0);
  iree_task_t* task = NULL;
  while ((task = iree_task_list_pop_front(&pending_submission->ready_list))) {
    // If the scope has been marked as failing then we abort the task.
    // This needs to happen as a poll here because one or more of the tasks we
    // are joining may have failed.
    if (IREE_UNLIKELY(iree_task_scope_has_failed(task->scope))) {
      iree_task_list_t discard_worklist;
      iree_task_list_initialize(&discard_worklist);
      iree_task_discard(task, &discard_worklist);
      iree_task_list_discard(&discard_worklist);
      continue;
    }

    // Dispatch on task type: some tasks retire inline (NOP/barrier/fence),
    // some fan out (dispatch issue), and the rest are relayed to workers.
    switch (task->type) {
      case IREE_TASK_TYPE_NOP:
        // Doesn't do anything; just retire and continue on to any dependents.
        iree_task_nop_retire((iree_task_nop_t*)task, pending_submission);
        break;
      case IREE_TASK_TYPE_CALL: {
        // Generic routing to workers for tasks that should always run there.
        iree_task_executor_relay_to_worker(executor, post_batch, task);
        break;
      }
      case IREE_TASK_TYPE_BARRIER: {
        // Retire the barrier to (possibly) ready up all dependent tasks.
        // This acts as a fan-out in cases where the dependent task count >1.
        iree_task_barrier_retire((iree_task_barrier_t*)task,
                                 pending_submission);
        break;
      }
      case IREE_TASK_TYPE_FENCE: {
        // Scope fence hit; notifies the scope so that anyone waiting on the
        // fence can be notified without us having to do so explicitly.
        iree_task_fence_retire((iree_task_fence_t*)task, pending_submission);
        break;
      }
      case IREE_TASK_TYPE_WAIT: {
        // We should only ever see completed waits here; ones that have yet to
        // resolve are sent to the poller.
        iree_task_wait_retire(
            (iree_task_wait_t*)task, pending_submission,
            iree_all_bits_set(task->flags, IREE_TASK_FLAG_WAIT_COMPLETED)
                ? iree_ok_status()
                : iree_make_status(IREE_STATUS_INTERNAL,
                                   "unresolved wait task ended up in the "
                                   "executor run queue"));
        break;
      }
      case IREE_TASK_TYPE_DISPATCH: {
        // Dispatches may need to be issued (fanning out the tiles to workers)
        // or retired (after all tiles have completed).
        if (task->flags & IREE_TASK_FLAG_DISPATCH_RETIRE) {
          iree_task_dispatch_retire((iree_task_dispatch_t*)task,
                                    pending_submission);
        } else {
          iree_task_dispatch_issue((iree_task_dispatch_t*)task,
                                   &executor->transient_task_pool,
                                   pending_submission, post_batch);
        }
        break;
      }
      // NOTE(review): no default case - a task with an unhandled type has
      // already been popped from the list and would be neither retired nor
      // routed; confirm all task types are covered above.
    }
  }
  IREE_TRACE_ZONE_END(z0);
}
+
// Moves all tasks out of |submission| and into the executor: ready tasks onto
// the incoming LIFO slist (drained during coordination) and waiting tasks to
// the poller. |submission| is reset before returning.
void iree_task_executor_merge_submission(iree_task_executor_t* executor,
                                         iree_task_submission_t* submission) {
  // Concatenate all of the incoming tasks into the submission list.
  // Note that the submission stores tasks in LIFO order such that when they are
  // put into the LIFO atomic slist they match the order across all concats
  // (earlier concats are later in the LIFO list).
  iree_atomic_task_slist_concat(&executor->incoming_ready_slist,
                                submission->ready_list.head,
                                submission->ready_list.tail);

  // Enqueue waiting tasks with the poller immediately: this may issue a
  // syscall to kick the poller. If we see bad context switches here then we
  // should split this into an enqueue/flush pair.
  iree_task_poller_enqueue(&executor->poller, &submission->waiting_list);

  // NOTE: after concatenating the intrusive next_task pointers may immediately
  // be modified by other threads. We can no longer assume anything about the
  // submission lists and can only discard them.
  iree_task_submission_reset(submission);
}
+
// Submits tasks for execution. Today this only merges the submission into the
// executor's incoming lists; a subsequent flush/coordination pass actually
// distributes the tasks to workers.
void iree_task_executor_submit(iree_task_executor_t* executor,
                               iree_task_submission_t* submission) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // Concatenate the submitted tasks onto our primary LIFO incoming lists.
  iree_task_executor_merge_submission(executor, submission);

  IREE_TRACE_ZONE_END(z0);
}
+
// Forces a coordination pass so previously submitted tasks are posted to
// workers. Does not wait for tasks to complete but may block on the
// coordinator mutex.
void iree_task_executor_flush(iree_task_executor_t* executor) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // Mostly a no-op today as we aren't deferring submission with the scheduling
  // mode. Instead, we'll just run the coordinator inline to ensure all tasks
  // are pushed to workers. This will not wait - but may block.
  iree_task_executor_coordinate(executor, /*current_worker=*/NULL);

  IREE_TRACE_ZONE_END(z0);
}
+
// Dispatches tasks in the global submission queue to workers.
// This is called by users upon submission of new tasks or by workers when they
// run out of tasks to process. If |current_worker| is provided then tasks will
// prefer to be routed back to it for immediate processing.
//
// If a coordination run ends up with no ready tasks and |current_worker| is
// provided the calling thread will enter a wait until the worker has more tasks
// posted to it.
//
// NOTE(review): the wait-on-empty behavior described above is not visible in
// this body — the loop simply breaks when the incoming list is empty; confirm
// the comment matches the current implementation.
void iree_task_executor_coordinate(iree_task_executor_t* executor,
                                   iree_task_worker_t* current_worker) {
  // Only one thread plays coordinator at a time.
  iree_slim_mutex_lock(&executor->coordinator_mutex);
  IREE_TRACE_ZONE_BEGIN(z0);

  // We may be adding tasks/waiting/etc on each pass through coordination - to
  // ensure we completely drain the incoming queues and satisfied waits we loop
  // until there's nothing left to coordinate.
  bool schedule_dirty = true;
  do {
    // Check for incoming submissions and move their posted tasks into our
    // local lists. Any of the tasks here are ready to execute immediately and
    // ones we should be able to distribute to workers without delay. The
    // waiting tasks are to the best of the caller's knowledge not ready yet.
    //
    // Note that we only do this once per coordination; that's so we don't
    // starve if submissions come in faster than we can schedule them.
    // Coordination will run again when workers become idle and will pick up
    // any changes then.
    //
    // As we schedule tasks we may spawn new ones (like a dispatch -> many
    // dispatch shards) and we keep track of those here. By doing a pass through
    // all ready tasks and only then merging in the new submission we get
    // breadth-first traversal of task graphs even if they originate from
    // various places and have no relation - hopefully leading to better average
    // latency.
    iree_task_submission_t pending_submission;
    iree_task_submission_initialize_from_lifo_slist(
        &executor->incoming_ready_slist, &pending_submission);
    if (iree_task_list_is_empty(&pending_submission.ready_list)) break;

    // Scratch coordinator submission batch used during scheduling to batch up
    // all tasks that will be posted to each worker. We could stash this on the
    // executor but given that which thread is playing the role of the
    // coordinator is random it's better to ensure that these bytes never incur
    // a cache miss by making them live here in the stack of the chosen thread.
    iree_task_post_batch_t* post_batch =
        iree_alloca(sizeof(iree_task_post_batch_t) +
                    executor->worker_count * sizeof(iree_task_list_t));
    iree_task_post_batch_initialize(executor, current_worker, post_batch);

    // Schedule all ready tasks in this batch. Some may complete inline (such
    // as ready barriers with all their dependencies resolved) while others may
    // be scheduled on workers via the post batch.
    iree_task_executor_schedule_ready_tasks(executor, &pending_submission,
                                            post_batch);

    // Route waiting tasks to the poller.
    iree_task_poller_enqueue(&executor->poller,
                             &pending_submission.waiting_list);

    // Post all new work to workers; they may wake and begin executing
    // immediately. Returns whether this worker has new tasks for it to work on.
    schedule_dirty = iree_task_post_batch_submit(post_batch);
  } while (schedule_dirty);

  iree_slim_mutex_unlock(&executor->coordinator_mutex);
  IREE_TRACE_ZONE_END(z0);
}
+
// Attempts up to |max_theft_attempts| thefts from workers in |victim_mask|,
// scanning set bits starting at a pseudo-random |rotation_offset|. Stolen
// tasks are moved into |local_task_queue| and one task is returned for
// immediate execution, or NULL when nothing could be stolen.
static iree_task_t* iree_task_executor_try_steal_task_from_affinity_set(
    iree_task_executor_t* executor, iree_task_affinity_set_t victim_mask,
    uint32_t max_theft_attempts, int rotation_offset,
    iree_task_queue_t* local_task_queue) {
  if (!victim_mask) return NULL;
  // No point attempting more thefts than there are candidate victims.
  max_theft_attempts = iree_min(max_theft_attempts,
                                iree_task_affinity_set_count_ones(victim_mask));
  victim_mask = iree_task_affinity_set_rotr(victim_mask, rotation_offset);

  int worker_index = rotation_offset;
  // NOTE(review): victim_mask was already rotated by rotation_offset above so
  // this second rotr means the scan effectively starts at 2*rotation_offset;
  // confirm the double rotation is intentional.
  iree_task_affinity_set_t mask =
      iree_task_affinity_set_rotr(victim_mask, worker_index);
  for (uint32_t i = 0; i < max_theft_attempts; ++i) {
    // Find the last set bit and skip to it. This avoids the need for doing
    // a full O(n) scan and instead gets us at O(popcnt) * O(ctz).
    //
    // Example: sharing mask = 0b01010101
    //          mask_rotation = 3 (randomly selected)
    //          mask = 0b01010101 rotr 3 = 0b10101010
    //          for (i = 0; i < 4; ++i)
    //            offset = ctz(0b10101010) = 1
    //            mask_rotation += 1 = 4
    //            mask >>= 1 = 0b01010101
    //            victim_index = 4 % 64 = 4
    int offset = iree_task_affinity_set_count_trailing_zeros(mask);
    int victim_index = (worker_index + offset) % executor->worker_count;
    worker_index += offset + 1;
    mask = iree_shr(mask, offset + 1);
    iree_task_worker_t* victim_worker = &executor->workers[victim_index];

    // Policy: steal a chunk of tasks at the tail of the victim queue.
    // This will steal multiple tasks from the victim up to the specified max
    // and move the them into our local task queue. Not all tasks will be stolen
    // and the assumption is that over a large-enough random distribution of
    // thievery taking ~half of the tasks each time (across all queues) will
    // lead to a relatively even distribution.
    iree_task_t* task = iree_task_worker_try_steal_task(
        victim_worker, local_task_queue,
        /*max_tasks=*/IREE_TASK_EXECUTOR_MAX_THEFT_TASK_COUNT);
    if (task) return task;
  }

  // No tasks found in victim_mask.
  return NULL;
}
+
// Tries to steal an entire task from a sibling worker (based on topology).
// Returns a task that is available (has not yet begun processing at all).
// May steal multiple tasks and add them to the |local_task_queue|.
//
// We do a scan through ideal victims indicated by the
// |constructive_sharing_mask|; these are the workers most likely to have some
// cache benefits to taking their work as they share some level of the cache
// hierarchy and should be better to steal from than any random worker.
//
// To prevent biasing any particular victim we use a fast prng function to
// select where in the set of potential victims defined by the topology
// group we steal. We (probably) don't need anything super complex here so
// instead of bouncing around at random we just select the starting point in
// our search and then go in-order.
iree_task_t* iree_task_executor_try_steal_task(
    iree_task_executor_t* executor,
    iree_task_affinity_set_t constructive_sharing_mask,
    uint32_t max_theft_attempts, iree_prng_minilcg128_state_t* theft_prng,
    iree_task_queue_t* local_task_queue) {
  IREE_TRACE_ZONE_BEGIN(z0);

  iree_task_affinity_set_t worker_live_mask =
      iree_atomic_task_affinity_set_load(&executor->worker_live_mask,
                                         iree_memory_order_acquire);
  // NOTE(review): the idle mask is read relaxed so a worker going idle
  // concurrently may still be chosen as a victim - presumably benign since
  // the steal attempt then just finds no tasks; confirm.
  iree_task_affinity_set_t worker_idle_mask =
      iree_atomic_task_affinity_set_load(&executor->worker_idle_mask,
                                         iree_memory_order_relaxed);
  // Limit the workers we will steal from to the ones that are currently live
  // and not idle.
  iree_task_affinity_set_t victim_mask = worker_live_mask & ~worker_idle_mask;

  // TODO(benvanik): it may be possible to rework this such that we better
  // use the prng; for example, instead of all this rotating stuff we could just
  // generate an 8-bit number (or even split it into two 4-bit numbers) per
  // theft attempt. The current rotation strategy is biased toward the same try
  // ordering vs. what we may really want with an unbiased random selection.
  int rotation_offset = iree_prng_minilcg128_next_uint8(theft_prng) &
                        (8 * sizeof(iree_task_affinity_set_t) - 1);

  // Try first with the workers we may have some caches shared with. This
  // helps to prevent cache invalidations/availability updates as it's likely
  // that we won't need to go back to main memory (or higher cache tiers) in the
  // event that the thief and victim are running close to each other in time.
  iree_task_t* task = iree_task_executor_try_steal_task_from_affinity_set(
      executor, victim_mask & constructive_sharing_mask, max_theft_attempts,
      rotation_offset, local_task_queue);
  if (task) {
    IREE_TRACE_ZONE_APPEND_TEXT(z0, "local");
  } else {
    task = iree_task_executor_try_steal_task_from_affinity_set(
        executor, victim_mask & ~constructive_sharing_mask, max_theft_attempts,
        rotation_offset, local_task_queue);
    if (task) {
      IREE_TRACE_ZONE_APPEND_TEXT(z0, "non-local");
    }
  }

  IREE_TRACE_ZONE_END(z0);
  return task;
}
+
// Flushes pending work and then blocks the calling thread on |wait_source|
// until it resolves or |timeout| elapses. The caller's thread is not (yet)
// used to execute tasks while waiting - see the TODO below.
iree_status_t iree_task_executor_donate_caller(iree_task_executor_t* executor,
                                               iree_wait_source_t wait_source,
                                               iree_timeout_t timeout) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // Perform an immediate flush/coordination (in case the caller queued).
  iree_task_executor_flush(executor);

  // Wait until completed.
  // TODO(benvanik): make this steal tasks until wait_handle resolves?
  // Somewhat dangerous as we don't know what kind of thread we are running on;
  // it may have a smaller stack than we are expecting or have some weird thread
  // local state (FPU rounding modes/etc).
  iree_status_t status = iree_wait_source_wait_one(wait_source, timeout);

  IREE_TRACE_ZONE_END(z0);
  return status;
}
diff --git a/runtime/src/iree/task/executor.h b/runtime/src/iree/task/executor.h
new file mode 100644
index 0000000..f060ac4
--- /dev/null
+++ b/runtime/src/iree/task/executor.h
@@ -0,0 +1,396 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_EXECUTOR_H_
+#define IREE_TASK_EXECUTOR_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/event_pool.h"
+#include "iree/task/scope.h"
+#include "iree/task/submission.h"
+#include "iree/task/task.h"
+#include "iree/task/topology.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//==============================================================================
+// IREE Task Executor
+//==============================================================================
+//
+// Roughly models wavefront-style GPU dispatch. Users submit task DAGs with
+// fine-grained dependency information for the executor to schedule across a set
+// of workers. As tasks become ready to execute they are placed into per-worker
+// FIFOs and workers run through them in a breadth-first fashion executing and
+// resolving tasks and building up new waves of ready tasks. Workers will always
+// make forward progress and only when they run out of work will they attempt to
+// self-nominate to play the role of coordinator and schedule any newly-
+// submitted or readied tasks. Only once all tasks have been retired and
+// waits on external resources remain does the task system suspend itself until
+// more tasks are submitted or an external wait resolves.
+//
+// Our goal is to do the minimal amount of work to get the maximum amount of
+// concurrency the user requests or allows (by way of their dependencies).
+// Whether on a single core where you want to timeshare with an application or
+// across hundreds the same architecture holds. Where there is inefficiency it's
+// almost always surmountable with properly constructed tasks: choose the right
+// granularity for dispatches, choose the right fan-out for tiles within those
+// dispatches, choose the right places to insert barriers to force fan-in to
+// reduce memory utilization or right places to batch barriers to allow less
+// synchronization with the work queue, etc. All of those choices are ones this
+// system is designed to handle dynamically via the task graphs provided that
+// are themselves (in the IREE world) mapped 1:1 with the GPU-esque grid
+// dispatch and command buffer model. It's a super-power if a human is authoring
+// all that information but what makes it particularly powerful here is that we
+// are authoring that in the compiler based on a tremendous amount of
+// higher-level information we can derive from the whole program. Every bit of
+// dynamism here is matched with the ability to tighten down the screws and gain
+// back anything lost by way of compiler improvements while also being able to
+// generalize out to far more complex systems (higher parallelism, higher and
+// more efficient concurrency, etc).
+//
+// The design of this system allows for a spectrum of dynamic behavior based on
+// desired usage scenarios:
+// - variable number of persistent workers based on compute/memory topology
+// - per-task scope and per-task worker affinity to control for:
+// - power islands on multi-core systems with fine-grained power management
+// - heterogeneous microarchitectures in big.LITTLE/etc compute complexes
+// - task isolation between multiple active requests or users
+// - latency prioritization by partitioning workloads by priority
+// - scheduling overhead tradeoffs by varying:
+// - coordination/flush frequency to reduce cross-thread communication
+// - by statically inserting dispatch shards to avoid dynamic fan-out
+// - thread donation to avoid likely context switches upon submit+wait
+// - multi-wait across all users by sharing a wait set
+// - per-worker work-stealing specification of victim workers in the topology
+// - limited work-stealing to prevent chained stealing/cascading theft
+//
+// Required reading:
+// https://www.usenix.org/conference/osdi20/presentation/ma
+// (closest equivalent to this scheduling model)
+// https://www.cister-labs.pt/summer2017/w3/Parallelism%20-%20Dag%20Model.pdf
+// (good overall, our worker local lists/mailboxes are work-stealing queues)
+// http://people.csail.mit.edu/shanir/publications/Flat%20Combining%20SPAA%2010.pdf
+// (what we model with the coordinator)
+// http://mcg.cs.tau.ac.il/papers/opodis2010-quasi.pdf
+// (we exploit relaxed consistency for all our cross-thread queuing, see ^)
+// https://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++.htm
+// (moodycamel is the state of the art on scaling queues; read it all)
+// https://blog.molecular-matters.com/2015/08/24/job-system-2-0-lock-free-work-stealing-part-1-basics/
+// https://blog.molecular-matters.com/2015/09/08/job-system-2-0-lock-free-work-stealing-part-2-a-specialized-allocator/
+// https://blog.molecular-matters.com/2015/09/25/job-system-2-0-lock-free-work-stealing-part-3-going-lock-free/
+// https://blog.molecular-matters.com/2015/11/09/job-system-2-0-lock-free-work-stealing-part-4-parallel_for/
+// https://blog.molecular-matters.com/2016/04/04/job-system-2-0-lock-free-work-stealing-part-5-dependencies/
+// (fantastic 5 part blog series; very similar to this)
+// http://www.cs.cmu.edu/afs/cs.cmu.edu/Web/People/blelloch/papers/jacm99.pdf
+// (provably optimal dynamic nested parallelism in 1999; basically: GPUs)
+// http://www.cs.cmu.edu/~blelloch/papers/locality2000.pdf
+// (followup to jacm99; using locality now to guide work stealing)
+// https://www.cs.cmu.edu/afs/cs.cmu.edu/Web/People/blelloch/papers/CGK07.pdf
+// (worker affinity and task locality for constructive cache sharing)
+//
+//==============================================================================
+// Life of an iree_task_t / high level algorithm
+//==============================================================================
+//
+// 1. Users allocate (from iree_task_pool_t, slice from arenas, etc) and
+// construct a DAG of iree_task_ts.
+//
+// a. Task dependency information is setup via completion_tasks for simple
+// dependencies, implicit fan-out/fan-in (dispatches), or explicit fan-in
+// (barriers).
+//
+// b. Tasks are pushed into iree_task_submission_t (LIFO, thread-local list).
+// If the task has no unmet initial dependencies it is placed into
+// the ready_list. If it is initially waiting on an external resource such
+// as iree_wait_handle_t then it is placed into the waiting_list.
+//
+// 2. iree_task_executor_submit (LIFO, atomic slist)
+// Submissions have their task thread-local lists concatenated into a LIFO
+// incoming_ready_slist or the wait poller shared by the executor.
+//
+// 3. iree_task_executor_flush (or a worker puts on its coordinator hat 🎩)
+//
+// a. Tasks are flushed from the incoming_ready_slist into a coordinator-local
+// FIFO task queue. This centralizes enqueuing from all threads into a
+// single ordered list.
+//
+// b. iree_task_executor_schedule_ready_tasks: walks the FIFO task queue and
+// builds a iree_task_post_batch_t containing the per-worker tasks
+// in LIFO order.
+//
+// c. iree_task_post_batch_submit: per-worker tasks are pushed to their
+// respective iree_task_worker_t mailbox_slist and the workers with new
+// tasks are notified to wake up (if not already awake).
+//
+// 4. iree_task_worker_main_pump_once (LIFO mailbox -> FIFO thread-local list)
+// When either woken or after completing all available thread-local work
+// each worker will check its mailbox_slist to see if any tasks have been
+// posted.
+//
+// a. Tasks are flushed from the LIFO mailbox into the local_task_queue FIFO
+// for the particular worker.
+//
+// b. If the mailbox is empty the worker *may* attempt to steal work from
+// another nearby worker in the topology.
+//
+// c. Any tasks in the local_task_queue are executed until empty.
+// Tasks are retired and dependent tasks (via completion_task or barriers)
+// are made ready and placed in the executor incoming_ready_slist as with
+// iree_task_executor_submit.
+//
+// d. If no more thread-local work is available and the mailbox_slist is
+// empty the worker will self-nominate for coordination and attempt to don
+// the coordinator hat with iree_task_executor_coordinate. If new work
+// becomes available after coordination step 5 repeats.
+//
+// e. If another worker (or iree_task_executor_flush) is already wearing the
+// coordinator hat then the worker will go to sleep.
+//
+//==============================================================================
+// Scaling Down
+//==============================================================================
+//
+// IREE is built at all levels - and both in the compiler and runtime - to scale
+// to different needs. Everything that IREE imposes on the runtime performance
+// and binary size is a spectrum of choices made that allows a user to only pay
+// for what they use.
+//
+// If a deployment scenario does not need complex multithreading and
+// out-of-order execution then this task system can be used in single-threaded
+// mode to at least allow for offloading from the main application thread. In
+// even more constrained scenarios (or embeddings within other systems that have
+// thread pools of their own) it can be used in zero-threaded mode with only
+// donated threads from the user performing work when the user wants it to
+// happen within its control. It still gives the benefits of wave-style
+// scheduling, multi-waiting, locality-aware work distribution, etc as well as
+// giving us a single target interface from the compiler to communicate
+// fine-grained dependency information to the runtime.
+//
+// If the cost of a few KB of data structures and some cheap uncontended atomic
+// linked list concatenations is still scary (it shouldn't be for 95% of uses)
+// then it's also possible to have a HAL driver that doesn't use this task
+// system at all and instead just executes the command buffers directly just
+// like our Vulkan/Metal/etc GPU backends do. Even though I don't recommend that
+// (one wouldn't be saving as much as they think and be losing a lot instead)
+// the layering holds and it can be useful if there's an existing external
+// sophisticated task execution system (a la taskflow) that is already present
+// in an application.
+//
+// One assertion of IREE is that for models that take more than milliseconds to
+// execute then asynchronous scheduling is almost always worth it even on
+// systems with single cores. The ability to cooperatively schedule model
+// execution allows applications significant control over their total program
+// scheduling behavior; just as on a Commodore 64 you'd have to interrupt work
+// on vsync to begin scanning out pixels to the screen and then resume afterward
+// it's rare to see any system even scaling down to double-digit MHz
+// microcontrollers that doesn't benefit from the ability to cleanly suspend and
+// resume execution.
+//
+// But even if *all* of that is too much, the compile-time representations in
+// the HAL IR are designed to be lowered away: execution modeling does not need
+// to bottom out on a hal.command_buffer.dispatch that maps 1:1 with the runtime
+// iree_hal_command_buffer_dispatch call: dispatch can be lowered into LLVM
+// IR calls and finally into native code to do precisely what you want. The HAL
+// at runtime is a useful abstraction to allow for switching your target
+// execution system (statically or dynamically across deployments) and to share
+// the same execution system across multiple models that may be executing
+// simultaneously but it is _not_ a requirement that the IREE HAL runtime
+// implementation is used. It's called multi-level IR for a reason and the HAL
+// IR is just one level that may have many more below it.
+//
+// So yeah: don't worry. It's almost certain that the thing making or breaking
+// the performance of models over 1ms of execution time is not the HAL, and that
+// in models at or above that scale the benefits we get from being able to
+// holistically schedule the work far outstrip any specialization that can be
+// done by hand. That's to say: only worry about this if your model is literally
+// 4 floats coming from an IMU and a few hundred scalar instructions to predict
+// whether the user is walking, and that shouldn't be using the runtime HAL at
+// all and really likely doesn't benefit from using IREE at any scale - just go
+// straight to LLVM IR from the source.
+//
+//==============================================================================
+// Scaling Up
+//==============================================================================
+//
+// The task system has an implicit limit of 64 workers. This intentional
+// limitation simplifies several parts of the code while also preventing misuse:
+// it rarely (if ever) makes sense to have more than 64 compute-dominated
+// threads working on a single problem. Achieving high performance in such
+// situations requires extremely careful control over the OS scheduler, memory
+// bandwidth consumption, and synchronization. It's always possible to make the
+// problem more compute-bound or very carefully try to fit in specific cache
+// sizes to avoid more constrained bandwidth paths but it's a non-portable
+// whack-a-mole style solution that is in conflict with a lot of what IREE seeks
+// to do with respect to low-latency and multi-tenant workloads.
+//
+// If more than 64 unique L1/L2 caches (or realistically more than probably ~32)
+// are available *and* all of them are attached to the same memory controllers
+// (no NUMA involved) then the solution is straightforward: use multiple IREE
+// task executors. Either within a process or in separate processes the
+// granularity is coarse enough to not be a burden and changes the problem from
+// needing 100% perfect work scaling of a single task to needing a naive
+// distributed workload solution at the algorithm level.
+//
+// Many useful effects also fall out of solving the work distribution problem.
+// Even for single-tenant workloads being able to split work between two
+// executors allows for natural mappings on NUMA systems or completely
+// independent machines. When supporting multi-tenant workloads (even if the
+// same program is acting as multiple-tenants in a minibatched-style algorithm)
+// the improvements of isolation both in memory access patterns and in variance
+// from potentially bad system behavior dramatically improve: there aren't many
+// opportunities for contention in this system but one can guarantee zero
+// contention by simply not sharing the resources!
+
+// A bitfield specifying the scheduling mode used for configuring how (or if)
+// work is balanced across queues.
+// Values are individual flags that may be bitwise-OR'ed together into an
+// iree_task_scheduling_mode_t.
+enum iree_task_scheduling_mode_bits_t {
+  // TODO(benvanik): batch, round-robin, FCFS, SJF, etc.
+  // We can also allow for custom scheduling, though I'm skeptical of the value
+  // of that. We should look into what GPUs do in hardware for balancing things
+  // (if anything this sophisticated at all). The potential benefit here is that
+  // we can optimize for offline workloads by allowing each queue to be drained
+  // until blocking - hopefully optimizing cache coherency and reducing the
+  // total memory high-water mark - or optimize for latency across all queues by
+  // taking tasks from all queues equally. There are other more interesting
+  // scheduling strategies such as preferring the widest tasks available from
+  // any queue such that we are keeping as many workers active as possible to
+  // reach peak utilization or artificially limiting which tasks we allow
+  // through to keep certain CPU cores asleep unless absolutely required.
+  IREE_TASK_SCHEDULING_MODE_RESERVED = 0u,
+
+  // Creates all workers suspended and waits until work is first scheduled to
+  // them to resume. This trades off initial blocking startup time waking the
+  // threads for potential latency additions later on as threads take longer to
+  // wake on their first use.
+  //
+  // Prefer this setting in systems where startup time is the priority and work
+  // may not be scheduled for awhile or scheduled unevenly to start; otherwise
+  // the executor creation will take longer and a thundering herd will occur
+  // forcing context switches even if no work is needed.
+  //
+  // Avoid in systems where the latency from initial submission to worker
+  // execution is critical as this will ensure all worker threads are waiting
+  // for their respective wake notifications. The kernel then will be able to
+  // much faster schedule all worker quantums and in many cases all workers will
+  // begin processing simultaneously immediately after the submission is made.
+  IREE_TASK_SCHEDULING_MODE_DEFER_WORKER_STARTUP = 1u << 0,
+};
+typedef uint32_t iree_task_scheduling_mode_t;
+
+// Base task system executor interface.
+typedef struct iree_task_executor_t iree_task_executor_t;
+
+// Creates a task executor using the specified topology.
+//
+// |worker_local_memory_size| defines the bytes to be allocated and reserved for
+// each worker to use for local memory operations. Will be rounded up to the
+// next power of two. Dispatches performed will be able to request up to this
+// amount of memory for their invocations and no more. May be 0 if no worker
+// local memory is required.
+//
+// |topology| is only used during creation and need not live beyond this call.
+// |out_executor| must be released by the caller.
+iree_status_t iree_task_executor_create(
+ iree_task_scheduling_mode_t scheduling_mode,
+ const iree_task_topology_t* topology,
+ iree_host_size_t worker_local_memory_size, iree_allocator_t allocator,
+ iree_task_executor_t** out_executor);
+
+// Retains the given |executor| for the caller.
+void iree_task_executor_retain(iree_task_executor_t* executor);
+
+// Releases the given |executor| from the caller.
+void iree_task_executor_release(iree_task_executor_t* executor);
+
+// Trims pools and caches used by the executor and its workers.
+void iree_task_executor_trim(iree_task_executor_t* executor);
+
+// Returns the number of live workers usable by the executor.
+// The actual number used for any particular operation is dynamic.
+iree_host_size_t iree_task_executor_worker_count(
+ iree_task_executor_t* executor);
+
+// Returns an iree_event_t pool managed by the executor.
+// Users of the task system should acquire their transient events from this.
+// Long-lived events should be allocated on their own in order to avoid
+// expending the pool and harming high-frequency event acquisition.
+iree_event_pool_t* iree_task_executor_event_pool(
+ iree_task_executor_t* executor);
+
+// Acquires a fence for the given |scope| from the executor fence pool.
+iree_status_t iree_task_executor_acquire_fence(iree_task_executor_t* executor,
+ iree_task_scope_t* scope,
+ iree_task_fence_t** out_fence);
+
+// TODO(benvanik): scheduling mode mutation, compute quota control, etc.
+
+// Submits a batch of tasks for execution.
+// The submission represents a DAG of tasks all reachable from the initial
+// submission lists.
+//
+// Ownership of the tasks remains with the caller for the lifetime of the
+// submission unless tasks have a custom pool specified that they can be
+// returned to.
+//
+// Safe to call from any thread. Wait-free but may block for a small duration
+// during initial scheduling of the submitted tasks.
+//
+// NOTE: it's possible for all work in the submission to complete prior to this
+// function returning.
+void iree_task_executor_submit(iree_task_executor_t* executor,
+ iree_task_submission_t* submission);
+
+// Flushes any pending task batches for execution.
+//
+// Safe to call from any thread. Wait-free but may block for a small duration
+// during initial scheduling of the submitted tasks.
+//
+// NOTE: due to races it's possible for new work to arrive from other threads
+// after the flush has occurred but prior to this call returning.
+void iree_task_executor_flush(iree_task_executor_t* executor);
+
+// Donates the calling thread to the executor until either |wait_source|
+// resolves or |timeout| is exceeded. Flushes any pending task batches prior
+// to doing any work or waiting.
+//
+// If there are no tasks available then the calling thread will block as if
+// iree_wait_source_wait_one had been used on |wait_source|. If tasks are ready
+// then the caller will not block prior to starting to perform work on behalf of
+// the executor.
+//
+// Donation is intended as an optimization to elide context switches when the
+// caller would have waited anyway; now instead of performing a kernel wait and
+// most certainly incurring a context switch the caller immediately begins
+// taking work from the queue - likely even prior to any of the executor workers
+// waking (assuming they were idle).
+//
+// Note that donation may not always be strictly a win: the caller may have an
+// arbitrary thread affinity that may cause oversubscription of resources within
+// the topology. This can cause additional contention for compute resources and
+// increase kernel scheduling overhead as threads are swapped or migrated.
+// Measure, measure, measure! If there is any IO that can be performed during
+// the time that a caller would otherwise donate themselves to the executor that
+// should always be preferred as should smaller computation (again to not
+// oversubscribe resources). Treat donation as a hail mary to prevent a kernel
+// wait and not something that will magically make things execute faster.
+// Especially in large applications it's almost certainly better to do something
+// useful with the calling thread (even if that's go to sleep).
+//
+// Safe to call from any thread (though bad to reentrantly call from workers).
+iree_status_t iree_task_executor_donate_caller(iree_task_executor_t* executor,
+ iree_wait_source_t wait_source,
+ iree_timeout_t timeout);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_EXECUTOR_H_
diff --git a/runtime/src/iree/task/executor_demo.cc b/runtime/src/iree/task/executor_demo.cc
new file mode 100644
index 0000000..b8869d8
--- /dev/null
+++ b/runtime/src/iree/task/executor_demo.cc
@@ -0,0 +1,170 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstddef>
+
+#include "iree/base/internal/prng.h"
+#include "iree/base/tracing.h"
+#include "iree/task/executor.h"
+
+// TODO(benvanik): clean this up into a reasonable demo; it's currently staging
+// area for testing executor behavior across different platforms and topologies.
+
+namespace {
+
+static thread_local volatile uint64_t xxx = 0;
+
+// Burns CPU cycles with splitmix64 PRNG work to simulate a compute-bound tile.
+// Used only by this demo to give the executor measurable work and to make
+// scheduling/work-stealing behavior visible in traces.
+static void simulate_work(const iree_task_tile_context_t* tile_context) {
+  iree_prng_splitmix64_state_t state;
+  iree_prng_splitmix64_initialize(xxx, &state);
+  // Toggle (currently disabled) to make some workgroups do 5x the work.
+  bool slow = false;  // tile_context->workgroup_xyz[0] % 3 == 1;
+  if (tile_context->workgroup_xyz[0] == 128 ||
+      tile_context->workgroup_xyz[0] == 1023) {
+    // Introduce big variance to highlight work stealing.
+    // std::this_thread::sleep_for(std::chrono::milliseconds(1));
+  }
+  for (int i = 0; i < 256 * 1024; ++i) {
+    uint64_t value = iree_prng_splitmix64_next(&state);
+    // Accumulate into the volatile thread-local so the loop is not elided.
+    xxx += value;
+    if (slow) {
+      for (int j = 0; j < 4; ++j) {
+        value = iree_prng_splitmix64_next(&state);
+        xxx += value;
+      }
+    }
+  }
+}
+
+// Demo entry point: builds a small task DAG (call0 -> {dispatch0, dispatch1}
+// -> call1 -> fence), submits it to an executor created from the host
+// topology, and waits for the whole DAG to retire before exiting.
+//
+// Fix: main must take `char** argv` (or `char* argv[]`); the previous
+// `char* argv` signature is non-conforming.
+extern "C" int main(int argc, char** argv) {
+  (void)argc;
+  (void)argv;
+  IREE_TRACE_SCOPE0("ExecutorTest::Any");
+
+  iree_allocator_t allocator = iree_allocator_system();
+
+  iree_task_topology_t topology;
+#if 1
+  iree_task_topology_initialize_from_physical_cores(
+      /*max_core_count=*/6, &topology);
+#else
+  iree_task_topology_initialize_from_group_count(/*group_count=*/6, &topology);
+#endif
+
+  iree_task_executor_t* executor = NULL;
+  iree_task_scheduling_mode_t scheduling_mode =
+      IREE_TASK_SCHEDULING_MODE_RESERVED;
+  iree_host_size_t worker_local_memory_size = 0;  // 64 * 1024;
+  IREE_CHECK_OK(iree_task_executor_create(scheduling_mode, &topology,
+                                          worker_local_memory_size, allocator,
+                                          &executor));
+  iree_task_topology_deinitialize(&topology);
+
+  // Scope used to track completion of all tasks in this demo.
+  iree_task_scope_t scope_a;
+  iree_task_scope_initialize(iree_make_cstring_view("a"), &scope_a);
+
+  // Entry call that fans out to the dispatches below.
+  iree_task_call_t call0;
+  iree_task_call_initialize(&scope_a,
+                            iree_task_make_call_closure(
+                                [](void* user_context, iree_task_t* task,
+                                   iree_task_submission_t* pending_submission) {
+                                  IREE_TRACE_SCOPE0("call0");
+                                  IREE_ASSERT_EQ(0, user_context);
+                                  return iree_ok_status();
+                                },
+                                0),
+                            &call0);
+
+  const uint32_t workgroup_size_0[3] = {256, 1, 1};
+  const uint32_t workgroup_count_0[3] = {32, 4, 2};
+  iree_task_dispatch_t dispatch0;
+  iree_task_dispatch_initialize(
+      &scope_a,
+      iree_task_make_dispatch_closure(
+          [](void* user_context, const iree_task_tile_context_t* tile_context,
+             iree_task_submission_t* pending_submission) {
+            IREE_TRACE_SCOPE0("tile0");
+            IREE_ASSERT_EQ(0, user_context);
+            simulate_work(tile_context);
+            iree_atomic_fetch_add_int32(&tile_context->statistics->reserved, 1,
+                                        iree_memory_order_relaxed);
+            return iree_ok_status();
+          },
+          0),
+      workgroup_size_0, workgroup_count_0, &dispatch0);
+
+  const uint32_t workgroup_size_1[3] = {128, 1, 1};
+  const uint32_t workgroup_count_1[3] = {16, 2, 1};
+  iree_task_dispatch_t dispatch1;
+  iree_task_dispatch_initialize(
+      &scope_a,
+      iree_task_make_dispatch_closure(
+          [](void* user_context, const iree_task_tile_context_t* tile_context,
+             iree_task_submission_t* pending_submission) {
+            IREE_TRACE_SCOPE0("tile1");
+            IREE_ASSERT_EQ(0, user_context);
+            simulate_work(tile_context);
+            iree_atomic_fetch_add_int32(&tile_context->statistics->reserved, 1,
+                                        iree_memory_order_relaxed);
+            return iree_ok_status();
+          },
+          0),
+      workgroup_size_1, workgroup_count_1, &dispatch1);
+
+  // Final call that runs after both dispatches complete.
+  iree_task_call_t call1;
+  iree_task_call_initialize(&scope_a,
+                            iree_task_make_call_closure(
+                                [](void* user_context, iree_task_t* task,
+                                   iree_task_submission_t* pending_submission) {
+                                  IREE_TRACE_SCOPE0("call1");
+                                  IREE_ASSERT_EQ((void*)1, user_context);
+                                  return iree_ok_status();
+                                },
+                                (void*)1),
+                            &call1);
+
+#if 1
+  // no barrier between dispatches; fanout
+  iree_task_t* barrier0_tasks[2] = {&dispatch0.header, &dispatch1.header};
+  iree_task_barrier_t barrier0;
+  iree_task_barrier_initialize(&scope_a, IREE_ARRAYSIZE(barrier0_tasks),
+                               barrier0_tasks, &barrier0);
+  iree_task_set_completion_task(&call0.header, &barrier0.header);
+  iree_task_set_completion_task(&dispatch0.header, &call1.header);
+  iree_task_set_completion_task(&dispatch1.header, &call1.header);
+#else
+  // barrier between dispatches
+  iree_task_set_completion_task(&call0.header, &dispatch0.header);
+  iree_task_set_completion_task(&dispatch0.header, &dispatch1.header);
+  iree_task_set_completion_task(&dispatch1.header, &call1.header);
+#endif
+
+  // Fence that resolves once call1 (and therefore the whole DAG) retires.
+  iree_task_fence_t* fence0 = NULL;
+  IREE_CHECK_OK(iree_task_executor_acquire_fence(executor, &scope_a, &fence0));
+  iree_task_set_completion_task(&call1.header, &fence0->header);
+
+  // Submit the DAG root; all downstream tasks are reachable from it.
+  iree_task_submission_t sub0;
+  iree_task_submission_initialize(&sub0);
+  iree_task_submission_enqueue(&sub0, &call0.header);
+  iree_task_executor_submit(executor, &sub0);
+
+  // (Example of a second submission, currently unused.)
+  // iree_task_submission_t sub1;
+  // iree_task_submission_initialize(&sub1);
+  // IREE_CHECK_OK(iree_task_executor_submit(executor, &sub1));
+
+  iree_task_executor_flush(executor);
+
+  IREE_CHECK_OK(iree_task_scope_wait_idle(&scope_a, IREE_TIME_INFINITE_FUTURE));
+
+  iree_task_scope_deinitialize(&scope_a);
+  iree_task_executor_release(executor);
+  return 0;
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/executor_impl.h b/runtime/src/iree/task/executor_impl.h
new file mode 100644
index 0000000..fc1c04b
--- /dev/null
+++ b/runtime/src/iree/task/executor_impl.h
@@ -0,0 +1,151 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_EXECUTOR_IMPL_H_
+#define IREE_TASK_EXECUTOR_IMPL_H_
+
+#include "iree/base/internal/math.h"
+#include "iree/base/internal/prng.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/wait_handle.h"
+#include "iree/base/tracing.h"
+#include "iree/task/affinity_set.h"
+#include "iree/task/executor.h"
+#include "iree/task/list.h"
+#include "iree/task/poller.h"
+#include "iree/task/pool.h"
+#include "iree/task/post_batch.h"
+#include "iree/task/queue.h"
+#include "iree/task/tuning.h"
+#include "iree/task/worker.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+struct iree_task_executor_t {
+  // Reference count; the executor is destroyed when this reaches zero
+  // (see iree_task_executor_retain/iree_task_executor_release).
+  iree_atomic_ref_count_t ref_count;
+  // Allocator provided at creation; used for the executor and its pools.
+  iree_allocator_t allocator;
+
+  // Defines how work is selected across queues.
+  // TODO(benvanik): make mutable; currently always the same reserved value.
+  iree_task_scheduling_mode_t scheduling_mode;
+
+  // State used by the work-stealing operations performed by donated threads.
+  // This is **NOT SYNCHRONIZED** and relies on the fact that we actually don't
+  // much care about the precise selection of workers enough to mind any tears
+  // we get in the PRNG state that lives inside. Cache write-back order and
+  // incidental cache line availability/visibility update frequency is like an
+  // extra layer of PRNG anyway ;)
+  iree_prng_minilcg128_state_t donation_theft_prng;
+
+  // Pools of transient dispatch tasks shared across all workers.
+  // Depending on configuration the task pool may allocate after creation using
+  // the allocator provided upon executor creation.
+  //
+  // Sized to be able to fit at least:
+  //   iree_task_fence_t
+  //   iree_task_dispatch_shard_t
+  // Increasing the size larger than these will waste memory.
+  iree_task_pool_t transient_task_pool;
+
+  // A list of incoming tasks that are ready to execute immediately.
+  // The list is LIFO and we require that task lists are reversed by the
+  // submitter so we can use iree_atomic_slist_concat to quickly prepend the
+  // LIFO list to the atomic slist. By doing this we can construct the task
+  // lists in LIFO order prior to submission, concat with a pointer swap into
+  // this list, flush from the list in LIFO order during coordination, and do a
+  // single LIFO->FIFO conversion while distributing work. What could have been
+  // half a dozen task list pointer walks and inverted sequential memory access
+  // becomes one.
+  //
+  // Example:
+  //   existing tasks: C B A
+  //   new tasks: 1 2 3
+  //   updated tasks: 3 2 1 C B A
+  iree_atomic_task_slist_t incoming_ready_slist;
+
+  // iree_event_t pool used to acquire system wait handles.
+  // Many subsystems interacting with the executor will need events to park
+  // their work in the wait set and sharing the pool across all of them ensures
+  // we limit the number we have outstanding and avoid syscalls to allocate
+  // them.
+  iree_event_pool_t* event_pool;
+
+  // Guards coordination logic; only one thread at a time may be acting as the
+  // coordinator.
+  iree_slim_mutex_t coordinator_mutex;
+
+  // Wait task polling and wait thread manager.
+  // This handles all system waits so that we can keep the syscalls off the
+  // worker threads and lower wake latencies (the wait thread can enqueue
+  // completed waits immediately after they resolve instead of waiting for
+  // existing computation on the workers to finish).
+  iree_task_poller_t poller;
+
+  // A bitset indicating which workers are live and usable; all attempts to
+  // push work onto a particular worker should check first with this mask. This
+  // may change over time either automatically or by user request ("don't use
+  // these cores for a while I'm going to be using them" etc).
+  iree_atomic_task_affinity_set_t worker_live_mask;
+
+  // A bitset indicating which workers may be suspended and need to be resumed
+  // via iree_thread_resume prior to them being able to execute work.
+  iree_atomic_task_affinity_set_t worker_suspend_mask;
+
+  // A bitset indicating which workers are currently idle. Used to bias incoming
+  // tasks to workers that aren't doing much else. This is a balance of latency
+  // to wake the idle workers vs. latency to wait for existing work to complete
+  // on already woken workers.
+  iree_atomic_task_affinity_set_t worker_idle_mask;
+
+  // Specifies how many workers threads there are.
+  // For now this number is fixed per executor however if we wanted to enable
+  // live join/leave behavior we could change this to a registration mechanism.
+  iree_host_size_t worker_count;
+  iree_task_worker_t* workers;  // [worker_count]
+};
+
+// Merges a submission into the primary FIFO queues.
+// Coordinators will fetch items from here as workers demand them but otherwise
+// not be notified of the changes (waiting until coordination runs again).
+//
+// May be called from any thread.
+void iree_task_executor_merge_submission(iree_task_executor_t* executor,
+ iree_task_submission_t* submission);
+
+// Schedules all ready tasks in the |pending_submission| list.
+// Only called during coordination and expects the coordinator lock to be held.
+void iree_task_executor_schedule_ready_tasks(
+ iree_task_executor_t* executor, iree_task_submission_t* pending_submission,
+ iree_task_post_batch_t* post_batch);
+
+// Dispatches tasks in the global submission queue to workers.
+// |current_worker| will be NULL if called from a non-worker thread and
+// otherwise be the current worker; used to avoid round-tripping through the
+// whole system to post to oneself.
+//
+// If the |current_worker| has no more work remaining then the calling thread
+// may wait on any pending wait tasks until one resolves or more work is
+// scheduled for the worker. If no worker is provided the call will return
+// without waiting.
+void iree_task_executor_coordinate(iree_task_executor_t* executor,
+ iree_task_worker_t* current_worker);
+
+// Tries to steal an entire task from a sibling worker (based on topology).
+// Returns a task that is available (has not yet begun processing at all).
+// May steal multiple tasks and add them to the |local_task_queue|.
+iree_task_t* iree_task_executor_try_steal_task(
+ iree_task_executor_t* executor,
+ iree_task_affinity_set_t constructive_sharing_mask,
+ uint32_t max_theft_attempts, iree_prng_minilcg128_state_t* theft_prng,
+ iree_task_queue_t* local_task_queue);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_EXECUTOR_IMPL_H_
diff --git a/runtime/src/iree/task/executor_test.cc b/runtime/src/iree/task/executor_test.cc
new file mode 100644
index 0000000..7e96a8e
--- /dev/null
+++ b/runtime/src/iree/task/executor_test.cc
@@ -0,0 +1,139 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/executor.h"
+
+#include <cstddef>
+
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+// Tests that an executor can be created and destroyed repeatedly without
+// running out of system resources. Since all systems are different there's no
+// guarantee this will fail but it does give ASAN/TSAN some nice stuff to chew
+// on.
+TEST(ExecutorTest, Lifetime) {
+ // One topology shared across all create/release cycles; deinitialized once
+ // at the end.
+ iree_task_topology_t topology;
+ iree_task_topology_initialize_from_group_count(/*group_count=*/4, &topology);
+
+ for (int i = 0; i < 100; ++i) {
+ iree_task_executor_t* executor = NULL;
+ iree_task_scheduling_mode_t scheduling_mode =
+ IREE_TASK_SCHEDULING_MODE_RESERVED;
+ iree_host_size_t worker_local_memory_size = 64 * 1024;
+ IREE_ASSERT_OK(iree_task_executor_create(
+ scheduling_mode, &topology, worker_local_memory_size,
+ iree_allocator_system(), &executor));
+ // -- idle -- (no work is ever submitted; pure create/release cycling)
+ iree_task_executor_release(executor);
+ }
+
+ iree_task_topology_deinitialize(&topology);
+}
+
+// Tests lifetime when issuing submissions before exiting.
+// This tries to catch races in shutdown with pending work.
+TEST(ExecutorTest, LifetimeStress) {
+ iree_task_topology_t topology;
+ iree_task_topology_initialize_from_group_count(/*group_count=*/4, &topology);
+
+ for (int i = 0; i < 100; ++i) {
+ iree_task_executor_t* executor = NULL;
+ iree_task_scheduling_mode_t scheduling_mode =
+ IREE_TASK_SCHEDULING_MODE_RESERVED;
+ iree_host_size_t worker_local_memory_size = 64 * 1024;
+ IREE_ASSERT_OK(iree_task_executor_create(
+ scheduling_mode, &topology, worker_local_memory_size,
+ iree_allocator_system(), &executor));
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope"), &scope);
+
+ // The lambda below is captureless so it can be used as a plain call
+ // closure; the loop index travels through |user_context| and is surfaced
+ // via this static for the EXPECT below.
+ static std::atomic<int> received_value = {0};
+ iree_task_call_t call;
+ iree_task_call_initialize(
+ &scope,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ received_value = (int)(uintptr_t)user_context;
+ return iree_ok_status();
+ },
+ (void*)(uintptr_t)i),
+ &call);
+
+ // Fence the call so scope idle tracking observes its completion.
+ iree_task_fence_t* fence = NULL;
+ IREE_ASSERT_OK(iree_task_executor_acquire_fence(executor, &scope, &fence));
+ iree_task_set_completion_task(&call.header, &fence->header);
+
+ iree_task_submission_t submission;
+ iree_task_submission_initialize(&submission);
+ iree_task_submission_enqueue(&submission, &call.header);
+ iree_task_executor_submit(executor, &submission);
+ iree_task_executor_flush(executor);
+ IREE_ASSERT_OK(
+ iree_task_scope_wait_idle(&scope, IREE_TIME_INFINITE_FUTURE));
+
+ EXPECT_EQ(received_value, i) << "call did not correlate to loop";
+
+ iree_task_scope_deinitialize(&scope);
+ iree_task_executor_release(executor);
+ }
+
+ iree_task_topology_deinitialize(&topology);
+}
+
+// Tests heavily serialized submission to an executor.
+// This puts pressure on the overheads involved in spinning up threads.
+TEST(ExecutorTest, SubmissionStress) {
+ iree_task_topology_t topology;
+ iree_task_topology_initialize_from_group_count(/*group_count=*/4, &topology);
+ iree_task_executor_t* executor = NULL;
+ iree_task_scheduling_mode_t scheduling_mode =
+ IREE_TASK_SCHEDULING_MODE_RESERVED;
+ iree_host_size_t worker_local_memory_size = 64 * 1024;
+ IREE_ASSERT_OK(iree_task_executor_create(scheduling_mode, &topology,
+ worker_local_memory_size,
+ iree_allocator_system(), &executor));
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope"), &scope);
+
+ // Single executor/scope reused for all iterations; each loop issues one
+ // submission and waits for it to retire before issuing the next.
+ for (int i = 0; i < 1000; ++i) {
+ // Captureless lambda: the loop index is passed via |user_context| and
+ // surfaced through this static for the EXPECT below.
+ static std::atomic<int> received_value = {0};
+ iree_task_call_t call;
+ iree_task_call_initialize(
+ &scope,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ received_value = (int)(uintptr_t)user_context;
+ return iree_ok_status();
+ },
+ (void*)(uintptr_t)i),
+ &call);
+
+ iree_task_fence_t* fence = NULL;
+ IREE_ASSERT_OK(iree_task_executor_acquire_fence(executor, &scope, &fence));
+ iree_task_set_completion_task(&call.header, &fence->header);
+
+ iree_task_submission_t submission;
+ iree_task_submission_initialize(&submission);
+ iree_task_submission_enqueue(&submission, &call.header);
+ iree_task_executor_submit(executor, &submission);
+ iree_task_executor_flush(executor);
+ IREE_ASSERT_OK(
+ iree_task_scope_wait_idle(&scope, IREE_TIME_INFINITE_FUTURE));
+
+ EXPECT_EQ(received_value, i) << "call did not correlate to loop";
+ }
+
+ iree_task_scope_deinitialize(&scope);
+ iree_task_executor_release(executor);
+ iree_task_topology_deinitialize(&topology);
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/list.c b/runtime/src/iree/task/list.c
new file mode 100644
index 0000000..765e0b6
--- /dev/null
+++ b/runtime/src/iree/task/list.c
@@ -0,0 +1,207 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/list.h"
+
+#include <string.h>
+
+// Drains all tasks out of the atomic slist into a local list and discards
+// them (and, via iree_task_list_discard, their transitive dependents).
+void iree_atomic_task_slist_discard(iree_atomic_task_slist_t* slist) {
+ iree_task_list_t discard_list;
+ iree_task_list_initialize(&discard_list);
+ // Single atomic flush of the slist; discard then walks the plain list.
+ iree_task_list_append_from_fifo_slist(&discard_list, slist);
+ iree_task_list_discard(&discard_list);
+}
+
+// Resets both endpoints so the list reads as empty.
+void iree_task_list_initialize(iree_task_list_t* out_list) {
+ out_list->head = NULL;
+ out_list->tail = NULL;
+}
+
+// Transfers ownership of all tasks in |list| to |out_list|, leaving |list|
+// empty and immediately reusable.
+void iree_task_list_move(iree_task_list_t* list, iree_task_list_t* out_list) {
+ out_list->head = list->head;
+ out_list->tail = list->tail;
+ list->head = NULL;
+ list->tail = NULL;
+}
+
+// Discards every task in |list|, including dependents that get enqueued back
+// onto |list| by iree_task_discard as the walk proceeds.
+void iree_task_list_discard(iree_task_list_t* list) {
+ // Fixed point iteration over the task list and all its transitive dependent
+ // tasks that get discarded. This is in contrast to a recursive discard that
+ // could potentially be thousands of calls deep in a large graph.
+ while (!iree_task_list_is_empty(list)) {
+ iree_task_t* task = iree_task_list_pop_front(list);
+ // iree_task_discard may append newly-discardable dependents to |list|,
+ // which the loop will pick up on subsequent iterations.
+ iree_task_discard(task, list);
+ task = NULL; // invalidated during discard
+ }
+}
+
+// A list with no head holds no tasks (head/tail are maintained in sync).
+bool iree_task_list_is_empty(const iree_task_list_t* list) {
+ return !list->head;
+}
+
+// Counts tasks by walking the full list; O(n) in list length.
+iree_host_size_t iree_task_list_calculate_size(const iree_task_list_t* list) {
+ iree_host_size_t count = 0;
+ for (iree_task_t* task = list->head; task != NULL; task = task->next_task) {
+ ++count;
+ }
+ return count;
+}
+
+// Peeks at the first task without removing it (NULL if the list is empty).
+iree_task_t* iree_task_list_front(iree_task_list_t* list) { return list->head; }
+
+// Peeks at the last task without removing it (NULL if the list is empty).
+iree_task_t* iree_task_list_back(iree_task_list_t* list) { return list->tail; }
+
+// Appends |task| to the list, which takes ownership of it.
+void iree_task_list_push_back(iree_task_list_t* list, iree_task_t* task) {
+ if (list->tail) {
+ // Existing tail links forward to the new task.
+ list->tail->next_task = task;
+ } else {
+ // Empty list: the new task becomes the head as well.
+ list->head = task;
+ }
+ list->tail = task;
+ task->next_task = NULL;
+}
+
+// Prepends |task| to the list, which takes ownership of it.
+void iree_task_list_push_front(iree_task_list_t* list, iree_task_t* task) {
+ iree_task_t* old_head = list->head;
+ task->next_task = old_head;
+ list->head = task;
+ // First task pushed into an empty list is also the tail.
+ if (list->tail == NULL) list->tail = task;
+}
+
+// Detaches and returns the head task, or NULL if the list is empty.
+// Ownership of the returned task transfers to the caller.
+iree_task_t* iree_task_list_pop_front(iree_task_list_t* list) {
+ iree_task_t* task = list->head;
+ if (task == NULL) return NULL;
+ list->head = task->next_task;
+ // Popping the only task empties the list entirely.
+ if (task == list->tail) list->tail = NULL;
+ task->next_task = NULL;
+ return task;
+}
+
+// Unlinks |task| from |list|; |prev_task| must be the node immediately before
+// |task| (NULL when |task| is the head). The branch order matters: the head
+// case must be handled first as it may also be the tail.
+void iree_task_list_erase(iree_task_list_t* list, iree_task_t* prev_task,
+ iree_task_t* task) {
+ if (task == list->head) {
+ // Removing head (which may _also_ be the tail).
+ list->head = task->next_task;
+ if (list->tail == task) list->tail = task->next_task;
+ } else if (task == list->tail) {
+ // Removing tail.
+ list->tail = prev_task;
+ prev_task->next_task = NULL;
+ } else {
+ // Removing inner.
+ prev_task->next_task = task->next_task;
+ }
+ // Reset the intrusive link so the task can be reused in another list.
+ task->next_task = NULL;
+}
+
+// Moves all tasks from |prefix| to the front of |list|; |prefix| is reset.
+void iree_task_list_prepend(iree_task_list_t* list, iree_task_list_t* prefix) {
+ if (!prefix->head) return; // nothing to prepend
+ if (list->head) {
+ // Splice: prefix tail links into the existing head.
+ prefix->tail->next_task = list->head;
+ } else {
+ // |list| was empty; adopt the prefix tail too.
+ list->tail = prefix->tail;
+ }
+ list->head = prefix->head;
+ prefix->head = NULL;
+ prefix->tail = NULL;
+}
+
+// Moves all tasks from |suffix| to the back of |list|; |suffix| is reset.
+void iree_task_list_append(iree_task_list_t* list, iree_task_list_t* suffix) {
+ if (!suffix->head) return; // nothing to append
+ if (list->head) {
+ // Splice: existing tail links into the suffix head.
+ list->tail->next_task = suffix->head;
+ } else {
+ // |list| was empty; adopt the suffix head too.
+ list->head = suffix->head;
+ }
+ list->tail = suffix->tail;
+ suffix->head = NULL;
+ suffix->tail = NULL;
+}
+
+// Flushes |slist| (approximate FIFO order) into a temporary list and appends
+// it to |list|. A flush returning false indicates the slist was empty.
+void iree_task_list_append_from_fifo_slist(iree_task_list_t* list,
+ iree_atomic_task_slist_t* slist) {
+ iree_task_list_t suffix;
+ iree_task_list_initialize(&suffix);
+ if (!iree_atomic_task_slist_flush(
+ slist, IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_FIFO, &suffix.head,
+ &suffix.tail)) {
+ return; // empty
+ }
+ iree_task_list_append(list, &suffix);
+}
+
+// Reverses the list in-place with a single O(n) walk.
+void iree_task_list_reverse(iree_task_list_t* list) {
+ if (!list->head) return; // empty list: nothing to do
+ // Standard singly-linked reversal: re-point each next_task at its
+ // predecessor as we walk.
+ iree_task_t* prev = NULL;
+ iree_task_t* curr = list->head;
+ list->tail = list->head; // old head becomes the new tail
+ while (curr != NULL) {
+ iree_task_t* next = curr->next_task;
+ curr->next_task = prev; // first iteration NULL-terminates the new tail
+ prev = curr;
+ curr = next;
+ }
+ list->head = prev;
+}
+
+// Splits up to the trailing |max_tasks| (bounded to roughly half the list)
+// off of |head_list| into |out_tail_list|. |out_tail_list| is initialized
+// here and need not be initialized by the caller.
+void iree_task_list_split(iree_task_list_t* head_list,
+ iree_host_size_t max_tasks,
+ iree_task_list_t* out_tail_list) {
+ iree_task_list_initialize(out_tail_list);
+ if (head_list->head == NULL) return;
+ if (head_list->head == head_list->tail) {
+ // 1 task in the source list; always prefer to steal it.
+ // This is because the victim is likely working on their last item and we
+ // can help them out by popping this off. It also has the side-effect of
+ // handling cases of donated workers wanting to steal all tasks to
+ // synchronously execute things.
+ iree_task_list_move(head_list, out_tail_list);
+ return;
+ }
+
+ // Walk through the |head_list| with two iterators; one at double-rate.
+ // If we ever notice this function showing up in profiling then we should
+ // build an acceleration structure to avoid the full walk of the first half
+ // (e.g. skip list).
+ iree_task_t* p_x1_m1 = head_list->head; // p_x1 - 1 (previous to p_x1)
+ iree_task_t* p_x1 = head_list->head; // x1 speed ptr
+ iree_task_t* p_x2 = head_list->head; // x2 speed ptr
+ while (p_x2->next_task != NULL) {
+ p_x1_m1 = p_x1;
+ p_x1 = p_x1->next_task;
+ p_x2 = p_x2->next_task;
+ if (p_x2->next_task) p_x2 = p_x2->next_task;
+ }
+
+ // p_x1 now points at the half way point in the head_list. This is where we
+ // *start* our windowed walk for pulling out max_tasks, implicitly limiting us
+ // to take at most half of the tasks from the list.
+
+ // Advance the tail list keeping an iterator -max_tasks back; when we hit the
+ // end we have our head and tail to form the list.
+ //
+ // NOTE(review): |max_tasks| is unsigned; passing 0 wraps on the first
+ // --max_tasks and behaves like a very large limit (taking up to half the
+ // list) — confirm callers always pass max_tasks > 0.
+ iree_task_t* p_window_prev = p_x1_m1;
+ iree_task_t* p_window_head = p_x1;
+ iree_task_t* p_window_tail = p_x1;
+ // Open the window to at most max_tasks entries (tail max_tasks-1 ahead).
+ while (p_window_tail->next_task != NULL && --max_tasks > 0) {
+ p_window_tail = p_window_tail->next_task;
+ }
+ // Slide the fixed-size window until the tail reaches the end of the list.
+ while (p_window_tail->next_task != NULL) {
+ p_window_prev = p_window_head;
+ p_window_head = p_window_head->next_task;
+ p_window_tail = p_window_tail->next_task;
+ }
+
+ // Sever |head_list| just before the window and hand the window off.
+ head_list->tail = p_window_prev;
+ p_window_prev->next_task = NULL;
+
+ out_tail_list->head = p_window_head;
+ out_tail_list->tail = p_window_tail;
+}
diff --git a/runtime/src/iree/task/list.h b/runtime/src/iree/task/list.h
new file mode 100644
index 0000000..ee35361
--- /dev/null
+++ b/runtime/src/iree/task/list.h
@@ -0,0 +1,109 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_LIST_H_
+#define IREE_TASK_LIST_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomic_slist.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// iree_atomic_task_slist_t, an atomic approximately LIFO singly-linked list.
+// iree_task_list_t should be preferred when working with
+// uncontended/thread-local lists as it has no overhead, while the
+// iree_atomic_task_slist_t should be used when multiple threads may need to
+// share lists of tasks (free lists, mailboxes, etc).
+IREE_TYPED_ATOMIC_SLIST_WRAPPER(iree_atomic_task, iree_task_t,
+ offsetof(iree_task_t, next_task));
+
+// Discards a task list; should be used for failure cleanup during list
+// construction to ensure intrusive pointers are reset.
+void iree_atomic_task_slist_discard(iree_atomic_task_slist_t* slist);
+
+// A singly-linked list of tasks using the embedded task next_task pointer.
+//
+// Thread-compatible; designed to be used from a single thread manipulating a
+// list for passing to an API that accepts lists.
+typedef struct iree_task_list_t {
+ iree_task_t* head;
+ iree_task_t* tail;
+} iree_task_list_t;
+
+// Initializes an empty task list.
+void iree_task_list_initialize(iree_task_list_t* out_list);
+
+// Moves |list| into |out_list|, leaving |list| empty.
+void iree_task_list_move(iree_task_list_t* list, iree_task_list_t* out_list);
+
+// Discards a task list; should be used for failure cleanup during list
+// construction to ensure intrusive pointers are reset. List is immediately
+// reusable as if it had been initialized.
+void iree_task_list_discard(iree_task_list_t* list);
+
+// Returns true if the list is empty.
+bool iree_task_list_is_empty(const iree_task_list_t* list);
+
+// Counts the total number of tasks in the list.
+// WARNING: this requires an O(n) walk of the entire list; use this only for
+// debugging or when the list is known to be small and hot in cache.
+iree_host_size_t iree_task_list_calculate_size(const iree_task_list_t* list);
+
+// Returns the first task in the list, if any.
+iree_task_t* iree_task_list_front(iree_task_list_t* list);
+
+// Returns the last task in the list, if any.
+iree_task_t* iree_task_list_back(iree_task_list_t* list);
+
+// Pushes a task onto the back of the task list. The task list takes ownership
+// of |task|.
+void iree_task_list_push_back(iree_task_list_t* list, iree_task_t* task);
+
+// Pushes a task onto the front of the task list. The task list takes ownership
+// of |task|.
+void iree_task_list_push_front(iree_task_list_t* list, iree_task_t* task);
+
+// Pops a task from the front of the task list or returns NULL if the list is
+// empty. Caller takes ownership of the returned task.
+iree_task_t* iree_task_list_pop_front(iree_task_list_t* list);
+
+// Erases |task| from the list.
+// |prev_task| must point to the task immediately prior to |task| in the list
+// or NULL if the task was at the head.
+void iree_task_list_erase(iree_task_list_t* list, iree_task_t* prev_task,
+ iree_task_t* task);
+
+// Prepends |prefix| onto the beginning of |list|. |prefix| will be reset.
+void iree_task_list_prepend(iree_task_list_t* list, iree_task_list_t* prefix);
+
+// Appends |suffix| onto the end of |list|. |suffix| will be reset.
+void iree_task_list_append(iree_task_list_t* list, iree_task_list_t* suffix);
+
+// Flushes the given |slist| and appends all tasks to the list in FIFO order.
+void iree_task_list_append_from_fifo_slist(iree_task_list_t* list,
+ iree_atomic_task_slist_t* slist);
+
+// Reverses the list in-place.
+// Requires a full O(n) traversal.
+void iree_task_list_reverse(iree_task_list_t* list);
+
+// Splits |head_list| in half (up to |max_tasks|) and retains the first half
+// in |head_list| and the second half in |out_tail_list|.
+// |out_tail_list| is initialized by the call and need not be initialized by
+// the caller.
+void iree_task_list_split(iree_task_list_t* head_list,
+ iree_host_size_t max_tasks,
+ iree_task_list_t* out_tail_list);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_LIST_H_
diff --git a/runtime/src/iree/task/list_test.cc b/runtime/src/iree/task/list_test.cc
new file mode 100644
index 0000000..c5cb5b2
--- /dev/null
+++ b/runtime/src/iree/task/list_test.cc
@@ -0,0 +1,655 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/list.h"
+
+#include "iree/task/testing/test_util.h"
+#include "iree/testing/gtest.h"
+
+namespace {
+
+// Verifies the empty-list fast paths: is_empty, zero size, and that discard
+// of an empty list is a harmless no-op.
+TEST(TaskListTest, Empty) {
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+ EXPECT_EQ(0, iree_task_list_calculate_size(&list));
+ iree_task_list_discard(&list);
+}
+
+// Verifies calculate_size tracks the task count as tasks are pushed.
+TEST(TaskListTest, CalculateSize) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+ EXPECT_EQ(0, iree_task_list_calculate_size(&list));
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list, task0);
+ EXPECT_FALSE(iree_task_list_is_empty(&list));
+ EXPECT_EQ(1, iree_task_list_calculate_size(&list));
+
+ iree_task_list_push_back(&list, task1);
+ EXPECT_EQ(2, iree_task_list_calculate_size(&list));
+ iree_task_list_push_back(&list, task2);
+ EXPECT_EQ(3, iree_task_list_calculate_size(&list));
+ iree_task_list_push_back(&list, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list));
+}
+
+// Verifies move transfers all tasks (preserving FIFO order) and leaves the
+// source list empty.
+TEST(TaskListTest, Move) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list_a, list_b;
+ iree_task_list_initialize(&list_a);
+ iree_task_list_initialize(&list_b);
+
+ EXPECT_TRUE(iree_task_list_is_empty(&list_a));
+ EXPECT_TRUE(iree_task_list_is_empty(&list_b));
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+ iree_task_list_push_back(&list_a, task0);
+ iree_task_list_push_back(&list_a, task1);
+ iree_task_list_push_back(&list_a, task2);
+ iree_task_list_push_back(&list_a, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_a));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_a));
+
+ iree_task_list_move(&list_a, &list_b);
+ EXPECT_TRUE(iree_task_list_is_empty(&list_a));
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_b));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_b));
+}
+
+// Discard of an empty list must be a no-op and leave the list reusable.
+TEST(TaskListTest, DiscardEmpty) {
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+ iree_task_list_discard(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+}
+
+// Discard of independent tasks (no dependency edges between them).
+TEST(TaskListTest, Discard) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+ iree_task_list_push_back(&list, task0);
+ iree_task_list_push_back(&list, task1);
+ iree_task_list_push_back(&list, task2);
+ iree_task_list_push_back(&list, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+
+ iree_task_list_discard(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+ // IMPLICIT: if the tasks were not released back to the pool we'll leak.
+}
+
+// Discard of a linear dependency chain (0 -> 1 -> 2 -> 3).
+TEST(TaskListTest, DiscardSequence) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+ iree_task_set_completion_task(task0, task1);
+ iree_task_set_completion_task(task1, task2);
+ iree_task_set_completion_task(task2, task3);
+ iree_task_list_push_back(&list, task0);
+ iree_task_list_push_back(&list, task1);
+ iree_task_list_push_back(&list, task2);
+ iree_task_list_push_back(&list, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+
+ iree_task_list_discard(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+ // IMPLICIT: if the tasks were not released back to the pool we'll leak.
+}
+
+// Discard of a join pattern (0, 1, 2 all complete into 3).
+TEST(TaskListTest, DiscardJoin) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+ iree_task_set_completion_task(task0, task3);
+ iree_task_set_completion_task(task1, task3);
+ iree_task_set_completion_task(task2, task3);
+ iree_task_list_push_back(&list, task0);
+ iree_task_list_push_back(&list, task1);
+ iree_task_list_push_back(&list, task2);
+ iree_task_list_push_back(&list, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+
+ iree_task_list_discard(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+ // IMPLICIT: if the tasks were not released back to the pool we'll leak.
+}
+
+// push_front produces LIFO order: pops come back newest-first.
+// (AcquireNopTask stamps the task's flags with the given value, which the
+// pops below check.)
+TEST(TaskListTest, PushFront) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_front(&list, task0);
+ iree_task_list_push_front(&list, task1);
+ iree_task_list_push_front(&list, task2);
+ iree_task_list_push_front(&list, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderLIFO(&list));
+
+ EXPECT_EQ(3, iree_task_list_pop_front(&list)->flags);
+ EXPECT_EQ(2, iree_task_list_pop_front(&list)->flags);
+ EXPECT_EQ(1, iree_task_list_pop_front(&list)->flags);
+ EXPECT_EQ(0, iree_task_list_pop_front(&list)->flags);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+}
+
+// push_back + pop_front produces FIFO order.
+TEST(TaskListTest, PopFront) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list, task0);
+ iree_task_list_push_back(&list, task1);
+ iree_task_list_push_back(&list, task2);
+ iree_task_list_push_back(&list, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+
+ EXPECT_EQ(0, iree_task_list_pop_front(&list)->flags);
+ EXPECT_EQ(1, iree_task_list_pop_front(&list)->flags);
+ EXPECT_EQ(2, iree_task_list_pop_front(&list)->flags);
+ EXPECT_EQ(3, iree_task_list_pop_front(&list)->flags);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+}
+
+// Exercises erase at each position: head, tail, inner, and the final task
+// (head == tail).
+TEST(TaskListTest, Erase) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list, task0);
+ iree_task_list_push_back(&list, task1);
+ iree_task_list_push_back(&list, task2);
+ iree_task_list_push_back(&list, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+
+ // Remove head.
+ iree_task_list_erase(&list, NULL, task0);
+ EXPECT_EQ(3, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+ EXPECT_EQ(task1, iree_task_list_front(&list));
+
+ // Remove tail.
+ iree_task_list_erase(&list, task2, task3);
+ EXPECT_EQ(2, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+ EXPECT_EQ(task2, iree_task_list_back(&list));
+
+ // Remove the rest.
+ iree_task_list_erase(&list, task1, task2);
+ EXPECT_EQ(1, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+ EXPECT_EQ(task1, iree_task_list_front(&list));
+ EXPECT_EQ(task1, iree_task_list_back(&list));
+
+ // Remove the last task (head == tail); list must read fully empty after.
+ iree_task_list_erase(&list, NULL, task1);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+ EXPECT_EQ(NULL, iree_task_list_front(&list));
+ EXPECT_EQ(NULL, iree_task_list_back(&list));
+}
+
+// Prepending an empty list must leave the target unchanged.
+TEST(TaskListTest, PrependEmpty) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list_a, list_b;
+ iree_task_list_initialize(&list_a);
+ iree_task_list_initialize(&list_b);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+
+ iree_task_list_push_back(&list_a, task0);
+ iree_task_list_push_back(&list_a, task1);
+
+ EXPECT_TRUE(iree_task_list_is_empty(&list_b));
+ iree_task_list_prepend(&list_a, &list_b);
+ EXPECT_EQ(2, iree_task_list_calculate_size(&list_a));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_a));
+}
+
+// Prepending into an empty list adopts the prefix wholesale.
+TEST(TaskListTest, PrependIntoEmpty) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list_a, list_b;
+ iree_task_list_initialize(&list_a);
+ iree_task_list_initialize(&list_b);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list_b, task0);
+ iree_task_list_push_back(&list_b, task1);
+ iree_task_list_push_back(&list_b, task2);
+ iree_task_list_push_back(&list_b, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_b));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_b));
+
+ EXPECT_TRUE(iree_task_list_is_empty(&list_a));
+ iree_task_list_prepend(&list_a, &list_b);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_a));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_a));
+ EXPECT_TRUE(iree_task_list_is_empty(&list_b));
+}
+
+// Prepending 3 tasks into a list holding 1 preserves overall FIFO order.
+TEST(TaskListTest, PrependInto1) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list_a, list_b;
+ iree_task_list_initialize(&list_a);
+ iree_task_list_initialize(&list_b);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list_b, task0);
+ iree_task_list_push_back(&list_b, task1);
+ iree_task_list_push_back(&list_b, task2);
+
+ iree_task_list_push_back(&list_a, task3);
+ iree_task_list_prepend(&list_a, &list_b);
+
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_a));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_a));
+ EXPECT_TRUE(iree_task_list_is_empty(&list_b));
+}
+
+// Prepending 2 tasks into a list holding 2 preserves overall FIFO order.
+TEST(TaskListTest, PrependInto2) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list_a, list_b;
+ iree_task_list_initialize(&list_a);
+ iree_task_list_initialize(&list_b);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list_b, task0);
+ iree_task_list_push_back(&list_b, task1);
+ iree_task_list_push_back(&list_a, task2);
+ iree_task_list_push_back(&list_a, task3);
+ iree_task_list_prepend(&list_a, &list_b);
+
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_a));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_a));
+ EXPECT_TRUE(iree_task_list_is_empty(&list_b));
+}
+
+// Appending into an empty list adopts the suffix wholesale.
+TEST(TaskListTest, AppendIntoEmpty) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list_a, list_b;
+ iree_task_list_initialize(&list_a);
+ iree_task_list_initialize(&list_b);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list_b, task0);
+ iree_task_list_push_back(&list_b, task1);
+ iree_task_list_push_back(&list_b, task2);
+ iree_task_list_push_back(&list_b, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_b));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_b));
+
+ EXPECT_TRUE(iree_task_list_is_empty(&list_a));
+ iree_task_list_append(&list_a, &list_b);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_a));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_a));
+ EXPECT_TRUE(iree_task_list_is_empty(&list_b));
+}
+
+// Appending 3 tasks onto a list holding 1 preserves overall FIFO order.
+TEST(TaskListTest, AppendInto1) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list_a, list_b;
+ iree_task_list_initialize(&list_a);
+ iree_task_list_initialize(&list_b);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list_b, task1);
+ iree_task_list_push_back(&list_b, task2);
+
+ iree_task_list_push_back(&list_b, task3);
+ iree_task_list_push_back(&list_a, task0);
+
+ iree_task_list_append(&list_a, &list_b);
+
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_a));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_a));
+ EXPECT_TRUE(iree_task_list_is_empty(&list_b));
+}
+
+// Appending 2 tasks onto a list holding 2 preserves overall FIFO order.
+TEST(TaskListTest, AppendInto2) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list_a, list_b;
+ iree_task_list_initialize(&list_a);
+ iree_task_list_initialize(&list_b);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list_b, task2);
+ iree_task_list_push_back(&list_b, task3);
+
+ iree_task_list_push_back(&list_a, task0);
+ iree_task_list_push_back(&list_a, task1);
+
+ iree_task_list_append(&list_a, &list_b);
+
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list_a));
+ EXPECT_TRUE(CheckListOrderFIFO(&list_a));
+ EXPECT_TRUE(iree_task_list_is_empty(&list_b));
+}
+
+// Reversing an empty list is a no-op.
+TEST(TaskListTest, Reverse0) {
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+ iree_task_list_reverse(&list);
+ EXPECT_TRUE(iree_task_list_is_empty(&list));
+}
+
+// Reversing a single-task list (head == tail boundary).
+TEST(TaskListTest, Reverse1) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+
+ iree_task_list_push_back(&list, task0);
+ EXPECT_EQ(1, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+ iree_task_list_reverse(&list);
+ EXPECT_TRUE(CheckListOrderLIFO(&list));
+}
+
+// Reversing a two-task list (simplest head/tail swap).
+TEST(TaskListTest, Reverse2) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+
+ iree_task_list_push_back(&list, task0);
+ iree_task_list_push_back(&list, task1);
+ EXPECT_EQ(2, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+ iree_task_list_reverse(&list);
+ EXPECT_TRUE(CheckListOrderLIFO(&list));
+}
+
+// Reversing a longer list with interior nodes.
+TEST(TaskListTest, Reverse4) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t list;
+ iree_task_list_initialize(&list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&list, task0);
+ iree_task_list_push_back(&list, task1);
+ iree_task_list_push_back(&list, task2);
+ iree_task_list_push_back(&list, task3);
+ EXPECT_EQ(4, iree_task_list_calculate_size(&list));
+ EXPECT_TRUE(CheckListOrderFIFO(&list));
+ iree_task_list_reverse(&list);
+ EXPECT_TRUE(CheckListOrderLIFO(&list));
+}
+
+// Splitting an empty list yields two empty lists.
+// NOTE: tail_list is deliberately passed uninitialized; the split call is
+// expected to initialize it (the EXPECT below relies on that).
+TEST(TaskListTest, SplitEmpty) {
+ iree_task_list_t head_list;
+ iree_task_list_initialize(&head_list);
+
+ iree_task_list_t tail_list;
+ iree_task_list_split(&head_list, /*max_tasks=*/64, &tail_list);
+
+ EXPECT_TRUE(iree_task_list_is_empty(&head_list));
+ EXPECT_TRUE(iree_task_list_is_empty(&tail_list));
+}
+
+// Splitting a 1-task list with a generous max_tasks moves the single task
+// entirely to the tail, leaving the head empty.
+TEST(TaskListTest, Split1) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t head_list;
+ iree_task_list_initialize(&head_list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ iree_task_list_push_back(&head_list, task0);
+ EXPECT_EQ(1, iree_task_list_calculate_size(&head_list));
+
+ iree_task_list_t tail_list;
+ iree_task_list_split(&head_list, /*max_tasks=*/64, &tail_list);
+
+ EXPECT_TRUE(iree_task_list_is_empty(&head_list));
+ EXPECT_EQ(1, iree_task_list_calculate_size(&tail_list));
+}
+
+// An even split: 2 tasks divide into 1 head + 1 tail, both staying in FIFO
+// order.
+TEST(TaskListTest, Split2) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t head_list;
+ iree_task_list_initialize(&head_list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+
+ iree_task_list_push_back(&head_list, task0);
+ iree_task_list_push_back(&head_list, task1);
+
+ iree_task_list_t tail_list;
+ iree_task_list_split(&head_list, /*max_tasks=*/64, &tail_list);
+
+ EXPECT_EQ(1, iree_task_list_calculate_size(&head_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&head_list));
+ EXPECT_EQ(1, iree_task_list_calculate_size(&tail_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&tail_list));
+}
+
+// An odd split: with 3 tasks the tail receives the extra one (1 head +
+// 2 tail), both sides staying in FIFO order.
+TEST(TaskListTest, Split3) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t head_list;
+ iree_task_list_initialize(&head_list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+
+ iree_task_list_push_back(&head_list, task0);
+ iree_task_list_push_back(&head_list, task1);
+ iree_task_list_push_back(&head_list, task2);
+
+ iree_task_list_t tail_list;
+ iree_task_list_split(&head_list, /*max_tasks=*/64, &tail_list);
+
+ EXPECT_EQ(1, iree_task_list_calculate_size(&head_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&head_list));
+ EXPECT_EQ(2, iree_task_list_calculate_size(&tail_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&tail_list));
+}
+
+// An even split of 4 tasks: 2 stay in the head, 2 move to the tail, both
+// sides staying in FIFO order.
+TEST(TaskListTest, Split4) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t head_list;
+ iree_task_list_initialize(&head_list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&head_list, task0);
+ iree_task_list_push_back(&head_list, task1);
+ iree_task_list_push_back(&head_list, task2);
+ iree_task_list_push_back(&head_list, task3);
+
+ iree_task_list_t tail_list;
+ iree_task_list_split(&head_list, /*max_tasks=*/64, &tail_list);
+
+ EXPECT_EQ(2, iree_task_list_calculate_size(&head_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&head_list));
+ EXPECT_EQ(2, iree_task_list_calculate_size(&tail_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&tail_list));
+}
+
+// max_tasks caps the tail size: with 4 tasks and max_tasks=1 only one task
+// may move to the tail; the other 3 remain in the head.
+TEST(TaskListTest, SplitMaxTasks1) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t head_list;
+ iree_task_list_initialize(&head_list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&head_list, task0);
+ iree_task_list_push_back(&head_list, task1);
+ iree_task_list_push_back(&head_list, task2);
+ iree_task_list_push_back(&head_list, task3);
+
+ iree_task_list_t tail_list;
+ iree_task_list_split(&head_list, /*max_tasks=*/1, &tail_list);
+
+ EXPECT_EQ(3, iree_task_list_calculate_size(&head_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&head_list));
+ EXPECT_EQ(1, iree_task_list_calculate_size(&tail_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&tail_list));
+}
+
+// max_tasks=2 caps the tail at 2 of the 4 tasks; here it coincides with an
+// even split.
+TEST(TaskListTest, SplitMaxTasks2) {
+ auto pool = AllocateNopPool();
+ auto scope = AllocateScope("a");
+
+ iree_task_list_t head_list;
+ iree_task_list_initialize(&head_list);
+
+ auto task0 = AcquireNopTask(pool, scope, 0);
+ auto task1 = AcquireNopTask(pool, scope, 1);
+ auto task2 = AcquireNopTask(pool, scope, 2);
+ auto task3 = AcquireNopTask(pool, scope, 3);
+
+ iree_task_list_push_back(&head_list, task0);
+ iree_task_list_push_back(&head_list, task1);
+ iree_task_list_push_back(&head_list, task2);
+ iree_task_list_push_back(&head_list, task3);
+
+ iree_task_list_t tail_list;
+ iree_task_list_split(&head_list, /*max_tasks=*/2, &tail_list);
+
+ EXPECT_EQ(2, iree_task_list_calculate_size(&head_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&head_list));
+ EXPECT_EQ(2, iree_task_list_calculate_size(&tail_list));
+ EXPECT_TRUE(CheckListOrderFIFO(&tail_list));
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/poller.c b/runtime/src/iree/task/poller.c
new file mode 100644
index 0000000..ee6465b
--- /dev/null
+++ b/runtime/src/iree/task/poller.c
@@ -0,0 +1,535 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/poller.h"
+
+#include "iree/base/tracing.h"
+#include "iree/task/executor.h"
+#include "iree/task/executor_impl.h"
+#include "iree/task/submission.h"
+#include "iree/task/task_impl.h"
+#include "iree/task/tuning.h"
+
+static int iree_task_poller_main(iree_task_poller_t* poller);
+
+// Brings up poller state (state notification, mailbox slist, wait list),
+// acquires the wake event and wait set used to batch system waits, and spawns
+// the dedicated wait thread. Returns a failing status if any resource
+// acquisition or thread creation fails.
+iree_status_t iree_task_poller_initialize(
+ iree_task_executor_t* executor,
+ iree_thread_affinity_t ideal_thread_affinity,
+ iree_task_poller_t* out_poller) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ out_poller->executor = executor;
+ out_poller->ideal_thread_affinity = ideal_thread_affinity;
+ iree_notification_initialize(&out_poller->state_notification);
+ iree_atomic_task_slist_initialize(&out_poller->mailbox_slist);
+ iree_task_list_initialize(&out_poller->wait_list);
+
+ iree_task_poller_state_t initial_state = IREE_TASK_POLLER_STATE_RUNNING;
+ // TODO(benvanik): support initially suspended wait threads. This can reduce
+ // startup time as we won't give the system a chance to deschedule the calling
+ // thread as it performs the initial resume of the wait thread. We'll need to
+ // check in enqueue to see if the wait thread needs to be resumed.
+ // initial_state = IREE_TASK_POLLER_STATE_SUSPENDED;
+ iree_atomic_store_int32(&out_poller->state, initial_state,
+ iree_memory_order_seq_cst);
+
+ // Acquire an event we can use to wake the wait thread from other threads.
+ iree_status_t status = iree_event_pool_acquire(
+ iree_task_executor_event_pool(out_poller->executor), 1,
+ &out_poller->wake_event);
+
+ // Wait set used to batch syscalls for polling/waiting on wait handles.
+ // This is currently limited to a relatively small max to make bad behavior
+ // clearer with nice RESOURCE_EXHAUSTED errors. If we start to hit that limit
+ // (~63+ simultaneous system waits) we'll need to shard out the wait sets -
+ // possibly with multiple wait threads (one per set).
+ if (iree_status_is_ok(status)) {
+ status = iree_wait_set_allocate(IREE_TASK_EXECUTOR_MAX_OUTSTANDING_WAITS,
+ executor->allocator, &out_poller->wait_set);
+ }
+ if (iree_status_is_ok(status)) {
+ status = iree_wait_set_insert(out_poller->wait_set, out_poller->wake_event);
+ }
+
+ iree_thread_create_params_t thread_params;
+ memset(&thread_params, 0, sizeof(thread_params));
+ thread_params.name = iree_make_cstring_view("iree-poller");
+ thread_params.create_suspended = false;
+ // TODO(benvanik): make high so to reduce latency? The sooner we wake the
+ // sooner we get ready tasks back in the execution queue, though we don't
+ // want to preempt any of the workers.
+ thread_params.priority_class = IREE_THREAD_PRIORITY_CLASS_NORMAL;
+ thread_params.initial_affinity = out_poller->ideal_thread_affinity;
+
+ // NOTE: if the thread creation fails we'll bail here and let the caller
+ // cleanup by calling deinitialize (which is safe because we zero init
+ // everything).
+ // NOTE(review): deinitialize asserts that the poller reached the ZOMBIE
+ // state; confirm the failure-cleanup path is valid when the wait thread was
+ // never created (state here is still RUNNING at that point).
+ if (iree_status_is_ok(status)) {
+ status = iree_thread_create((iree_thread_entry_t)iree_task_poller_main,
+ out_poller, thread_params, executor->allocator,
+ &out_poller->thread);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Atomically transitions the poller state to EXITING (resuming the thread if
+// it was suspended) and sets the wake event so a thread blocked in the system
+// wait API notices the request promptly. Safe to call repeatedly; a no-op if
+// the thread was never created.
+void iree_task_poller_request_exit(iree_task_poller_t* poller) {
+ if (!poller->thread) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // If the thread is already in the exiting/zombie state we don't need to do
+ // anything.
+ iree_task_poller_state_t prev_state =
+ (iree_task_poller_state_t)iree_atomic_exchange_int32(
+ &poller->state, IREE_TASK_POLLER_STATE_EXITING,
+ iree_memory_order_acq_rel);
+ switch (prev_state) {
+ case IREE_TASK_POLLER_STATE_SUSPENDED:
+ // Poller was suspended; resume it so that it can exit itself.
+ iree_thread_resume(poller->thread);
+ break;
+ case IREE_TASK_POLLER_STATE_ZOMBIE:
+ // Poller already exited; reset state to ZOMBIE.
+ iree_atomic_store_int32(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE,
+ iree_memory_order_seq_cst);
+ break;
+ default:
+ // Poller now set to EXITING and should exit soon.
+ break;
+ }
+
+ // Kick the wait thread to exit the system wait API, if needed.
+ // It'll check the state and abort ASAP.
+ iree_event_set(&poller->wake_event);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns true if the wait thread is in the zombie state (exited and awaiting
+// teardown).
+// Also used as the iree_condition_fn_t that iree_task_poller_await_exit
+// blocks on.
+static bool iree_task_poller_is_zombie(iree_task_poller_t* poller) {
+ return iree_atomic_load_int32(&poller->state, iree_memory_order_seq_cst) ==
+ IREE_TASK_POLLER_STATE_ZOMBIE;
+}
+
+// Requests exit (if not already requested) and then blocks on the state
+// notification until the wait thread publishes the ZOMBIE state. A no-op if
+// the thread was never created.
+void iree_task_poller_await_exit(iree_task_poller_t* poller) {
+ if (!poller->thread) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_task_poller_request_exit(poller);
+ iree_notification_await(&poller->state_notification,
+ (iree_condition_fn_t)iree_task_poller_is_zombie,
+ poller, iree_infinite_timeout());
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Tears down all poller resources. Requires that the wait thread has already
+// exited (request_exit + await_exit); the assert below enforces that.
+void iree_task_poller_deinitialize(iree_task_poller_t* poller) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Must have called request_exit/await_exit.
+ IREE_ASSERT_TRUE(iree_task_poller_is_zombie(poller));
+
+ iree_thread_release(poller->thread);
+ poller->thread = NULL;
+
+ iree_wait_set_free(poller->wait_set);
+ // The immediate-handle check guards the event-pool release for the case
+ // where the wake event was never successfully acquired.
+ if (!iree_wait_handle_is_immediate(poller->wake_event)) {
+ iree_event_pool_release(iree_task_executor_event_pool(poller->executor), 1,
+ &poller->wake_event);
+ }
+
+ // Drop any tasks still registered with the poller before destroying the
+ // data structures that held them.
+ iree_task_list_discard(&poller->wait_list);
+ iree_atomic_task_slist_discard(&poller->mailbox_slist);
+ iree_atomic_task_slist_deinitialize(&poller->mailbox_slist);
+ iree_notification_deinitialize(&poller->state_notification);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Posts |wait_tasks| to the poller's LIFO mailbox and wakes the wait thread so
+// it can merge them into its wait list. The source list is reset to empty on
+// return.
+void iree_task_poller_enqueue(iree_task_poller_t* poller,
+ iree_task_list_t* wait_tasks) {
+ if (iree_task_list_is_empty(wait_tasks)) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Move the list into the mailbox. Note that the mailbox is LIFO and this list
+ // is concatenated with its current order preserved (which should be LIFO),
+ // though we don't really care about order here.
+ iree_atomic_task_slist_concat(&poller->mailbox_slist, wait_tasks->head,
+ wait_tasks->tail);
+ memset(wait_tasks, 0, sizeof(*wait_tasks));
+
+ // Kick the wait thread to exit the system wait API, if needed.
+ // It'll merge the new wait tasks and reset the event.
+ iree_event_set(&poller->wake_event);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Acquires a wait handle for |task| and inserts it into |wait_set|.
+// If the wait source is not already backed by a handle it is exported to a
+// system wait primitive (any type accepted) and the primitive is imported back
+// into the task's wait source so it can be woken later. Returns a failing
+// status if the export or the wait-set insertion fails.
+static iree_status_t iree_task_poller_insert_wait_handle(
+ iree_wait_set_t* wait_set, iree_task_wait_t* task) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_status_t status = iree_ok_status();
+
+ iree_wait_handle_t wait_handle = iree_wait_handle_immediate();
+ iree_wait_handle_t* wait_handle_ptr =
+ iree_wait_handle_from_source(&task->wait_source);
+ if (wait_handle_ptr) {
+ // Already a wait handle - can directly insert it.
+ wait_handle = *wait_handle_ptr;
+ } else {
+ iree_wait_primitive_t wait_primitive = iree_wait_primitive_immediate();
+ status =
+ iree_wait_source_export(task->wait_source, IREE_WAIT_PRIMITIVE_TYPE_ANY,
+ iree_immediate_timeout(), &wait_primitive);
+ if (iree_status_is_ok(status)) {
+ // Swap the wait handle with the exported handle so we can wake it later.
+ // It'd be ideal if we retained the wait handle separate so that we could
+ // still do fast queries for local wait sources.
+ iree_wait_handle_wrap_primitive(wait_primitive.type, wait_primitive.value,
+ &wait_handle);
+ status = iree_wait_source_import(wait_primitive, &task->wait_source);
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
+ status = iree_wait_set_insert(wait_set, wait_handle);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Bitmask of outcomes from preparing a single wait task for a system wait.
+enum iree_task_poller_prepare_result_bits_e {
+ // Wait remains unresolved and stays in the wait list.
+ IREE_TASK_POLLER_PREPARE_OK = 0,
+ // Task resolved/failed and was retired; it must be removed from the list.
+ IREE_TASK_POLLER_PREPARE_RETIRED = 1u << 0,
+ // A wait-any cancellation fired; the whole wait list needs rescanning.
+ IREE_TASK_POLLER_PREPARE_CANCELLED = 1u << 1,
+};
+typedef uint32_t iree_task_poller_prepare_result_t;
+
+// Prepares a wait |task| for waiting.
+// The task will be checked for completion or failure such as deadline exceeded
+// and removed from the wait list if resolved. If unresolved the wait will be
+// prepared for the system wait by ensuring a wait handle is available.
+// |earliest_deadline_ns| is lowered to the task's deadline (or delay target)
+// when the task remains pending; retired tasks are enqueued on
+// |pending_submission|.
+static iree_task_poller_prepare_result_t iree_task_poller_prepare_task(
+ iree_task_poller_t* poller, iree_task_wait_t* task,
+ iree_task_submission_t* pending_submission, iree_time_t now_ns,
+ iree_time_t* earliest_deadline_ns) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Status of the preparation - failures propagate to the task scope.
+ iree_status_t status = iree_ok_status();
+ // Wait status:
+ // OK: wait resolved successfully
+ // DEFERRED: wait unresolved
+ // DEADLINE_EXCEEDED: deadline was hit before the wait resolved
+ // CANCELLED: wait was cancelled via the cancellation flag
+ iree_status_code_t wait_status_code = IREE_STATUS_DEFERRED;
+ if (iree_all_bits_set(task->header.flags, IREE_TASK_FLAG_WAIT_COMPLETED)) {
+ // Wait was marked as resolved and we just pass that through here.
+ // This allows us to bypass more expensive queries when doing a post-wake
+ // scan of tasks.
+ wait_status_code = IREE_STATUS_OK;
+ } else if (task->cancellation_flag != NULL &&
+ iree_atomic_load_int32(task->cancellation_flag,
+ iree_memory_order_acquire) != 0) {
+ // Task was cancelled by the user (or a wait-any). These retire without
+ // failure and it's up to the user to handle what happens to them.
+ wait_status_code = IREE_STATUS_CANCELLED;
+ } else if (iree_wait_source_is_immediate(task->wait_source)) {
+ // Task has been neutered and is treated as an immediately resolved wait.
+ wait_status_code = IREE_STATUS_OK;
+ } else if (iree_wait_source_is_delay(task->wait_source)) {
+ // Task is a delay until some future time; factor that in to our earliest
+ // deadline so that we'll wait in the system until that time. If we wake
+ // earlier because another wait resolved it's still possible for the delay
+ // to have been reached before we get back to this check.
+ iree_time_t delay_deadline_ns = (iree_time_t)task->wait_source.data;
+ if (delay_deadline_ns <= now_ns + IREE_TASK_EXECUTOR_DELAY_SLOP_NS) {
+ // Wait deadline reached.
+ wait_status_code = IREE_STATUS_OK;
+ } else {
+ // Still waiting.
+ *earliest_deadline_ns =
+ iree_min(*earliest_deadline_ns, delay_deadline_ns);
+ wait_status_code = IREE_STATUS_DEFERRED;
+ }
+ } else {
+ // An actual wait. Ensure that the deadline has not been exceeded yet.
+ // If it hasn't yet been hit we'll propagate the deadline to the system wait
+ // API - then on the next pump we'll hit this case and retire the task.
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, task->deadline_ns);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, now_ns);
+ if (task->deadline_ns <= now_ns) {
+ wait_status_code = IREE_STATUS_DEADLINE_EXCEEDED;
+ } else {
+ // Query the status of the wait source to see if it has already been
+ // resolved. Under load we can get lucky and end up with resolved waits
+ // before ever needing to export them for a full system wait. This query
+ // can also avoid making a syscall to check the state of the source such
+ // as when the source is a process-local type.
+ wait_status_code = IREE_STATUS_OK;
+ status = iree_wait_source_query(task->wait_source, &wait_status_code);
+
+ // TODO(benvanik): avoid this query for wait handles: we don't want to
+ // make one syscall per handle and could rely on the completed bit being
+ // set to retire these.
+ }
+
+ // If the wait has not been resolved then we need to ensure there's an
+ // exported wait handle in the wait set. We only do this on the first time
+ // we prepare the task.
+ if (wait_status_code == IREE_STATUS_DEFERRED) {
+ if (!iree_all_bits_set(task->header.flags,
+ IREE_TASK_FLAG_WAIT_EXPORTED)) {
+ task->header.flags |= IREE_TASK_FLAG_WAIT_EXPORTED;
+ status = iree_task_poller_insert_wait_handle(poller->wait_set, task);
+ }
+ *earliest_deadline_ns =
+ iree_min(*earliest_deadline_ns, task->deadline_ns);
+ }
+ }
+
+ if (iree_status_is_ok(status) && wait_status_code == IREE_STATUS_DEFERRED) {
+ // Wait is prepared for use and can be waited on.
+ IREE_TRACE_ZONE_END(z0);
+ return IREE_TASK_POLLER_PREPARE_OK;
+ }
+
+ // If the task was able to be retired (deadline elapsed, completed, etc)
+ // then we need to unregister it from the poller and send it back to the
+ // workers for completion.
+ iree_task_poller_prepare_result_t result = IREE_TASK_POLLER_PREPARE_RETIRED;
+
+ // If this was part of a wait-any operation then set the cancellation flag
+ // such that other waits are cancelled.
+ // NOTE(review): assumes tasks flagged WAIT_ANY always carry a non-NULL
+ // cancellation_flag (the NULL check above only guards the load) - confirm.
+ if (iree_any_bit_set(task->header.flags, IREE_TASK_FLAG_WAIT_ANY)) {
+ if (iree_atomic_fetch_add_int32(task->cancellation_flag, 1,
+ iree_memory_order_release) == 0) {
+ // Ensure we scan again to clean up any potentially cancelled tasks.
+ // If this was task 4 in a wait-any list then tasks 0-3 need to be
+ // retired.
+ result |= IREE_TASK_POLLER_PREPARE_CANCELLED;
+ }
+ }
+
+ // Remove the system wait handle from the wait set, if assigned.
+ if (iree_all_bits_set(task->header.flags, IREE_TASK_FLAG_WAIT_EXPORTED)) {
+ iree_wait_handle_t* wait_handle =
+ iree_wait_handle_from_source(&task->wait_source);
+ if (wait_handle) {
+ iree_wait_set_erase(poller->wait_set, *wait_handle);
+ }
+ task->header.flags &= ~IREE_TASK_FLAG_WAIT_EXPORTED;
+ }
+
+ // Retire the task and enqueue any available completion task.
+ // Note that we pass in the status of the wait query above: that propagates
+ // any query failure into the task/task scope.
+ if (iree_status_is_ok(status) && wait_status_code != IREE_STATUS_OK) {
+ // Cancellation is ok - we just ignore those.
+ if (wait_status_code != IREE_STATUS_CANCELLED) {
+ status = iree_status_from_code(wait_status_code);
+ }
+ }
+ iree_task_wait_retire(task, pending_submission, status);
+
+ IREE_TRACE_ZONE_END(z0);
+ return result;
+}
+
+// Scans all wait tasks in |poller| to see if they have resolved.
+// Resolved/failed waits are enqueued on |pending_submission|.
+// If there are any unresolved delay tasks the earliest deadline will be stored
+// in |out_earliest_deadline_ns| and otherwise it'll be set to
+// IREE_TIME_INFINITE_FUTURE.
+static void iree_task_poller_prepare_wait(
+ iree_task_poller_t* poller, iree_task_submission_t* pending_submission,
+ iree_time_t* out_earliest_deadline_ns) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ *out_earliest_deadline_ns = IREE_TIME_INFINITE_FUTURE;
+
+ // TODO(benvanik): only query if there are pending delays; this is (likely) a
+ // syscall that we only need to perform if we're going to delay.
+ iree_time_t now_ns = iree_time_now();
+
+ // Perform the scan over the task list; we may need to retry the scan if we
+ // encounter a situation that would invalidate other waits - such as
+ // cancellation or scope errors.
+ // NOTE: a retry re-walks the entire wait list from the front.
+ bool retry_scan = false;
+ do {
+ retry_scan = false;
+
+ // Note that we walk the singly-linked list inline and need to keep track of
+ // the previous task in case we need to unlink one.
+ iree_task_t* prev_task = NULL;
+ iree_task_t* task = iree_task_list_front(&poller->wait_list);
+ while (task != NULL) {
+ // Capture the successor first: prepare may retire |task| and unlink it.
+ iree_task_t* next_task = task->next_task;
+
+ iree_task_poller_prepare_result_t result = iree_task_poller_prepare_task(
+ poller, (iree_task_wait_t*)task, pending_submission, now_ns,
+ out_earliest_deadline_ns);
+ if (iree_all_bits_set(result, IREE_TASK_POLLER_PREPARE_CANCELLED)) {
+ // A task was cancelled; we'll need to retry the scan to clean up any
+ // waits we may have already checked.
+ retry_scan = true;
+ }
+
+ if (iree_all_bits_set(result, IREE_TASK_POLLER_PREPARE_RETIRED)) {
+ // Erase the retired task from the wait list.
+ iree_task_list_erase(&poller->wait_list, prev_task, task);
+ } else {
+ prev_task = task;
+ }
+ task = next_task;
+ }
+ } while (retry_scan);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Finds tasks in |poller| using the given wait handle and marks them as
+// completed.
+// Currently a stub: aside from tracing it does nothing and relies on the
+// post-wake scan in iree_task_poller_prepare_wait to re-query each wait
+// source (see TODO below).
+static void iree_task_poller_wake_task(iree_task_poller_t* poller,
+ iree_wait_handle_t wake_handle) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // TODO(benvanik): scan the list. We need a way to map wake_handle back to
+ // the zero or more tasks that match it but don't currently store the
+ // handle. Ideally we'd have the wait set tell us precisely which things
+ // woke - possibly by having a bitmap of original insertions that match the
+ // handle - but for now we just eat the extra query syscall.
+ int woken_tasks = 0;
+
+ (void)woken_tasks;
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, woken_tasks);
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Commits a system wait on the current wait set in |poller|.
+// The wait will time out after |deadline_ns| is reached and return even if no
+// wait handles were resolved. |deadline_ns| is an absolute time (it is derived
+// from task deadlines and may be IREE_TIME_INFINITE_FUTURE).
+static void iree_task_poller_commit_wait(iree_task_poller_t* poller,
+ iree_time_t deadline_ns) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Enter the system wait API.
+ iree_wait_handle_t wake_handle = iree_wait_handle_immediate();
+ iree_status_t status =
+ iree_wait_any(poller->wait_set, deadline_ns, &wake_handle);
+ if (iree_status_is_ok(status)) {
+ // One or more waiters is ready. We don't support multi-wake right now so
+ // we'll just take the one we got back and try again.
+ //
+ // To avoid extra syscalls we scan the list and mark whatever tasks were
+ // using the handle the wait set reported waking as completed. On the next
+ // scan they'll be retired immediately. Ideally we'd have the wait set be
+ // able to tell us this precise list.
+ if (iree_wait_handle_is_immediate(wake_handle)) {
+ // No-op wait - ignore.
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, "nop");
+ } else if (wake_handle.type == poller->wake_event.type &&
+ memcmp(&wake_handle.value, &poller->wake_event.value,
+ sizeof(wake_handle.value)) == 0) {
+ // Woken on the wake_event used to exit the system wait early.
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, "wake_event");
+ } else {
+ // Route to zero or more tasks using this handle.
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, "task(s)");
+ iree_task_poller_wake_task(poller, wake_handle);
+ }
+ } else if (iree_status_is_deadline_exceeded(status)) {
+ // Indicates nothing was woken within the deadline. We gracefully bail here
+ // and let the scan check for per-task deadline exceeded events or delay
+ // completion.
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, "deadline exceeded");
+ } else {
+ // (Spurious?) error during wait.
+ // TODO(#4026): propagate failure to all scopes involved.
+ // Failures during waits are serious: ignoring them could lead to live-lock
+ // as tasks further in the pipeline expect them to have completed or - even
+ // worse - user code/other processes/drivers/etc may expect them to
+ // complete.
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, "failure");
+ IREE_ASSERT_TRUE(iree_status_is_ok(status));
+ iree_status_ignore(status);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Pumps the |poller| until it is requested to exit.
+// Each pass: check for exit, merge newly-enqueued waits, scan/retire resolved
+// waits, then block in the system wait API until a handle resolves, the
+// earliest deadline passes, or the wake event is set.
+// NOTE: the EXITING state is only observed at the top of each pass;
+// request_exit also sets wake_event so a blocked wait returns promptly.
+static void iree_task_poller_pump_until_exit(iree_task_poller_t* poller) {
+ while (true) {
+ // Check state to see if we've been asked to exit.
+ if (iree_atomic_load_int32(&poller->state, iree_memory_order_seq_cst) ==
+ IREE_TASK_POLLER_STATE_EXITING) {
+ // Thread exit requested - cancel pumping.
+ break;
+ }
+
+ IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree_task_poller_pump");
+
+ // Reset the wake event and merge any incoming tasks to the wait list.
+ // To avoid races we reset and then merge: this allows another thread
+ // coming in and enqueuing tasks to set the event and ensure that we'll
+ // get the tasks as we'll fall through on the wait below and loop again.
+ iree_event_reset(&poller->wake_event);
+ iree_task_list_append_from_fifo_slist(&poller->wait_list,
+ &poller->mailbox_slist);
+
+ // Scan all wait tasks to see if any have resolved and if so we'll enqueue
+ // their retirement on the executor and drop them from the list.
+ iree_task_submission_t pending_submission;
+ iree_task_submission_initialize(&pending_submission);
+ iree_time_t earliest_deadline_ns = IREE_TIME_INFINITE_FUTURE;
+ iree_task_poller_prepare_wait(poller, &pending_submission,
+ &earliest_deadline_ns);
+ if (!iree_task_submission_is_empty(&pending_submission)) {
+ iree_task_executor_submit(poller->executor, &pending_submission);
+ iree_task_executor_flush(poller->executor);
+ }
+
+ // Enter the system multi-wait API.
+ // We unconditionally do this: if we have nothing to wait on we'll still
+ // wait on the wake_event for new waits to be enqueued - or the first delay
+ // to be reached.
+ iree_task_poller_commit_wait(poller, earliest_deadline_ns);
+
+ IREE_TRACE_ZONE_END(z0);
+ }
+}
+
+// Thread entry point for the poller wait thread.
+// Publishes the ZOMBIE state and posts state_notification on the way out even
+// when exit was requested before the thread ever started pumping.
+static int iree_task_poller_main(iree_task_poller_t* poller) {
+ IREE_TRACE_ZONE_BEGIN(thread_zone);
+
+ // Reset affinity (as it can change over time).
+ // TODO(benvanik): call this after waking in case CPU hotplugging happens.
+ iree_thread_request_affinity(poller->thread, poller->ideal_thread_affinity);
+
+ // Enter the running state immediately. Note that we could have been requested
+ // to exit while suspended/still starting up, so check that here before we
+ // mess with any data structures.
+ const bool should_run =
+ iree_atomic_exchange_int32(&poller->state, IREE_TASK_POLLER_STATE_RUNNING,
+ iree_memory_order_seq_cst) !=
+ IREE_TASK_POLLER_STATE_EXITING;
+ if (IREE_LIKELY(should_run)) {
+ // << work happens here >>
+ iree_task_poller_pump_until_exit(poller);
+ }
+
+ IREE_TRACE_ZONE_END(thread_zone);
+ iree_atomic_store_int32(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE,
+ iree_memory_order_seq_cst);
+ iree_notification_post(&poller->state_notification, IREE_ALL_WAITERS);
+ return 0;
+}
diff --git a/runtime/src/iree/task/poller.h b/runtime/src/iree/task/poller.h
new file mode 100644
index 0000000..8618682
--- /dev/null
+++ b/runtime/src/iree/task/poller.h
@@ -0,0 +1,146 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_POLLER_H_
+#define IREE_TASK_POLLER_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/threading.h"
+#include "iree/base/internal/wait_handle.h"
+#include "iree/task/affinity_set.h"
+#include "iree/task/list.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Forward declared; only a pointer is used here so executor.h need not be
+// included by this header.
+typedef struct iree_task_executor_t iree_task_executor_t;
+
+// Indicates the current state of a poller or, in the case of EXITING, the state
+// the poller should transition to.
+//
+// Transition graph:
+// SUSPENDED -> RUNNING -> EXITING -> ZOMBIE
+//
+// NOTE: state values are ordered such that </> comparisons can be used; ensure
+// that for example all states after resuming are > SUSPENDED and all states
+// before exiting are < EXITING.
+typedef enum iree_task_poller_state_e {
+ // Wait thread has been created in a suspended state and must be resumed to
+ // wake for the first time.
+ IREE_TASK_POLLER_STATE_SUSPENDED = 0,
+ // Wait thread is running and servicing wait tasks.
+ IREE_TASK_POLLER_STATE_RUNNING = 1,
+ // Wait thread should exit (or is exiting) and will soon enter the zombie
+ // state.
+ IREE_TASK_POLLER_STATE_EXITING = 2,
+ // Wait thread has exited and entered a 🧟 state (waiting for join).
+ // The thread handle is still valid and must be destroyed.
+ IREE_TASK_POLLER_STATE_ZOMBIE = 3,
+} iree_task_poller_state_t;
+
+// Wait task poller with a dedicated thread for performing syscalls.
+// This keeps potentially-blocking syscalls off the worker threads and ensures
+// the lowest possible latency for wakes as the poller will always be kept in
+// the system wait queue.
+//
+// During coordination wait tasks are registered with the poller for handling.
+// The wait thread will wake, merge the newly-registered tasks into its lists,
+// and then enter the system multi-wait API to wait for either one or more waits
+// to resolve or the timeout to be hit (representing sleeps). Resolved waits
+// will cause the wait task to be resubmitted to the executor with a flag
+// indicating that they have completed waiting and can be retired. This ensures
+// that all task-related work (completion callbacks, etc) executes on the worker
+// threads and the poller can immediately return to the system for more waiting.
+typedef struct {
+ // Parent executor used to access the global work queue and submit wakes.
+ iree_task_executor_t* executor;
+
+ // Current state of the poller (iree_task_poller_state_t).
+ iree_atomic_int32_t state;
+ // Notification signaled when the wait thread changes state.
+ iree_notification_t state_notification;
+
+ // Ideal affinity for the wait thread. This can be used to keep the wait
+ // thread from contending with the processing threads. To allow the wait
+ // thread to run anywhere use iree_thread_affinity_set_any.
+ iree_thread_affinity_t ideal_thread_affinity;
+
+ // Thread handle of the wait thread. If the thread has exited the handle will
+ // remain valid so that the poller can query its state.
+ iree_thread_t* thread;
+
+ // Event used to force the wait thread to wake.
+ // This allows the wait thread to remain in a syscall but still be woken when
+ // new wait tasks arrive and need to be managed by the wait thread.
+ // Set from threads submitting tasks to the poller and reset after the wait
+ // thread has woken and processed them. All system waits have this event
+ // in the wait set.
+ iree_event_t wake_event;
+
+ // A LIFO mailbox used by coordinators to post wait tasks to the poller.
+ // This allows for submissions to add tasks without needing to synchronize
+ // with the wait thread; tasks are pushed to the mailbox and then merged with
+ // the full wait set by the wait thread the next time it wakes.
+ iree_atomic_task_slist_t mailbox_slist;
+
+ // A list of wait tasks with external handles that need to be waited on.
+ // Managed by the wait thread and must not be accessed from any other thread.
+ // This is the full set of waits actively being managed by the poller.
+ iree_task_list_t wait_list;
+
+ // Wait set containing wait handles from wait_list.
+ // Managed by the wait thread and must not be accessed from any other thread.
+ // This may only contain a subset of the wait_list in cases where some of
+ // the wait tasks do not have full system handles.
+ iree_wait_set_t* wait_set;
+} iree_task_poller_t;
+
+// Initializes |out_poller| with a new poller.
+// |executor| will be used to submit woken tasks for processing.
+iree_status_t iree_task_poller_initialize(
+ iree_task_executor_t* executor,
+ iree_thread_affinity_t ideal_thread_affinity,
+ iree_task_poller_t* out_poller);
+
+// Requests that the poller wait thread begin exiting (if it hasn't already).
+// If the wait thread is in a syscall it will be woken as soon as possible.
+//
+// May be called from any thread. Any active waits will be aborted as possible.
+void iree_task_poller_request_exit(iree_task_poller_t* poller);
+
+// Blocks the caller until |poller| has exited.
+//
+// May be called from any thread.
+void iree_task_poller_await_exit(iree_task_poller_t* poller);
+
+// Deinitializes |poller| after the thread has exited.
+// The poller must be in the IREE_TASK_POLLER_STATE_ZOMBIE state.
+//
+// Expected shutdown sequence:
+// - request_exit
+// - await_exit
+// - deinitialize
+void iree_task_poller_deinitialize(iree_task_poller_t* poller);
+
+// Enqueues |wait_tasks| on the poller and kicks the wait thread.
+// The task pointers will be retained by the poller and must remain valid.
+//
+// May be called from any thread. Waits may begin and complete prior to the
+// function returning.
+void iree_task_poller_enqueue(iree_task_poller_t* poller,
+ iree_task_list_t* wait_tasks);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_POLLER_H_
diff --git a/runtime/src/iree/task/pool.c b/runtime/src/iree/task/pool.c
new file mode 100644
index 0000000..387bdbb
--- /dev/null
+++ b/runtime/src/iree/task/pool.c
@@ -0,0 +1,291 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/pool.h"
+
+#include <stdint.h>
+
+#include "iree/base/internal/math.h"
+#include "iree/base/tracing.h"
+
+// Minimum byte size of a block in bytes, including the tasks as well as the
+// allocation header. This is here to allow us to reduce the number of times
+// we go to the allocator and amortize the overhead of our block header.
+#define IREE_TASK_POOL_MIN_BLOCK_SIZE (4 * 1024)
+
+// Alignment for block allocations; roughly a (likely) page size.
+// Many allocators round up requests above the small byte range (~thousands of
+// bytes); this just prevents us from being 1 byte over the allocator block
+// size and wasting space in a larger bucket.
+#define IREE_TASK_POOL_BLOCK_ALIGNMENT (4 * 1024)
+
+// The minimum number of tasks that will be allocated when growth is needed.
+// The total number may be larger once rounded to meet block size and alignment
+// requirements. Note that we leave a bit of room here for the block header
+// such that we don't always allocate a nice round number + N bytes that then
+// bumps us into the next power of two bucket.
+#define IREE_TASK_POOL_MIN_GROWTH_CAPACITY (255)
+
+// Grows the task pool by at least |minimum_capacity| on top of its current
+// capacity. The actual number of tasks available may be rounded up to make the
+// allocated blocks more allocator-friendly sizes.
+//
+// As an optimization for on-demand growth cases an |out_task| can be specified
+// to receive a task without the need for acquiring one from the pool
+// immediately after the growth completes. This avoids a race condition where
+// another thread could snipe the tasks we just allocated for the caller prior
+// to the caller getting a chance to acquire one.
+static iree_status_t iree_task_pool_grow(iree_task_pool_t* pool,
+                                         iree_host_size_t minimum_capacity,
+                                         iree_task_t** out_task) {
+  if (IREE_UNLIKELY(!minimum_capacity)) return iree_ok_status();
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Allocate a new block of tasks. To try to prevent the allocator from
+  // fragmenting we try to always allocate blocks that are page-aligned and
+  // powers of two.
+  //
+  // Note that we pad out our header to iree_max_align_t bytes so that all tasks
+  // are aligned on the same boundaries as required by atomic operations.
+  iree_host_size_t header_size =
+      iree_host_align(sizeof(iree_task_allocation_header_t), iree_max_align_t);
+  iree_host_size_t pow2_block_size = iree_math_round_up_to_pow2_u64(
+      header_size + minimum_capacity * pool->task_size);
+  iree_host_size_t aligned_block_size =
+      iree_host_align(pow2_block_size, IREE_TASK_POOL_BLOCK_ALIGNMENT);
+  if (aligned_block_size < IREE_TASK_POOL_MIN_BLOCK_SIZE) {
+    aligned_block_size = IREE_TASK_POOL_MIN_BLOCK_SIZE;
+  }
+  iree_task_allocation_header_t* allocation = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(pool->allocator, aligned_block_size,
+                                (void**)&allocation));
+
+  // Insert the allocation into the tracking list. Nothing reads the list until
+  // the pool is trimmed/deinitialized so it's safe to do now prior to
+  // populating anything. It's all just empty data anyway.
+  iree_atomic_task_allocation_slist_push(&pool->allocations_slist, allocation);
+
+  // Since we may have rounded up the allocation we may have gotten more space
+  // for tasks than we were asked for. Ensure we actually make use of them.
+  iree_host_size_t actual_capacity =
+      (aligned_block_size - header_size) / pool->task_size;
+
+  // Stitch together the tasks by setting all next pointers.
+  // Since we are going to be touching all the pages the order here is important
+  // as once we insert these new tasks into the available_slist they'll be
+  // popped out head->tail. To ensure the head that gets popped first is still
+  // warm in cache we construct the list backwards, with the tail tasks being
+  // fine to be evicted.
+  //
+  // The nice thing about this walk is that it ensures that if there were any
+  // zero-fill-on-demand trickery going on the pages are all wired here vs.
+  // when the tasks are first acquired from the list where it'd be harder to
+  // track.
+  //
+  // The tail task (highest address) is initialized outside the loop as the
+  // NULL list terminator; the loop must therefore begin at the *second* task
+  // from the end (i = 1 with a pre-decrement of |p|) or the first iteration
+  // would point the tail at itself and turn the free list into a cycle.
+  uintptr_t p = ((uintptr_t)allocation + aligned_block_size) - pool->task_size;
+  iree_task_t* head = (iree_task_t*)p;
+  iree_task_t* tail = head;
+  head->next_task = NULL;
+  head->pool = pool;
+  for (iree_host_size_t i = 1; i < actual_capacity; ++i) {
+    p -= pool->task_size;
+    iree_task_t* task = (iree_task_t*)p;
+    task->next_task = head;
+    task->pool = pool;
+    head = task;
+  }
+
+  // If the caller needs a task we can slice off the head to return prior to
+  // adding it to the slist where it may get stolen.
+  if (out_task) {
+    *out_task = head;
+    head = head->next_task;
+  }
+
+  // Concatenate the list of new free tasks into the pool.
+  iree_atomic_task_slist_concat(&pool->available_slist, head, tail);
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+iree_status_t iree_task_pool_initialize(iree_allocator_t allocator,
+                                        iree_host_size_t task_size,
+                                        iree_host_size_t initial_capacity,
+                                        iree_task_pool_t* out_pool) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, task_size);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, initial_capacity);
+
+  out_pool->allocator = allocator;
+  out_pool->task_size = task_size;
+  iree_atomic_task_allocation_slist_initialize(&out_pool->allocations_slist);
+  iree_atomic_task_slist_initialize(&out_pool->available_slist);
+
+  // Preallocate the initial block of tasks (no-op if initial_capacity is 0).
+  // NOTE(review): on grow failure the slists above remain initialized but the
+  // status is propagated — confirm callers treat a failed initialize as not
+  // requiring deinitialize.
+  iree_status_t status =
+      iree_task_pool_grow(out_pool, initial_capacity, /*out_task=*/NULL);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void iree_task_pool_deinitialize(iree_task_pool_t* pool) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Flush and free every block allocation made by the pool. Tasks are
+  // suballocated from these blocks so no per-task cleanup is required; all
+  // tasks must have already been returned to the pool by the caller.
+  iree_task_allocation_header_t* allocation = NULL;
+  if (iree_atomic_task_allocation_slist_flush(
+          &pool->allocations_slist,
+          IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO, &allocation, NULL)) {
+    while (allocation) {
+      // Grab the next pointer before freeing the node that holds it.
+      iree_task_allocation_header_t* next =
+          iree_atomic_task_allocation_slist_get_next(allocation);
+      iree_allocator_free(pool->allocator, allocation);
+      allocation = next;
+    }
+  }
+  iree_atomic_task_allocation_slist_deinitialize(&pool->allocations_slist);
+  iree_atomic_task_slist_deinitialize(&pool->available_slist);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_task_pool_trim(iree_task_pool_t* pool) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // NOTE: this is only safe if there are no outstanding tasks.
+  // Hopefully the caller read the docstring!
+
+  // We only need to flush the list to empty it - these are just references into
+  // the allocations and don't need to be released.
+  iree_task_t* task_head = NULL;
+  iree_atomic_task_slist_flush(&pool->available_slist,
+                               IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO,
+                               &task_head, /*tail=*/NULL);
+
+  // Free every block allocation. Since no tasks may be live while trimming,
+  // all blocks are by definition unused and the pool returns to its empty
+  // (but still initialized) state; it will regrow on the next acquire.
+  iree_task_allocation_header_t* allocation_head = NULL;
+  if (iree_atomic_task_allocation_slist_flush(
+          &pool->allocations_slist,
+          IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO, &allocation_head,
+          /*tail=*/NULL)) {
+    do {
+      iree_task_allocation_header_t* next =
+          iree_atomic_task_allocation_slist_get_next(allocation_head);
+      iree_allocator_free(pool->allocator, allocation_head);
+      allocation_head = next;
+    } while (allocation_head != NULL);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_task_pool_acquire(iree_task_pool_t* pool,
+                                     iree_task_t** out_task) {
+  // A NULL pool is treated as a pool that can never satisfy requests.
+  if (!pool) return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED);
+
+  // Attempt to acquire a task from the available list (fast path).
+  iree_task_t* task = iree_atomic_task_slist_pop(&pool->available_slist);
+  if (task) {
+    *out_task = task;
+    return iree_ok_status();
+  }
+
+  // No tasks were available when we tried; force growth now.
+  // Note that due to races it's possible that there are now tasks that have
+  // been released back into the pool, but the fact that we failed once means
+  // we are sitting right at the current limit of the pool and growing will
+  // help ensure we go down the fast path more frequently in the future.
+  return iree_task_pool_grow(pool, IREE_TASK_POOL_MIN_GROWTH_CAPACITY,
+                             out_task);
+}
+
+iree_status_t iree_task_pool_acquire_many(iree_task_pool_t* pool,
+                                          iree_host_size_t count,
+                                          iree_task_list_t* out_list) {
+  if (!pool) return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED);
+
+  // If we acquire more than the requested count we need to give those leftovers
+  // back to the pool before we leave.
+  iree_task_list_t leftover_tasks;
+  iree_task_list_initialize(&leftover_tasks);
+  iree_task_list_initialize(out_list);
+
+  iree_status_t status = iree_ok_status();
+  while (count) {
+    // Flush the entire available list so we can start operating on it.
+    // This is where the potential race comes in: if another thread goes to
+    // acquire a task while we have the list local here it'll grow the list so
+    // it can meet its demand. That's still correct behavior but will result in
+    // potentially more wasted memory than if the other thread would have
+    // waited. Thankfully we save memory in so many other places that in the
+    // rare case there are multiple concurrent schedulers acquiring tasks it's
+    // not the end of the world.
+    iree_task_list_t acquired_tasks;
+    iree_task_list_initialize(&acquired_tasks);
+    if (iree_atomic_task_slist_flush(
+            &pool->available_slist,
+            IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO,
+            &acquired_tasks.head,
+            /*tail=*/NULL)) {
+      // Had some items in the pool; eat up to the requested count.
+      // Note that we may run out and need to allocate more or have gotten
+      // too many during the flush and need to track those leftovers.
+      //
+      // Instead of having the slist flush walk the list and give us a tail we
+      // do that here: we need to walk the list anyway to partition it.
+      //
+      // The head itself is the first acquired task: consume it before walking
+      // so that exactly |count| tasks are taken (walking first would take one
+      // extra task and, with a single-element flush, leave tail NULL).
+      iree_task_t* p = acquired_tasks.head;
+      acquired_tasks.tail = p;
+      --count;
+      while (count > 0) {
+        iree_task_t* next = iree_atomic_task_slist_get_next(p);
+        if (!next) break;
+        p = next;
+        acquired_tasks.tail = p;
+        --count;
+      }
+
+      // If we got everything we need then we have to put all of the flushed
+      // tasks we didn't use into the leftover list. The flush may have ended
+      // exactly at |count| tasks in which case there are no leftovers.
+      if (count == 0) {
+        iree_task_t* leftover_head =
+            iree_atomic_task_slist_get_next(acquired_tasks.tail);
+        if (leftover_head) {
+          // Split the list after tail and walk the remainder to find its end.
+          iree_atomic_task_slist_set_next(acquired_tasks.tail, NULL);
+          iree_task_list_t acquire_leftovers;
+          iree_task_list_initialize(&acquire_leftovers);
+          acquire_leftovers.head = leftover_head;
+          iree_task_t* q = leftover_head;
+          iree_task_t* next;
+          while ((next = iree_atomic_task_slist_get_next(q))) q = next;
+          acquire_leftovers.tail = q;
+          iree_task_list_append(&leftover_tasks, &acquire_leftovers);
+        }
+      }
+
+      // Add the tasks we did acquire to our result list.
+      // NOTE: this is unmeasured but the intuition is that we want to put the
+      // tasks we just acquired at the head of the list so that they are warm
+      // upon return to the caller who will then be touching the head of the
+      // list immediately.
+      iree_task_list_prepend(out_list, &acquired_tasks);
+    }
+
+    // If we still need more tasks but ran out of ones in the flush list then we
+    // need to grow some more.
+    if (count > 0) {
+      status = iree_task_pool_grow(pool, count, /*out_task=*/NULL);
+      if (IREE_UNLIKELY(!iree_status_is_ok(status))) break;
+    }
+  }
+
+  // Return leftovers that we acquired but didn't need to the pool.
+  iree_atomic_task_slist_concat(&pool->available_slist, leftover_tasks.head,
+                                leftover_tasks.tail);
+
+  // Upon failure return any tasks we may have already acquired from the pool.
+  if (IREE_UNLIKELY(!iree_status_is_ok(status))) {
+    iree_atomic_task_slist_concat(&pool->available_slist, out_list->head,
+                                  out_list->tail);
+  }
+
+  return status;
+}
+
+void iree_task_pool_release(iree_task_pool_t* pool, iree_task_t* task) {
+  // NULL pools are tolerated so callers can unconditionally release.
+  if (!pool) return;
+  // Tasks must be returned to the pool they were acquired from.
+  IREE_ASSERT_EQ(task->pool, pool);
+  iree_atomic_task_slist_push(&pool->available_slist, task);
+}
diff --git a/runtime/src/iree/task/pool.h b/runtime/src/iree/task/pool.h
new file mode 100644
index 0000000..de9d5e9
--- /dev/null
+++ b/runtime/src/iree/task/pool.h
@@ -0,0 +1,115 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_POOL_H_
+#define IREE_TASK_POOL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/task/list.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// An allocation of tasks in a task pool containing multiple tasks.
+// This struct is at the head of all task allocations made from the allocator.
+// It is used to form a linked list of all allocations made so that they can be
+// easily freed during pool teardown.
+typedef struct iree_task_allocation_header_t {
+  // Next allocation in the linked list of allocations.
+  iree_atomic_slist_intrusive_ptr_t* next;
+} iree_task_allocation_header_t;
+
+// An atomic approximately LIFO singly-linked list of allocation headers,
+// used to track every block allocation for freeing at trim/teardown time.
+IREE_TYPED_ATOMIC_SLIST_WRAPPER(iree_atomic_task_allocation,
+                                iree_task_allocation_header_t,
+                                offsetof(iree_task_allocation_header_t, next));
+
+// Shared thread-safe pool of iree_task_t structures of a particular size.
+// This can be used to quickly allocate blocks of tasks to be initialized by
+// task producers, enqueued, and then eventually recycled back to the pool.
+//
+// The lifetime of all tasks must be less than the pool they were acquired
+// from. Tasks acquired from one pool must not be released to another pool or
+// via any other mechanism.
+//
+// Pools can either be fixed-size with a maximum number of available tasks that
+// can be outstanding at any time or growable to allow the pool to be grown
+// unbounded after initialization.
+typedef struct iree_task_pool_t {
+  // Allocator used for allocating/freeing each allocation block.
+  iree_allocator_t allocator;
+
+  // Task size, in bytes. Fixed for the lifetime of the pool.
+  iree_host_size_t task_size;
+
+  // NOTE: we don't track current usage count as that would introduce additional
+  // contention as tasks are acquired/released. If we end up finding a lot of
+  // memory idling here we can add a threshold over which we reclaim it, but the
+  // easiest (and most efficient) solution is to force the user to synchronize
+  // with the executor on a low memory event and use iree_task_pool_trim.
+
+  // Head of a linked list of all allocations made by the pool.
+  iree_atomic_task_allocation_slist_t allocations_slist;
+
+  // Linked list of free tasks used as a stack (LIFO).
+  // This is not a great structure for this as over time the tasks will get out
+  // of order and walking the linked list will incur cache misses. We offset
+  // that cost a bit by knowing that the time between walking the list to
+  // acquire tasks and when we initialize the tasks is short and that we would
+  // have triggered a cache miss anyway. In the future we can explore other
+  // approaches (such as small chunked linear lists) that better exploit spatial
+  // locality, if needed.
+  iree_atomic_task_slist_t available_slist;
+} iree_task_pool_t;
+
+// Initializes a task pool and optionally performs an initial task allocation.
+iree_status_t iree_task_pool_initialize(iree_allocator_t allocator,
+                                        iree_host_size_t task_size,
+                                        iree_host_size_t initial_capacity,
+                                        iree_task_pool_t* out_pool);
+
+// Deinitializes a task pool and releases all task allocations back to the
+// allocator specified during initialization. All tasks must have already been
+// released back to the pool.
+void iree_task_pool_deinitialize(iree_task_pool_t* pool);
+
+// Attempts to trim unused allocations from the task pool.
+// Must not be called while any tasks that were acquired from this pool are
+// still live; callers must synchronize with the executor and ensure they aren't
+// pushing any more work during the trim operation.
+void iree_task_pool_trim(iree_task_pool_t* pool);
+
+// Acquires a task from the task pool. The returned task will have undefined
+// contents and must be initialized by the caller.
+iree_status_t iree_task_pool_acquire(iree_task_pool_t* pool,
+                                     iree_task_t** out_task);
+
+// Acquires a set of tasks from the task pool. The returned tasks will have
+// undefined contents besides their intrusive next pointers and must be
+// initialized by the caller.
+//
+// WARNING: this may cause growth during races if multiple threads are trying to
+// acquire at the same time. Our usage patterns here are such that this is never
+// the case, though, as all acquisition from the internal executor pools happens
+// with the coordination lock held.
+iree_status_t iree_task_pool_acquire_many(iree_task_pool_t* pool,
+                                          iree_host_size_t count,
+                                          iree_task_list_t* out_list);
+
+// Releases a task to the task pool.
+// Callers must ensure the task is no longer in use.
+void iree_task_pool_release(iree_task_pool_t* pool, iree_task_t* task);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_TASK_POOL_H_
diff --git a/runtime/src/iree/task/pool_test.cc b/runtime/src/iree/task/pool_test.cc
new file mode 100644
index 0000000..107b83b
--- /dev/null
+++ b/runtime/src/iree/task/pool_test.cc
@@ -0,0 +1,92 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/pool.h"
+
+#include <cstdint>
+
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+// Test task type: embeds the required iree_task_t base at offset 0 and pads
+// with a payload so the pool exercises a realistically-sized task.
+typedef struct iree_test_task_t {
+  iree_task_t base;
+  uint8_t payload[32];
+} iree_test_task_t;
+
+// Initialize and immediately deinitialize a pool with a preallocation to
+// verify the allocation bookkeeping balances with no acquires at all.
+TEST(PoolTest, Lifetime) {
+  iree_task_pool_t pool;
+  IREE_ASSERT_OK(iree_task_pool_initialize(
+      iree_allocator_system(), sizeof(iree_test_task_t), 32, &pool));
+  iree_task_pool_deinitialize(&pool);
+}
+
+TEST(PoolTest, AcquireRelease) {
+  // Start with 2 preallocated tasks so we can test both acquiring existing and
+  // growing to allocate new tasks.
+  iree_task_pool_t pool;
+  IREE_ASSERT_OK(iree_task_pool_initialize(iree_allocator_system(),
+                                           sizeof(iree_test_task_t), 2, &pool));
+
+  // Acquire 4 tasks (so we test both the initial size and allocated tasks).
+  iree_test_task_t* tasks[4] = {NULL, NULL, NULL, NULL};
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(tasks); ++i) {
+    IREE_ASSERT_OK(iree_task_pool_acquire(&pool, (iree_task_t**)&tasks[i]));
+    EXPECT_TRUE(tasks[i] != NULL);
+  }
+
+  // Release all tasks back to the pool.
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(tasks); ++i) {
+    iree_task_pool_release(&pool, (iree_task_t*)tasks[i]);
+  }
+
+  // Acquire all tasks again to make sure we put them back in correctly.
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(tasks); ++i) {
+    IREE_ASSERT_OK(iree_task_pool_acquire(&pool, (iree_task_t**)&tasks[i]));
+    EXPECT_TRUE(tasks[i] != NULL);
+  }
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(tasks); ++i) {
+    iree_task_pool_release(&pool, (iree_task_t*)tasks[i]);
+  }
+
+  iree_task_pool_deinitialize(&pool);
+}
+
+TEST(PoolTest, Trim) {
+  // Start with 2 preallocated tasks so we can test both acquiring existing and
+  // growing to allocate new tasks.
+  iree_task_pool_t pool;
+  IREE_ASSERT_OK(iree_task_pool_initialize(iree_allocator_system(),
+                                           sizeof(iree_test_task_t), 2, &pool));
+
+  // Acquire and release some tasks.
+  // NOTE: {NULL} value-initializes all 8 elements via aggregate initialization
+  // (the previous explicit 4-element list was a copy-paste from the 4-task
+  // test above and only covered half the array explicitly).
+  iree_test_task_t* tasks[8] = {NULL};
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(tasks); ++i) {
+    IREE_ASSERT_OK(iree_task_pool_acquire(&pool, (iree_task_t**)&tasks[i]));
+    EXPECT_TRUE(tasks[i] != NULL);
+  }
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(tasks); ++i) {
+    iree_task_pool_release(&pool, (iree_task_t*)tasks[i]);
+  }
+
+  // Trim to shrink the pool memory.
+  // NOTE: trimming is only supported when there are no outstanding tasks.
+  iree_task_pool_trim(&pool);
+
+  // Acquire again to make sure we can reallocate the pool.
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(tasks); ++i) {
+    IREE_ASSERT_OK(iree_task_pool_acquire(&pool, (iree_task_t**)&tasks[i]));
+    EXPECT_TRUE(tasks[i] != NULL);
+  }
+  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(tasks); ++i) {
+    iree_task_pool_release(&pool, (iree_task_t*)tasks[i]);
+  }
+
+  iree_task_pool_deinitialize(&pool);
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/post_batch.c b/runtime/src/iree/task/post_batch.c
new file mode 100644
index 0000000..bd6c383
--- /dev/null
+++ b/runtime/src/iree/task/post_batch.c
@@ -0,0 +1,192 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/post_batch.h"
+
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/threading.h"
+#include "iree/base/tracing.h"
+#include "iree/task/executor_impl.h"
+#include "iree/task/queue.h"
+#include "iree/task/worker.h"
+
+void iree_task_post_batch_initialize(iree_task_executor_t* executor,
+                                     iree_task_worker_t* current_worker,
+                                     iree_task_post_batch_t* out_post_batch) {
+  out_post_batch->executor = executor;
+  // May be NULL when posting from a non-worker thread (e.g. a submission).
+  out_post_batch->current_worker = current_worker;
+  out_post_batch->worker_pending_mask = 0;
+  // Zero the trailing flexible array of per-worker pending lists; the caller
+  // must have allocated storage for executor->worker_count entries.
+  memset(&out_post_batch->worker_pending_lifos, 0,
+         executor->worker_count * sizeof(iree_task_list_t));
+}
+
+iree_host_size_t iree_task_post_batch_worker_count(
+    const iree_task_post_batch_t* post_batch) {
+  // Mirrors the executor's worker count; the batch has one pending list per
+  // worker.
+  return post_batch->executor->worker_count;
+}
+
+// Selects a worker from |affinity_set|, restricted to workers that are
+// currently live. Falls back to worker 0 when no live worker matches.
+// NOTE: despite the name this is currently deterministic (lowest set bit);
+// see the TODO below about rotating through candidates.
+static iree_host_size_t iree_task_post_batch_select_random_worker(
+    iree_task_post_batch_t* post_batch, iree_task_affinity_set_t affinity_set) {
+  iree_task_affinity_set_t worker_live_mask =
+      iree_atomic_task_affinity_set_load(
+          &post_batch->executor->worker_live_mask, iree_memory_order_acquire);
+  iree_task_affinity_set_t valid_worker_mask = affinity_set & worker_live_mask;
+  if (!valid_worker_mask) {
+    // No valid workers as desired; for now just bail to worker 0.
+    return 0;
+  }
+
+  // TODO(benvanik): rotate through workers here. Instead, if the affinity set
+  // has the current_worker allowed we just use that to avoid needing a
+  // cross-thread hop.
+  return iree_task_affinity_set_count_trailing_zeros(valid_worker_mask);
+}
+
+iree_host_size_t iree_task_post_batch_select_worker(
+    iree_task_post_batch_t* post_batch, iree_task_affinity_set_t affinity_set) {
+  if (post_batch->current_worker) {
+    // Posting from a worker - prefer sending right back to this worker if we
+    // haven't already scheduled for it.
+    if ((affinity_set & post_batch->current_worker->worker_bit) &&
+        !(post_batch->worker_pending_mask &
+          post_batch->current_worker->worker_bit)) {
+      return iree_task_affinity_set_count_trailing_zeros(
+          post_batch->current_worker->worker_bit);
+    }
+  }
+
+  // Prefer workers that are idle as though they'll need to wake up it is
+  // guaranteed that they aren't working on something else and the latency of
+  // waking should (hopefully) be less than the latency of waiting for a
+  // worker's queue to finish. Note that we only consider workers idle if we
+  // ourselves in this batch haven't already queued work for them (as then they
+  // aren't going to be idle).
+  iree_task_affinity_set_t worker_idle_mask =
+      iree_atomic_task_affinity_set_load(
+          &post_batch->executor->worker_idle_mask, iree_memory_order_relaxed);
+  worker_idle_mask &= ~post_batch->worker_pending_mask;
+  iree_task_affinity_set_t idle_affinity_set = affinity_set & worker_idle_mask;
+  if (idle_affinity_set) {
+    return iree_task_post_batch_select_random_worker(post_batch,
+                                                     idle_affinity_set);
+  }
+
+  // No more workers are idle; farm out at random. In the worst case work
+  // stealing will help balance things out on the backend.
+  return iree_task_post_batch_select_random_worker(post_batch, affinity_set);
+}
+
+void iree_task_post_batch_enqueue(iree_task_post_batch_t* post_batch,
+                                  iree_host_size_t worker_index,
+                                  iree_task_t* task) {
+  // Push-front keeps the per-worker list in LIFO order to match the worker
+  // mailbox slist it will be concatenated with at submit time.
+  iree_task_list_push_front(&post_batch->worker_pending_lifos[worker_index],
+                            task);
+  // Remember that this worker now has pending work so submit visits it.
+  post_batch->worker_pending_mask |=
+      iree_task_affinity_for_worker(worker_index);
+}
+
+// Wakes each worker indicated in the |wake_mask|, if needed.
+static void iree_task_post_batch_wake_workers(
+    iree_task_post_batch_t* post_batch, iree_task_affinity_set_t wake_mask) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, iree_math_count_ones_u64(wake_mask));
+
+  iree_task_executor_t* executor = post_batch->executor;
+
+  // Wake workers that may be suspended. We fetch the set of workers we need to
+  // wake (hopefully none in the common case) and mark that we've woken them so
+  // that we don't double-resume.
+  iree_task_affinity_set_t resume_mask =
+      iree_atomic_task_affinity_set_fetch_and(&executor->worker_suspend_mask,
+                                              ~wake_mask,
+                                              iree_memory_order_acquire);
+  resume_mask &= wake_mask;
+  if (IREE_UNLIKELY(resume_mask)) {
+    int resume_count = iree_task_affinity_set_count_ones(resume_mask);
+    int worker_index = 0;
+    for (int i = 0; i < resume_count; ++i) {
+      // |offset| is the raw trailing-zero count of the next set bit; the shift
+      // below consumes that bit as well (offset + 1). This must mirror the
+      // wake loop below - adding 1 to the offset here would resume the worker
+      // *after* the suspended one and skip a bit per iteration.
+      int offset = iree_task_affinity_set_count_trailing_zeros(resume_mask);
+      int resume_index = worker_index + offset;
+      worker_index += offset + 1;
+      resume_mask = iree_shr(resume_mask, offset + 1);
+      iree_thread_resume(executor->workers[resume_index].thread);
+    }
+  }
+
+  // TODO(#4016): use a FUTEX_WAKE_BITSET here to wake all of the workers that
+  // have pending work in a single syscall (vs. popcnt(worker_pending_mask)
+  // syscalls). This will reduce wake latency for workers later in the set;
+  // for example today worker[31] will wait until workers[0-30] have had their
+  // syscalls performed before it's even requested to wake. This also loses
+  // information the kernel could use to avoid core migration as it knows when N
+  // threads will be needed simultaneously and can hopefully perform any needed
+  // migrations prior to beginning execution.
+  int wake_count = iree_task_affinity_set_count_ones(wake_mask);
+  int worker_index = 0;
+  for (int i = 0; i < wake_count; ++i) {
+    int offset = iree_task_affinity_set_count_trailing_zeros(wake_mask);
+    int wake_index = worker_index + offset;
+    worker_index += offset + 1;
+    wake_mask = iree_shr(wake_mask, offset + 1);
+
+    // Wake workers if they are waiting - workers are the only thing that can
+    // wait on this notification so this should almost always be either free (an
+    // atomic load) if a particular worker isn't waiting or it's required to
+    // actually wake it and we can't avoid it.
+    iree_task_worker_t* worker = &executor->workers[wake_index];
+    iree_notification_post(&worker->wake_notification, 1);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+bool iree_task_post_batch_submit(iree_task_post_batch_t* post_batch) {
+  // Nothing was enqueued; avoid tracing/wakes entirely.
+  if (!post_batch->worker_pending_mask) return false;
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Run through each worker that has a bit set in the pending mask and post
+  // the pending tasks. The mask is cleared up front so the batch is reset and
+  // reusable after this call.
+  iree_task_affinity_set_t worker_mask = post_batch->worker_pending_mask;
+  post_batch->worker_pending_mask = 0;
+  int worker_index = 0;
+  int post_count = iree_task_affinity_set_count_ones(worker_mask);
+  iree_task_affinity_set_t worker_wake_mask = 0;
+  for (int i = 0; i < post_count; ++i) {
+    // Skip to the next set bit; the shift consumes the found bit as well.
+    int offset = iree_task_affinity_set_count_trailing_zeros(worker_mask);
+    int target_index = worker_index + offset;
+    worker_index += offset + 1;
+    worker_mask = iree_shr(worker_mask, offset + 1);
+
+    iree_task_worker_t* worker = &post_batch->executor->workers[target_index];
+    iree_task_list_t* target_pending_lifo =
+        &post_batch->worker_pending_lifos[target_index];
+    if (worker == post_batch->current_worker) {
+      // Fast-path for posting to self; this happens when a worker plays the
+      // role of coordinator and we want to ensure we aren't doing a fully
+      // block-and-flush loop when we could just be popping the next new task
+      // off the list.
+      iree_task_queue_append_from_lifo_list_unsafe(&worker->local_task_queue,
+                                                   target_pending_lifo);
+    } else {
+      iree_task_worker_post_tasks(worker, target_pending_lifo);
+      worker_wake_mask |= iree_task_affinity_for_worker(target_index);
+    }
+  }
+
+  // Wake all workers that now have pending work. If a worker is not already
+  // waiting this will be cheap (no syscall). Self-posts don't need a wake.
+  if (worker_wake_mask != 0) {
+    iree_task_post_batch_wake_workers(post_batch, worker_wake_mask);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return post_count != 0;
+}
diff --git a/runtime/src/iree/task/post_batch.h b/runtime/src/iree/task/post_batch.h
new file mode 100644
index 0000000..470b7a9
--- /dev/null
+++ b/runtime/src/iree/task/post_batch.h
@@ -0,0 +1,73 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_POST_BATCH_H_
+#define IREE_TASK_POST_BATCH_H_
+
+#include <stdbool.h>
+
+#include "iree/base/config.h"
+#include "iree/task/affinity_set.h"
+#include "iree/task/executor.h"
+#include "iree/task/list.h"
+#include "iree/task/task.h"
+#include "iree/task/tuning.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_task_worker_t iree_task_worker_t;
+
+// Transient/stack-allocated structure for batching up tasks for posting to
+// worker mailboxes in single operations. This avoids the need to repeatedly
+// thrash caches during coordination as only during submission are the worker
+// mailboxes touched and only once per worker.
+typedef struct iree_task_post_batch_t {
+  iree_task_executor_t* executor;
+
+  // Local worker constructing the post batch.
+  // This is used to know when lighter-weight queuing can occur (no need to
+  // post across a mailbox channel to yourself!).
+  // May be NULL if not being posted from a worker (such as a submission).
+  iree_task_worker_t* current_worker;
+
+  // A bitmask of workers indicating which have pending tasks in their lists.
+  // Used to quickly scan the lists and perform the posts only when required.
+  iree_task_affinity_set_t worker_pending_mask;
+
+  // A per-worker LIFO task list waiting to be posted.
+  // Flexible array member: storage for one entry per executor worker must be
+  // allocated immediately following this struct.
+  iree_task_list_t worker_pending_lifos[0];
+} iree_task_post_batch_t;
+
+// Initializes |out_post_batch| for posting tasks from |current_worker| (which
+// may be NULL when posting from a non-worker thread).
+void iree_task_post_batch_initialize(iree_task_executor_t* executor,
+                                     iree_task_worker_t* current_worker,
+                                     iree_task_post_batch_t* out_post_batch);
+
+// Returns the total number of workers that the post batch is targeting.
+iree_host_size_t iree_task_post_batch_worker_count(
+    const iree_task_post_batch_t* post_batch);
+
+// Selects a worker from the given affinity set, preferring the posting worker
+// itself and then idle workers before falling back to an arbitrary candidate.
+iree_host_size_t iree_task_post_batch_select_worker(
+    iree_task_post_batch_t* post_batch, iree_task_affinity_set_t affinity_set);
+
+// Enqueues a task to the given worker. Note that the pending work list for
+// each worker is kept in LIFO order so that we can easily concatenate it with
+// the worker mailbox slist that's in LIFO order.
+void iree_task_post_batch_enqueue(iree_task_post_batch_t* post_batch,
+                                  iree_host_size_t worker_index,
+                                  iree_task_t* task);
+
+// Submits all pending tasks to their worker mailboxes and resets state.
+// Returns true if any tasks were posted to workers.
+bool iree_task_post_batch_submit(iree_task_post_batch_t* post_batch);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_TASK_POST_BATCH_H_
diff --git a/runtime/src/iree/task/queue.c b/runtime/src/iree/task/queue.c
new file mode 100644
index 0000000..823947b
--- /dev/null
+++ b/runtime/src/iree/task/queue.c
@@ -0,0 +1,90 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/queue.h"
+
+#include <stddef.h>
+#include <string.h>
+
+// Initializes |out_queue| in-place to an empty state with its lock ready.
+void iree_task_queue_initialize(iree_task_queue_t* out_queue) {
+  memset(out_queue, 0, sizeof(*out_queue));
+  iree_slim_mutex_initialize(&out_queue->mutex);
+  iree_task_list_initialize(&out_queue->list);
+}
+
+// Deinitializes |queue|, discarding any tasks still in the list.
+// NOTE(review): no lock is taken here; assumes no other thread (including
+// thieves) may touch the queue anymore, per the header contract.
+void iree_task_queue_deinitialize(iree_task_queue_t* queue) {
+  // Discard remaining tasks before tearing down the mutex that guards them.
+  iree_task_list_discard(&queue->list);
+  iree_slim_mutex_deinitialize(&queue->mutex);
+}
+
+// Returns true if no tasks are currently held by |queue|.
+// The result is a snapshot only and may be stale by the time the caller
+// acts on it (other threads may push/steal concurrently).
+bool iree_task_queue_is_empty(iree_task_queue_t* queue) {
+  iree_slim_mutex_lock(&queue->mutex);
+  const bool result = iree_task_list_is_empty(&queue->list);
+  iree_slim_mutex_unlock(&queue->mutex);
+  return result;
+}
+
+// Pushes |task| so that it becomes the next task popped from |queue|.
+void iree_task_queue_push_front(iree_task_queue_t* queue, iree_task_t* task) {
+  iree_slim_mutex_lock(&queue->mutex);
+  iree_task_list_push_front(&queue->list, task);
+  iree_slim_mutex_unlock(&queue->mutex);
+}
+
+// Appends LIFO |list| to |queue| in FIFO order, emptying |list|.
+// "unsafe" refers to |list|: the caller must exclusively own it since it is
+// mutated (reversed) outside of any lock.
+void iree_task_queue_append_from_lifo_list_unsafe(iree_task_queue_t* queue,
+                                                  iree_task_list_t* list) {
+  // NOTE: reversing the list outside of the lock to keep the critical
+  // section short.
+  iree_task_list_reverse(list);
+  iree_slim_mutex_lock(&queue->mutex);
+  iree_task_list_append(&queue->list, list);
+  iree_slim_mutex_unlock(&queue->mutex);
+}
+
+// Moves all tasks from |source_slist| into |queue| in FIFO order and pops the
+// next task to execute. Returns NULL if the queue ends up empty.
+iree_task_t* iree_task_queue_flush_from_lifo_slist(
+    iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist) {
+  // Atomically take ownership of the incoming tasks outside of the lock; the
+  // slist flush swaps out the whole list in one shot and afterward we own it
+  // exclusively.
+  iree_task_list_t flushed_tasks;
+  iree_task_list_initialize(&flushed_tasks);
+  bool has_tasks = iree_atomic_task_slist_flush(
+      source_slist, IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_FIFO,
+      &flushed_tasks.head, &flushed_tasks.tail);
+
+  // Under the lock: concatenate the new tasks and take the next to run.
+  iree_slim_mutex_lock(&queue->mutex);
+  if (has_tasks) {
+    iree_task_list_append(&queue->list, &flushed_tasks);
+  }
+  iree_task_t* front_task = iree_task_list_pop_front(&queue->list);
+  iree_slim_mutex_unlock(&queue->mutex);
+
+  return front_task;
+}
+
+// Pops and returns the next task in FIFO order or NULL if |queue| is empty.
+iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue) {
+  iree_slim_mutex_lock(&queue->mutex);
+  iree_task_t* next_task = iree_task_list_pop_front(&queue->list);
+  iree_slim_mutex_unlock(&queue->mutex);
+  return next_task;
+}
+
+// Steals up to |max_tasks| from the tail of |source_queue| into
+// |target_queue|, returning the first stolen task (or NULL if none).
+iree_task_t* iree_task_queue_try_steal(iree_task_queue_t* source_queue,
+                                       iree_task_queue_t* target_queue,
+                                       iree_host_size_t max_tasks) {
+  // Slice a batch of tasks off of the victim queue. The two locks are taken
+  // one at a time (never nested) so there is no lock-ordering hazard.
+  iree_task_list_t batch;
+  iree_task_list_initialize(&batch);
+  iree_slim_mutex_lock(&source_queue->mutex);
+  iree_task_list_split(&source_queue->list, max_tasks, &batch);
+  iree_slim_mutex_unlock(&source_queue->mutex);
+  if (iree_task_list_is_empty(&batch)) return NULL;
+
+  // Donate the batch to the thief's queue and hand back its first task.
+  iree_slim_mutex_lock(&target_queue->mutex);
+  iree_task_list_append(&target_queue->list, &batch);
+  iree_task_t* next_task = iree_task_list_pop_front(&target_queue->list);
+  iree_slim_mutex_unlock(&target_queue->mutex);
+  return next_task;
+}
diff --git a/runtime/src/iree/task/queue.h b/runtime/src/iree/task/queue.h
new file mode 100644
index 0000000..917b872
--- /dev/null
+++ b/runtime/src/iree/task/queue.h
@@ -0,0 +1,166 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_QUEUE_H_
+#define IREE_TASK_QUEUE_H_
+
+#include <stdbool.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/task/list.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A simple work-stealing LIFO queue modeled on a Chase-Lev concurrent deque.
+// This is used by workers to maintain their thread-local working lists. The
+// workers keep the tasks they will process in FIFO order. They allow it to
+// empty and then refresh it with more tasks from the incoming worker mailbox.
+// The performance bias here is to the workers as they are >90% of the
+// accesses and the only other accesses are thieves that hopefully we can just
+// improve our distribution to vs. introducing a slowdown here.
+//
+// A futex is used to synchronize access; because the common case is that of
+// only the worker that owns the queue touching it for pushing and popping items
+// this puts us into the sweet-spot of uncontended lightweight exclusive locks.
+// Since futexes are effectively just single machine words managed with atomic
+// ops we can avoid a lot of the traditional atomic tomfoolery one finds in
+// systems like these that originated prior to the introduction of futexes
+// while also keeping the tiny overhead of the pure atomic solutions.
+//
+// We can also take advantage of the futex providing an actual exclusive region
+// such that our data structure can be whatever we want as opposed to needing to
+// be something that someone had figured out how to make atomic. For example,
+// common implementations of work-stealing queues are all bounded as unbounded
+// atomic deques are an unsolved problem in CS.
+//
+// Very rarely when another worker runs out of work it'll try to steal tasks
+// from nearby workers and use this queue type to do it: the assumption is that
+// it's better to take the last task the victim worker will get to so that in a
+// long list of tasks it remains chugging through the head of the list with good
+// cache locality. If we end up with a lot of theft, though, it's possible for
+// the cache benefits of the pop_back approach to the worker to outweigh the
+// cache pessimism for all thieves. Let's hope we can schedule deterministic-
+// enough tiles such that theft is rare!
+//
+// Our queue variant here is tuned for the use case we have: we exclusively
+// push in multiple tasks at a time (flushed from the mailbox) and exclusively
+// pop a single task at a time (what to work on next). The stealing part is
+// so that when a remote worker has to perform a theft it takes a good chunk of
+// tasks in one go (hopefully roughly half) to reduce the total overhead when
+// there is high imbalance in workloads.
+//
+// Flushing from the mailbox slist (LIFO) to our list (FIFO) requires a full
+// walk of the incoming task linked list. This is generally fine as the number
+// of tasks in any given flush is low(ish) and by walking in reverse order to
+// then process forward the cache should be hot as the worker starts making its
+// way back through the tasks. As we walk forward we'll be using the task fields
+// for execution and retiring of tasks (notifying dependencies/etc) and the
+// intrusive next pointer sitting next to those should be in-cache when we need
+// to access it. This, combined with slab allocation of tasks in command buffers
+// to begin with gives us the (probabilistically) same characteristics of a flat
+// array walked with an index as is common in other work queues but with the
+// flexibility to reorder tasks as we see fit (theft, redistribution/rotation,
+// reprioritization, etc).
+//
+// Similar concepts, though implemented with atomics:
+// "Dynamic Circular Work-Stealing Deque":
+// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.170.1097&rep=rep1&type=pdf
+// "Correct and Efficient Work-Stealing for Weak Memory Models":
+// https://fzn.fr/readings/ppopp13.pdf
+// Motivating article:
+// https://blog.molecular-matters.com/2015/08/24/job-system-2-0-lock-free-work-stealing-part-1-basics/
+//
+// Useful diagram from https://github.com/injinj/WSQ
+// Much of this implementation is inspired from that; though significant
+// reworking was required for our FIFO->LIFO->FIFO sandwich.
+// +--------+ <- tasks[0]
+// | top | <- stealers consume here: task = tasks[top++]
+// | |
+// | || |
+// | |
+// | vv |
+// | bottom | <- owner pushes here: tasks[bottom++] = task
+// | | owner consumes here: task = tasks[--bottom]
+// | |
+// +--------+ <- tasks[IREE_TASK_QUEUE_CAPACITY-1]
+//
+// Unlike that implementation, though, our task list is unbounded because we use
+// a linked list. To keep our options open, though, I've left the API of this
+// implementation compatible with classic atomic work-stealing queues. I'm
+// hopeful this will not need to be revisited for a while, though!
+//
+// Future improvement idea: have the owner of the queue maintain a theft point
+// skip list that makes it possible for thieves to quickly come in and slice
+// off batches of tasks at the tail of the queue. Since we are a singly-linked
+// list we can't easily just walk backward and we don't want to be introducing
+// cache line contention as thieves start touching the same tasks as the worker
+// is while processing.
+typedef struct iree_task_queue_t {
+  // Must be held when manipulating the queue. >90% accesses are by the owner.
+  iree_slim_mutex_t mutex;
+
+  // FIFO task list; front is the next task the owning worker will execute.
+  iree_task_list_t list IREE_GUARDED_BY(mutex);
+} iree_task_queue_t;
+
+// Initializes a work-stealing task queue in-place.
+void iree_task_queue_initialize(iree_task_queue_t* out_queue);
+
+// Deinitializes a task queue and clears all references.
+// Must not be called while any other worker may be attempting to steal tasks.
+void iree_task_queue_deinitialize(iree_task_queue_t* queue);
+
+// Returns true if the queue is empty.
+// Note that due to races this may return both false-positives and -negatives.
+bool iree_task_queue_is_empty(iree_task_queue_t* queue);
+
+// Pushes a task to the front of the queue.
+// Always prefer the multi-push variants (prepend/append) when adding more than
+// one task to the queue. This is mostly useful for exceptional cases such as
+// when a task may yield and need to be reprocessed after the worker resumes.
+//
+// Must only be called from the owning worker's thread.
+void iree_task_queue_push_front(iree_task_queue_t* queue, iree_task_t* task);
+
+// Appends a LIFO |list| of tasks to the queue.
+//
+// Must only be called from the owning worker's thread.
+void iree_task_queue_append_from_lifo_list_unsafe(iree_task_queue_t* queue,
+ iree_task_list_t* list);
+
+// Flushes the |source_slist| LIFO mailbox into the task queue in FIFO order.
+// Returns the first task in the queue upon success; the task may be
+// pre-existing or from the newly flushed tasks.
+//
+// Must only be called from the owning worker's thread.
+iree_task_t* iree_task_queue_flush_from_lifo_slist(
+ iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist);
+
+// Pops a task from the front of the queue if any are available.
+//
+// Must only be called from the owning worker's thread.
+iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue);
+
+// Tries to steal up to |max_tasks| from the back of the queue.
+// Returns NULL if no tasks are available and otherwise up to |max_tasks| tasks
+// that were at the tail of the |source_queue| will be moved to the
+// |target_queue| and the first of the stolen tasks is returned.
+//
+// It's expected this is not called from the queue's owning worker, though it's
+// valid to do so.
+iree_task_t* iree_task_queue_try_steal(iree_task_queue_t* source_queue,
+ iree_task_queue_t* target_queue,
+ iree_host_size_t max_tasks);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_QUEUE_H_
diff --git a/runtime/src/iree/task/queue_test.cc b/runtime/src/iree/task/queue_test.cc
new file mode 100644
index 0000000..53342fd
--- /dev/null
+++ b/runtime/src/iree/task/queue_test.cc
@@ -0,0 +1,322 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/queue.h"
+
+#include "iree/testing/gtest.h"
+
+namespace {
+
+// Construct/destroy with no tasks enqueued.
+TEST(QueueTest, Lifetime) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+  iree_task_queue_deinitialize(&queue);
+}
+
+// An empty queue reports empty and pops NULL.
+TEST(QueueTest, Empty) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  EXPECT_FALSE(iree_task_queue_pop_front(&queue));
+  iree_task_queue_deinitialize(&queue);
+}
+
+// push_front/pop_front behave as a stack (LIFO) for single-task pushes.
+TEST(QueueTest, PushPop) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  EXPECT_FALSE(iree_task_queue_pop_front(&queue));
+
+  iree_task_t task_a = {0};
+  iree_task_queue_push_front(&queue, &task_a);
+
+  EXPECT_FALSE(iree_task_queue_is_empty(&queue));
+
+  iree_task_t task_b = {0};
+  iree_task_queue_push_front(&queue, &task_b);
+
+  EXPECT_FALSE(iree_task_queue_is_empty(&queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
+
+  EXPECT_FALSE(iree_task_queue_is_empty(&queue));
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
+
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  EXPECT_FALSE(iree_task_queue_pop_front(&queue));
+
+  iree_task_queue_deinitialize(&queue);
+}
+
+// Appending an empty list is a no-op.
+TEST(QueueTest, AppendListEmpty) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+
+  iree_task_list_t list = {0};
+
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  iree_task_queue_append_from_lifo_list_unsafe(&queue, &list);
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+  iree_task_queue_deinitialize(&queue);
+}
+
+// Appending a single-element list moves that element into the queue.
+TEST(QueueTest, AppendList1) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+
+  iree_task_list_t list = {0};
+  iree_task_t task_a = {0};
+  iree_task_list_push_front(&list, &task_a);
+
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  iree_task_queue_append_from_lifo_list_unsafe(&queue, &list);
+  EXPECT_FALSE(iree_task_queue_is_empty(&queue));
+  EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+
+  iree_task_queue_deinitialize(&queue);
+}
+
+// Appending a LIFO list yields FIFO pop order.
+TEST(QueueTest, AppendListOrdered) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+
+  // Make a lifo list: b<-a.
+  iree_task_list_t list = {0};
+  iree_task_t task_a = {0};
+  iree_task_list_push_front(&list, &task_a);
+  iree_task_t task_b = {0};
+  iree_task_list_push_front(&list, &task_b);
+
+  // Append the list to the queue; it should swap LIFO->FIFO.
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  iree_task_queue_append_from_lifo_list_unsafe(&queue, &list);
+  EXPECT_FALSE(iree_task_queue_is_empty(&queue));
+  EXPECT_TRUE(iree_task_list_is_empty(&list));
+
+  // Pop list and ensure order: a->b.
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+
+  iree_task_queue_deinitialize(&queue);
+}
+
+// Flushing an empty slist leaves the queue empty and returns NULL.
+TEST(QueueTest, FlushSlistEmpty) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+
+  iree_atomic_task_slist_t slist;
+  iree_atomic_task_slist_initialize(&slist);
+
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  EXPECT_FALSE(iree_task_queue_flush_from_lifo_slist(&queue, &slist));
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+
+  iree_atomic_task_slist_deinitialize(&slist);
+
+  iree_task_queue_deinitialize(&queue);
+}
+
+// Flushing a single-element slist returns that element directly, leaving the
+// queue empty (the returned task is popped off as part of the flush).
+TEST(QueueTest, FlushSlist1) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+
+  iree_atomic_task_slist_t slist;
+  iree_atomic_task_slist_initialize(&slist);
+  iree_task_t task_a = {0};
+  iree_atomic_task_slist_push(&slist, &task_a);
+
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  EXPECT_EQ(&task_a, iree_task_queue_flush_from_lifo_slist(&queue, &slist));
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+
+  iree_atomic_task_slist_deinitialize(&slist);
+
+  iree_task_queue_deinitialize(&queue);
+}
+
+// Flushing a multi-element LIFO slist yields FIFO execution order.
+TEST(QueueTest, FlushSlistOrdered) {
+  iree_task_queue_t queue;
+  iree_task_queue_initialize(&queue);
+
+  // Make a lifo list: c<-b<-a.
+  iree_atomic_task_slist_t slist;
+  iree_atomic_task_slist_initialize(&slist);
+  iree_task_t task_a = {0};
+  iree_atomic_task_slist_push(&slist, &task_a);
+  iree_task_t task_b = {0};
+  iree_atomic_task_slist_push(&slist, &task_b);
+  iree_task_t task_c = {0};
+  iree_atomic_task_slist_push(&slist, &task_c);
+
+  // Flush the list to the queue; it should swap LIFO->FIFO and return the
+  // first task in the queue.
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+  EXPECT_EQ(&task_a, iree_task_queue_flush_from_lifo_slist(&queue, &slist));
+  EXPECT_FALSE(iree_task_queue_is_empty(&queue));
+
+  // Pop list and ensure order: [a->]b->c.
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
+  EXPECT_EQ(&task_c, iree_task_queue_pop_front(&queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&queue));
+
+  iree_atomic_task_slist_deinitialize(&slist);
+
+  iree_task_queue_deinitialize(&queue);
+}
+
+// NOTE(review): despite the name the source queue is non-empty here; this
+// exercises stealing a single task (from the tail) into an empty target
+// queue — consider renaming for clarity.
+TEST(QueueTest, TryStealEmpty) {
+  iree_task_queue_t source_queue;
+  iree_task_queue_initialize(&source_queue);
+  iree_task_queue_t target_queue;
+  iree_task_queue_initialize(&target_queue);
+
+  iree_task_t task_a = {0};
+  iree_task_queue_push_front(&source_queue, &task_a);
+  iree_task_t task_b = {0};
+  iree_task_queue_push_front(&source_queue, &task_b);
+  iree_task_t task_c = {0};
+  iree_task_queue_push_front(&source_queue, &task_c);
+
+  EXPECT_EQ(&task_a,
+            iree_task_queue_try_steal(&source_queue, &target_queue, 1));
+
+  iree_task_queue_deinitialize(&source_queue);
+  iree_task_queue_deinitialize(&target_queue);
+}
+
+// Stealing more than available takes everything; the single stolen task is
+// returned directly so both queues end empty.
+TEST(QueueTest, TryStealLast) {
+  iree_task_queue_t source_queue;
+  iree_task_queue_initialize(&source_queue);
+  iree_task_queue_t target_queue;
+  iree_task_queue_initialize(&target_queue);
+
+  iree_task_t task_a = {0};
+  iree_task_queue_push_front(&source_queue, &task_a);
+
+  EXPECT_EQ(&task_a,
+            iree_task_queue_try_steal(&source_queue, &target_queue, 100));
+  EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
+
+  iree_task_queue_deinitialize(&source_queue);
+  iree_task_queue_deinitialize(&target_queue);
+}
+
+// Stealing one task takes it from the tail (the victim's last task).
+TEST(QueueTest, TrySteal1) {
+  iree_task_queue_t source_queue;
+  iree_task_queue_initialize(&source_queue);
+  iree_task_queue_t target_queue;
+  iree_task_queue_initialize(&target_queue);
+
+  iree_task_t task_a = {0};
+  iree_task_t task_b = {0};
+  iree_task_t task_c = {0};
+  iree_task_queue_push_front(&source_queue, &task_c);
+  iree_task_queue_push_front(&source_queue, &task_b);
+  iree_task_queue_push_front(&source_queue, &task_a);
+
+  EXPECT_EQ(&task_c,
+            iree_task_queue_try_steal(&source_queue, &target_queue, 1));
+  EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
+
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
+
+  iree_task_queue_deinitialize(&source_queue);
+  iree_task_queue_deinitialize(&target_queue);
+}
+
+// Stealing into a non-empty target pops the target's existing front task,
+// not the newly-stolen one.
+TEST(QueueTest, TryStealIntoExisting) {
+  iree_task_queue_t source_queue;
+  iree_task_queue_initialize(&source_queue);
+  iree_task_queue_t target_queue;
+  iree_task_queue_initialize(&target_queue);
+
+  iree_task_t task_a = {0};
+  iree_task_t task_b = {0};
+  iree_task_queue_push_front(&source_queue, &task_b);
+  iree_task_queue_push_front(&source_queue, &task_a);
+
+  iree_task_t task_existing = {0};
+  iree_task_queue_push_front(&target_queue, &task_existing);
+
+  EXPECT_EQ(&task_existing,
+            iree_task_queue_try_steal(&source_queue, &target_queue, 1));
+
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
+
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&target_queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
+
+  iree_task_queue_deinitialize(&source_queue);
+  iree_task_queue_deinitialize(&target_queue);
+}
+
+// Stealing a batch takes the tail tasks; first stolen task is returned and
+// the rest remain queued on the target.
+TEST(QueueTest, TryStealMany) {
+  iree_task_queue_t source_queue;
+  iree_task_queue_initialize(&source_queue);
+  iree_task_queue_t target_queue;
+  iree_task_queue_initialize(&target_queue);
+
+  iree_task_t task_a = {0};
+  iree_task_t task_b = {0};
+  iree_task_t task_c = {0};
+  iree_task_t task_d = {0};
+  iree_task_queue_push_front(&source_queue, &task_d);
+  iree_task_queue_push_front(&source_queue, &task_c);
+  iree_task_queue_push_front(&source_queue, &task_b);
+  iree_task_queue_push_front(&source_queue, &task_a);
+
+  EXPECT_EQ(&task_c,
+            iree_task_queue_try_steal(&source_queue, &target_queue, 2));
+  EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
+
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
+
+  iree_task_queue_deinitialize(&source_queue);
+  iree_task_queue_deinitialize(&target_queue);
+}
+
+// NOTE(review): asking for more tasks than exist (1000 of 4) steals only 2
+// (c and d) here — presumably iree_task_list_split caps at roughly half;
+// confirm against its implementation.
+TEST(QueueTest, TryStealAll) {
+  iree_task_queue_t source_queue;
+  iree_task_queue_initialize(&source_queue);
+  iree_task_queue_t target_queue;
+  iree_task_queue_initialize(&target_queue);
+
+  iree_task_t task_a = {0};
+  iree_task_t task_b = {0};
+  iree_task_t task_c = {0};
+  iree_task_t task_d = {0};
+  iree_task_queue_push_front(&source_queue, &task_d);
+  iree_task_queue_push_front(&source_queue, &task_c);
+  iree_task_queue_push_front(&source_queue, &task_b);
+  iree_task_queue_push_front(&source_queue, &task_a);
+
+  EXPECT_EQ(&task_c,
+            iree_task_queue_try_steal(&source_queue, &target_queue, 1000));
+  EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
+
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
+  EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
+
+  iree_task_queue_deinitialize(&source_queue);
+  iree_task_queue_deinitialize(&target_queue);
+}
+
+}  // namespace
new file mode 100644
index 0000000..ff0f34b
--- /dev/null
+++ b/runtime/src/iree/task/scope.c
@@ -0,0 +1,163 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/scope.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+
+// Initializes |out_scope| with a copy of |name| truncated to fit the
+// fixed-size buffer (always NUL-terminated).
+void iree_task_scope_initialize(iree_string_view_t name,
+                                iree_task_scope_t* out_scope) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  memset(out_scope, 0, sizeof(*out_scope));
+
+  // Truncate to capacity - 1 so there is always room for the terminator.
+  iree_host_size_t name_length =
+      iree_min(name.size, IREE_ARRAYSIZE(out_scope->name) - 1);
+  memcpy(out_scope->name, name.data, name_length);
+  out_scope->name[name_length] = 0;
+
+  // TODO(benvanik): pick trace colors based on name hash.
+  IREE_TRACE(out_scope->task_trace_color = 0xFFFF0000u);
+
+  iree_slim_mutex_initialize(&out_scope->mutex);
+  iree_notification_initialize(&out_scope->idle_notification);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Deinitializes |scope|. The scope must already be idle (asserted below);
+// any unconsumed failure status is released here.
+void iree_task_scope_deinitialize(iree_task_scope_t* scope) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  IREE_ASSERT(
+      iree_task_scope_is_idle(scope),
+      "pending submissions must be aborted prior to deinitializing their "
+      "scope");
+
+  // Makes it easier to see if we were incorrectly using the name even after the
+  // scope is deinitialized. Since scopes may be stack allocated we don't want
+  // to have anyone trying to access them (like tracy).
+  memset(scope->name, 0xCD, sizeof(scope->name));
+
+  // In most cases the status will have been consumed by the scope owner.
+  // Exchange clears any lingering status so it can be released here instead
+  // of leaking its allocation.
+  iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr(
+      &scope->permanent_status, (intptr_t)NULL, iree_memory_order_acquire);
+  IREE_IGNORE_ERROR(status);
+
+  iree_notification_deinitialize(&scope->idle_notification);
+  iree_slim_mutex_deinitialize(&scope->mutex);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the scope name as a view over the internal NUL-terminated buffer.
+// The view is only valid while |scope| remains initialized.
+iree_string_view_t iree_task_scope_name(iree_task_scope_t* scope) {
+  return iree_make_cstring_view(scope->name);
+}
+
+// Returns the accumulated dispatch statistics and resets them to zero.
+// NOTE(review): unsynchronized read-modify; values may tear if tasks are in
+// flight — this matches the contract documented in the header.
+iree_task_dispatch_statistics_t iree_task_scope_consume_statistics(
+    iree_task_scope_t* scope) {
+  iree_task_dispatch_statistics_t result = scope->dispatch_statistics;
+  memset(&scope->dispatch_statistics, 0, sizeof(scope->dispatch_statistics));
+  return result;
+}
+
+// Returns true if a permanent (non-OK) status has been set on |scope|.
+bool iree_task_scope_has_failed(iree_task_scope_t* scope) {
+  return iree_atomic_load_intptr(&scope->permanent_status,
+                                 iree_memory_order_seq_cst) != 0;
+}
+
+// Transfers ownership of the scope's failure status (if any) to the caller.
+// On failure the slot is re-armed with a code-only status (no allocation) so
+// the scope remains observably failed after consumption.
+iree_status_t iree_task_scope_consume_status(iree_task_scope_t* scope) {
+  iree_status_t old_status = iree_ok_status();
+  iree_status_t new_status = iree_ok_status();
+  while (!iree_atomic_compare_exchange_strong_intptr(
+      &scope->permanent_status, (intptr_t*)&old_status, (intptr_t)new_status,
+      iree_memory_order_seq_cst, iree_memory_order_seq_cst)) {
+    // Previous status was not OK; we have it now though and can try again.
+    new_status = iree_status_from_code(iree_status_code(old_status));
+  }
+  return old_status;
+}
+
+// Sets the permanent scope status to |new_status| unless one is already set.
+// Takes ownership of |new_status|: if a failure already exists the new status
+// is released and the first failure wins. OK statuses are ignored.
+static void iree_task_scope_try_set_status(iree_task_scope_t* scope,
+                                           iree_status_t new_status) {
+  if (IREE_UNLIKELY(iree_status_is_ok(new_status))) return;
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_TEXT(z0, "failed: ");
+  IREE_TRACE_ZONE_APPEND_TEXT(
+      z0, iree_status_code_string(iree_status_code(new_status)));
+
+  // Only swap in the new status if the slot still holds OK (0).
+  iree_status_t old_status = iree_ok_status();
+  if (!iree_atomic_compare_exchange_strong_intptr(
+          &scope->permanent_status, (intptr_t*)&old_status,
+          (intptr_t)new_status, iree_memory_order_seq_cst,
+          iree_memory_order_seq_cst)) {
+    // Previous status was not OK; drop our new status.
+    IREE_IGNORE_ERROR(new_status);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Moves |scope| into the sticky aborted state; a no-op if the scope has
+// already failed (the first recorded failure wins).
+void iree_task_scope_abort(iree_task_scope_t* scope) {
+  iree_task_scope_try_set_status(
+      scope,
+      iree_make_status(IREE_STATUS_ABORTED, "entire scope aborted by user"));
+}
+
+// Records |status| (ownership transferred) as the scope's permanent failure;
+// ignored if a failure was already recorded.
+void iree_task_scope_fail(iree_task_scope_t* scope, iree_status_t status) {
+  iree_task_scope_try_set_status(scope, status);
+}
+
+// Registers a new in-flight submission; pairs with iree_task_scope_end.
+void iree_task_scope_begin(iree_task_scope_t* scope) {
+  iree_slim_mutex_lock(&scope->mutex);
+  ++scope->pending_submissions;
+  iree_slim_mutex_unlock(&scope->mutex);
+}
+
+// Unregisters an in-flight submission; wakes idle waiters on the last one.
+void iree_task_scope_end(iree_task_scope_t* scope) {
+  iree_slim_mutex_lock(&scope->mutex);
+  bool signal = (--scope->pending_submissions == 0);
+  iree_slim_mutex_unlock(&scope->mutex);
+  // Post outside the lock so woken waiters don't immediately contend on it.
+  if (signal) {
+    // All submissions have completed in this scope - notify any waiters.
+    iree_notification_post(&scope->idle_notification, IREE_ALL_WAITERS);
+  }
+}
+
+// Returns true if no submissions were pending at the time of the call.
+// The result is a snapshot and may be stale immediately upon return.
+bool iree_task_scope_is_idle(iree_task_scope_t* scope) {
+  iree_slim_mutex_lock(&scope->mutex);
+  bool is_idle = scope->pending_submissions == 0;
+  iree_slim_mutex_unlock(&scope->mutex);
+  return is_idle;
+}
+
+// Waits until |scope| has no pending submissions or |deadline_ns| elapses.
+// Returns DEADLINE_EXCEEDED if the scope did not go idle in time.
+iree_status_t iree_task_scope_wait_idle(iree_task_scope_t* scope,
+                                        iree_time_t deadline_ns) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_t status = iree_ok_status();
+  if (deadline_ns == IREE_TIME_INFINITE_PAST) {
+    // A deadline in the past means a non-blocking poll for idleness.
+    if (!iree_task_scope_is_idle(scope)) {
+      status = iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+    }
+  } else if (!iree_notification_await(
+                 &scope->idle_notification,
+                 (iree_condition_fn_t)iree_task_scope_is_idle, scope,
+                 iree_make_deadline(deadline_ns))) {
+    // Blocking wait on the idle notification timed out.
+    status = iree_status_from_code(IREE_STATUS_DEADLINE_EXCEEDED);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/task/scope.h b/runtime/src/iree/task/scope.h
new file mode 100644
index 0000000..2578f57
--- /dev/null
+++ b/runtime/src/iree/task/scope.h
@@ -0,0 +1,160 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_SCOPE_H_
+#define IREE_TASK_SCOPE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/tracing.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A loose way of grouping tasks within the task system.
+// Each scope represents a unique collection of tasks that have some related
+// properties - most often their producer - that need to carry along some
+// tracking information to act on all related tasks at once. They do not
+// indicate any particular ordering of tasks or how the tasks are to be treated
+// by executors.
+//
+// Scopes can be used to signal, propagate, and retrieve failure statuses. As
+// the executor processes tasks in an unordered fashion this is the only way to
+// perform cross-task operations such as "abort all of the tasks from this
+// producer" or "wait until all tasks from this producer finish." In addition
+// there are statistics that can be aggregated across all tasks attributed to
+// the scope that allows for an efficient roll-up of activity over specific
+// durations.
+//
+// Task producers can decide whether to create new scopes for each batch of
+// tasks they submit or reuse scopes for the lifetime of their subprocess. Scope
+// overhead is low and the only advantage of reusing them is that lifetime can
+// become easier to manage by tying them 1:1 with producers.
+//
+// Thread-safe; once created scopes are modified exclusively via atomic
+// operations.
+typedef struct iree_task_scope_t {
+  // Name used for logging and tracing.
+  // 16-byte capacity: initialization truncates to 15 chars + NUL terminator.
+  char name[16];
+
+  // Base color used for tasks in this scope.
+  // The color will be modulated based on task type.
+  IREE_TRACE(uint32_t task_trace_color;)
+
+  // A permanent status code set when a task within the scope fails. All pending
+  // tasks will be aborted, though any in-flight tasks may continue executing
+  // to completion.
+  // Stored as an intptr_t-encoded iree_status_t; 0 means OK (no failure).
+  iree_atomic_intptr_t permanent_status;
+
+  // Dispatch statistics aggregated from all dispatches in this scope. Updated
+  // relatively infrequently and must not be used for task control as values
+  // are undefined in the case of failure and may tear.
+  iree_task_dispatch_statistics_t dispatch_statistics;
+
+  // A mutex used to guard the pending_submissions.
+  // We need a mutex here so that we can ensure proper ordering with respect to
+  // the pending_submissions changes and the idle_notification: if we were to
+  // decrement the pending_submissions to 0 ("going idle") there's a race that
+  // can happen where another thread may come in and observe that prior to the
+  // idle_notification being notified. If that thread happens to be destroying
+  // the scope then boom.
+  //
+  // Thankfully we insert fences fairly infrequently, the contention is low,
+  // and iree_slim_mutex_t is a futex so this isn't much more expensive than
+  // just having an atomic variable.
+  iree_slim_mutex_t mutex;
+
+  // A count of pending submissions within this scope. 0 indicates idle.
+  // Each submission has a fence that references this value and decrements it
+  // as it is reached indicating that all memory used by all tasks within that
+  // submission is available for reuse.
+  uint32_t pending_submissions;
+
+  // A notification signaled when the scope transitions to having no pending
+  // tasks or completes all pending tasks after a failure.
+  iree_notification_t idle_notification;
+} iree_task_scope_t;
+
+// Initializes a caller-allocated scope.
+// Callers must ensure the scope remains live for as long as there are any
+// tasks that may reference it.
+void iree_task_scope_initialize(iree_string_view_t name,
+ iree_task_scope_t* out_scope);
+
+// Deinitializes a task scope.
+// No tasks may be pending and the scope must be idle.
+void iree_task_scope_deinitialize(iree_task_scope_t* scope);
+
+// Returns the name of the scope. Informational only and may be the empty
+// string.
+iree_string_view_t iree_task_scope_name(iree_task_scope_t* scope);
+
+// Returns and resets the statistics for the scope.
+// Statistics may experience tearing (non-atomic update across fields) if this
+// is performed while tasks are in-flight.
+iree_task_dispatch_statistics_t iree_task_scope_consume_statistics(
+ iree_task_scope_t* scope);
+
+// Returns true if the scope has failed.
+// iree_task_scope_consume_status can be used once to get the full status
+// describing the failure and subsequent calls will return the status code.
+bool iree_task_scope_has_failed(iree_task_scope_t* scope);
+
+// Returns the permanent scope failure status to the caller (transferring
+// ownership). The scope will remain in a failed state with the status code.
+iree_status_t iree_task_scope_consume_status(iree_task_scope_t* scope);
+
+// Marks the scope as having been aborted by the user with IREE_STATUS_ABORTED.
+// All pending tasks will be dropped though in-flight tasks may complete
+// execution. Callers must use iree_task_scope_wait_idle to ensure the scope
+// state synchronizes prior to deinitializing. If the scope has already been
+// aborted or failed with a permanent error then the operation is ignored and
+// the previous error status is preserved.
+void iree_task_scope_abort(iree_task_scope_t* scope);
+
+// Marks the scope as having encountered an error while processing a task.
+// The scope will be moved into a permanent failure state and all pending tasks
+// will be aborted. In-flight tasks may continue executing prior to
+// iree_task_scope_wait_idle returning true. If the scope has already been
+// marked as failing then the status is ignored.
+void iree_task_scope_fail(iree_task_scope_t* scope, iree_status_t status);
+
+// Notifies the scope that a new execution task assigned to the scope has begun.
+// The scope is considered active until it is notified execution has completed
+// with iree_task_scope_end.
+void iree_task_scope_begin(iree_task_scope_t* scope);
+
+// Notifies the scope that a previously begun execution task has completed.
+void iree_task_scope_end(iree_task_scope_t* scope);
+
+// Returns true if the scope has no pending or in-flight tasks.
+//
+// May race with other threads enqueuing work and be out of date immediately
+// upon return; callers are expected to use this only when it is safe.
+bool iree_task_scope_is_idle(iree_task_scope_t* scope);
+
+// Waits for the scope to become idle indicating that all pending and in-flight
+// tasks have completed. If the scope is aborted or marked for permanent failure
+// then the wait will only return after it is guaranteed no more tasks will ever
+// be issued by the task system.
+//
+// May race with other threads enqueuing work and be out of date immediately
+// upon return; callers must ensure this is used for command and control
+// decisions only when no other threads may be enqueuing more work.
+iree_status_t iree_task_scope_wait_idle(iree_task_scope_t* scope,
+ iree_time_t deadline_ns);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_SCOPE_H_
diff --git a/runtime/src/iree/task/scope_test.cc b/runtime/src/iree/task/scope_test.cc
new file mode 100644
index 0000000..72befd4
--- /dev/null
+++ b/runtime/src/iree/task/scope_test.cc
@@ -0,0 +1,248 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/scope.h"
+
+#include <chrono>
+#include <thread>
+
+#include "iree/task/submission.h"
+#include "iree/task/task_impl.h"
+#include "iree/testing/gtest.h"
+
+namespace {
+
+TEST(ScopeTest, Lifetime) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope_a"), &scope);
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ iree_task_scope_deinitialize(&scope);
+}
+
+// NOTE: the exact capacity (and whether we store the name at all) is an
+// implementation detail.
+TEST(ScopeTest, LongNameTruncation) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("01234567890123456789"),
+ &scope);
+ EXPECT_TRUE(iree_string_view_equal(iree_make_cstring_view("012345678901234"),
+ iree_task_scope_name(&scope)));
+ iree_task_scope_deinitialize(&scope);
+}
+
+TEST(ScopeTest, AbortEmpty) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope_a"), &scope);
+
+ // Current state is OK.
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(iree_task_scope_consume_status(&scope)));
+
+ // Enter aborted state.
+ iree_task_scope_abort(&scope);
+ iree_status_t consumed_status = iree_task_scope_consume_status(&scope);
+ EXPECT_TRUE(iree_status_is_aborted(consumed_status));
+ iree_status_ignore(consumed_status);
+
+ // Ensure aborted state is sticky.
+ EXPECT_TRUE(iree_status_is_aborted(iree_task_scope_consume_status(&scope)));
+
+ iree_task_scope_deinitialize(&scope);
+}
+
+TEST(ScopeTest, FailEmpty) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope_a"), &scope);
+
+ // Current state is OK.
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(iree_task_scope_consume_status(&scope)));
+
+ // Enter failure state.
+ iree_task_t failed_task = {0};
+ failed_task.scope = &scope;
+ iree_task_scope_fail(&scope,
+ iree_make_status(IREE_STATUS_DATA_LOSS, "whoops!"));
+ iree_status_t consumed_status = iree_task_scope_consume_status(&scope);
+ EXPECT_TRUE(iree_status_is_data_loss(consumed_status));
+ iree_status_ignore(consumed_status);
+
+ // Ensure failure state is sticky.
+ EXPECT_TRUE(iree_status_is_data_loss(iree_task_scope_consume_status(&scope)));
+
+ iree_task_scope_deinitialize(&scope);
+}
+
+// NOTE: only the first failure is recorded and made sticky; subsequent failure
+// calls are ignored.
+TEST(ScopeTest, FailAgain) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope_a"), &scope);
+
+ // Current state is OK.
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(iree_task_scope_consume_status(&scope)));
+
+ // Enter initial failure state.
+ iree_task_t failed_task_a = {0};
+ failed_task_a.scope = &scope;
+ iree_task_scope_fail(&scope,
+ iree_make_status(IREE_STATUS_DATA_LOSS, "whoops 1"));
+ iree_status_t consumed_status_a = iree_task_scope_consume_status(&scope);
+ EXPECT_TRUE(iree_status_is_data_loss(consumed_status_a));
+ iree_status_ignore(consumed_status_a);
+
+  // Ensure failure state is sticky.
+ EXPECT_TRUE(iree_status_is_data_loss(iree_task_scope_consume_status(&scope)));
+
+ // Try failing again - it should be ignored and correctly iree_status_free'd.
+ iree_task_t failed_task_b = {0};
+ failed_task_b.scope = &scope;
+ iree_task_scope_fail(
+ &scope, iree_make_status(IREE_STATUS_FAILED_PRECONDITION, "whoops 2"));
+ iree_status_t consumed_status_b = iree_task_scope_consume_status(&scope);
+ EXPECT_TRUE(iree_status_is_data_loss(consumed_status_b));
+ iree_status_ignore(consumed_status_b);
+
+ // Still the first failure status.
+ EXPECT_TRUE(iree_status_is_data_loss(iree_task_scope_consume_status(&scope)));
+
+ iree_task_scope_deinitialize(&scope);
+}
+
+TEST(ScopeTest, WaitIdleWhenIdle) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope_a"), &scope);
+
+ // Current state is OK and idle.
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(iree_task_scope_consume_status(&scope)));
+
+ // Wait until idle... which is now.
+ EXPECT_TRUE(iree_status_is_ok(
+ iree_task_scope_wait_idle(&scope, IREE_TIME_INFINITE_FUTURE)));
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+
+ iree_task_scope_deinitialize(&scope);
+}
+
+TEST(ScopeTest, WaitIdleDeadlineExceeded) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope_a"), &scope);
+
+ // Current state is OK and idle.
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(iree_task_scope_consume_status(&scope)));
+
+ // Enqueue a task to the scope so it is no longer idle.
+ iree_task_fence_t fence_task;
+ iree_task_fence_initialize(&scope, iree_wait_primitive_immediate(),
+ &fence_task);
+ EXPECT_FALSE(iree_task_scope_is_idle(&scope));
+
+ // Poll, which should fail immediately because we have the outstanding task.
+ iree_status_t wait_status =
+ iree_task_scope_wait_idle(&scope, IREE_TIME_INFINITE_PAST);
+ EXPECT_TRUE(iree_status_is_deadline_exceeded(wait_status));
+ EXPECT_FALSE(iree_task_scope_is_idle(&scope));
+
+ // Complete the task (required as part of the scope contract).
+ iree_task_submission_t pending_submission;
+ iree_task_submission_initialize(&pending_submission);
+ iree_task_fence_retire(&fence_task, &pending_submission);
+ EXPECT_TRUE(iree_task_submission_is_empty(&pending_submission));
+
+ iree_task_scope_deinitialize(&scope);
+}
+
+TEST(ScopeTest, WaitIdleSuccess) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope_a"), &scope);
+
+ // Current state is OK and idle.
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(iree_task_scope_consume_status(&scope)));
+
+ // Enqueue a task to the scope so it is no longer idle.
+ iree_task_fence_t fence_task;
+ iree_task_fence_initialize(&scope, iree_wait_primitive_immediate(),
+ &fence_task);
+ EXPECT_FALSE(iree_task_scope_is_idle(&scope));
+
+ // Spin up a thread to wait on the scope.
+ std::thread wait_thread([&]() {
+ EXPECT_FALSE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(
+ iree_task_scope_wait_idle(&scope, IREE_TIME_INFINITE_FUTURE)));
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ });
+
+ // Wait a moment for the thread to spin up.
+ // NOTE: this may flake. Need to see if there's a better way to do this.
+ std::this_thread::sleep_for(std::chrono::milliseconds(150));
+
+ // Complete the task.
+ iree_task_submission_t pending_submission;
+ iree_task_submission_initialize(&pending_submission);
+ iree_task_fence_retire(&fence_task, &pending_submission);
+ EXPECT_TRUE(iree_task_submission_is_empty(&pending_submission));
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+
+ // Join with the thread - this will hang if it didn't wake correctly.
+ wait_thread.join();
+
+ iree_task_scope_deinitialize(&scope);
+}
+
+TEST(ScopeTest, WaitIdleFailure) {
+ iree_task_scope_t scope;
+ iree_task_scope_initialize(iree_make_cstring_view("scope_a"), &scope);
+
+ // Current state is OK and idle.
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(iree_task_scope_consume_status(&scope)));
+
+ // Enqueue a task to the scope so it is no longer idle.
+ iree_task_fence_t fence_task;
+ iree_task_fence_initialize(&scope, iree_wait_primitive_immediate(),
+ &fence_task);
+ EXPECT_FALSE(iree_task_scope_is_idle(&scope));
+
+ // Spin up a thread to wait on the scope.
+ std::thread wait_thread([&]() {
+ EXPECT_FALSE(iree_task_scope_is_idle(&scope));
+ EXPECT_TRUE(iree_status_is_ok(
+ iree_task_scope_wait_idle(&scope, IREE_TIME_INFINITE_FUTURE)));
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+ });
+
+ // Wait a moment for the thread to spin up.
+ // NOTE: this may flake. Need to see if there's a better way to do this.
+ std::this_thread::sleep_for(std::chrono::milliseconds(150));
+
+ // Set the failure state.
+ iree_task_scope_fail(
+ &scope, iree_make_status(IREE_STATUS_FAILED_PRECONDITION, "whoops"));
+ EXPECT_FALSE(iree_task_scope_is_idle(&scope));
+
+ // Complete the task.
+ // Note that even if a scope fails we still must complete the tasks so it
+ // becomes idle. This ensures that if the scope state is used to control
+ // deallocation we don't go deallocating the tasks still in flight and waiting
+ // to gracefully fail.
+ iree_task_submission_t pending_submission;
+ iree_task_submission_initialize(&pending_submission);
+ iree_task_fence_retire(&fence_task, &pending_submission);
+ EXPECT_TRUE(iree_task_submission_is_empty(&pending_submission));
+ EXPECT_TRUE(iree_task_scope_is_idle(&scope));
+
+ // Join with the thread - this will hang if it didn't wake correctly.
+ wait_thread.join();
+
+ iree_task_scope_deinitialize(&scope);
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/submission.c b/runtime/src/iree/task/submission.c
new file mode 100644
index 0000000..0e8f2d6
--- /dev/null
+++ b/runtime/src/iree/task/submission.c
@@ -0,0 +1,71 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/submission.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+
+void iree_task_submission_initialize(iree_task_submission_t* out_submission) {
+ iree_task_list_initialize(&out_submission->ready_list);
+ iree_task_list_initialize(&out_submission->waiting_list);
+}
+
+void iree_task_submission_initialize_from_lifo_slist(
+ iree_atomic_task_slist_t* ready_slist,
+ iree_task_submission_t* out_submission) {
+ // Flush from the LIFO ready list to the LIFO submission queue.
+ // We have to walk everything here to get the tail pointer, which could be
+ // improved by sourcing from something other than an slist.
+ iree_task_submission_initialize(out_submission);
+ iree_atomic_task_slist_flush(
+ ready_slist, IREE_ATOMIC_SLIST_FLUSH_ORDER_APPROXIMATE_LIFO,
+ &out_submission->ready_list.head, &out_submission->ready_list.tail);
+}
+
+void iree_task_submission_reset(iree_task_submission_t* submission) {
+ memset(&submission->ready_list, 0, sizeof(submission->ready_list));
+ memset(&submission->waiting_list, 0, sizeof(submission->waiting_list));
+}
+
+void iree_task_submission_discard(iree_task_submission_t* submission) {
+ iree_task_list_discard(&submission->ready_list);
+ iree_task_list_discard(&submission->waiting_list);
+}
+
+bool iree_task_submission_is_empty(iree_task_submission_t* submission) {
+ return iree_task_list_is_empty(&submission->ready_list) &&
+ iree_task_list_is_empty(&submission->waiting_list);
+}
+
+void iree_task_submission_enqueue(iree_task_submission_t* submission,
+ iree_task_t* task) {
+ IREE_ASSERT_TRUE(iree_task_is_ready(task),
+ "must be a root task to be enqueued on a submission");
+ if (task->type == IREE_TASK_TYPE_WAIT &&
+ (task->flags & IREE_TASK_FLAG_WAIT_COMPLETED) == 0) {
+ // A wait that we know is unresolved and can immediately route to the
+ // waiting list. This avoids the need to try to schedule the wait when it's
+ // almost certain that the wait would not be satisfied.
+ iree_task_list_push_front(&submission->waiting_list, task);
+ } else {
+ // Task is ready to execute immediately.
+ iree_task_list_push_front(&submission->ready_list, task);
+ }
+}
+
+void iree_task_submission_enqueue_list(iree_task_submission_t* submission,
+ iree_task_list_t* list) {
+ iree_task_t* task = list->head;
+ list->head = list->tail = NULL;
+ while (task) {
+ iree_task_t* next = task->next_task;
+ iree_task_submission_enqueue(submission, task);
+ task = next;
+ }
+}
diff --git a/runtime/src/iree/task/submission.h b/runtime/src/iree/task/submission.h
new file mode 100644
index 0000000..9315dc6
--- /dev/null
+++ b/runtime/src/iree/task/submission.h
@@ -0,0 +1,103 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_SUBMISSION_H_
+#define IREE_TASK_SUBMISSION_H_
+
+#include <stdbool.h>
+
+#include "iree/base/api.h"
+#include "iree/task/list.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A pending submission to a task queue made up of a DAG of tasks.
+// Tasks are executed when ready in the order they were enqueued while observing
+// all dependencies. This means that two tasks that have no dependencies may
+// execute out of order/overlap.
+//
+// By keeping track of which tasks are ready for execution (ready_list) upon
+// submission to a queue we avoid the need to walk the task list again and
+// instead only touch the waiting tasks during construction and as they are made
+// ready, avoiding needless work and cache thrashing.
+//
+// Waiting tasks (waiting_list) are those waiting on external dependencies such
+// as file descriptor wait handles. Because we track all of these the executor
+// can perform an efficient multi-wait across queues without needing to block
+// (or even check) every waiting task individually.
+//
+// Because we only track roots of the DAG to release all tasks in a submission
+// early (due to failure or shutdown) the DAG must be walked. Releasing just the
+// lists will only handle the roots and leave all the rest of the tasks
+// dangling.
+//
+// Thread-compatible; designed to be used from a single thread producing the
+// submission.
+typedef struct iree_task_submission_t {
+ // List of tasks that are ready for execution immediately. Upon submission to
+ // a queue the tasks will be passed on to the executor with no delay.
+ //
+ // Tasks are stored in LIFO order; this allows us to quickly concat them with
+ // incoming/mailbox slists that are naturally in LIFO order and that may
+ // contain tasks from prior submissions. Note that we are representing a
+ // ready list - meaning that all tasks are able to start simultaneously (in
+ // the best case where tasks <= workers); this means that the ordering
+ // requirements here are purely for performance and ease of debugging. In
+ // cases where tasks >> workers we could also see some benefits from the
+ // eventual FIFO order matching how the tasks were allocated.
+ iree_task_list_t ready_list;
+
+ // List of tasks that are waiting for execution on external dependencies.
+ // These are root tasks that have no internal task dependencies.
+ // Order is not important here; the assumption is that all waiting tasks are
+ // more of a set than an ordered list and that they can all be waited on as a
+ // multi-wait-any.
+ iree_task_list_t waiting_list;
+} iree_task_submission_t;
+
+// Initializes a task submission.
+void iree_task_submission_initialize(iree_task_submission_t* out_submission);
+
+// Flushes the given |ready_slist| and initializes the submission with all tasks
+// in LIFO order. All tasks in |ready_slist| are assumed to be
+// ready for execution immediately.
+void iree_task_submission_initialize_from_lifo_slist(
+ iree_atomic_task_slist_t* ready_slist,
+ iree_task_submission_t* out_submission);
+
+// Resets the submission by dropping the list references.
+void iree_task_submission_reset(iree_task_submission_t* submission);
+
+// Discards all pending tasks in the submission. This is only safe to call if
+// the submission has not yet been submitted to a queue for execution and should
+// be used for failure cleanup during submission construction.
+void iree_task_submission_discard(iree_task_submission_t* submission);
+
+// Returns true if the submission has no tasks.
+bool iree_task_submission_is_empty(iree_task_submission_t* submission);
+
+// Enqueues |task| to the pending |submission|.
+// The task will be checked to see whether it is immediately ready to execute
+// and placed in an appropriate list; all dependencies must be declared prior to
+// calling this method. After returning new tasks that depend on this task may
+// still be defined. The submission takes ownership of the |task|.
+void iree_task_submission_enqueue(iree_task_submission_t* submission,
+ iree_task_t* task);
+
+// Enqueues all tasks in |list| to the pending |submission|.
+// Ownership of the tasks transfers to the submission and the |list| will be
+// reset upon return. Ready tasks may execute in any order.
+void iree_task_submission_enqueue_list(iree_task_submission_t* submission,
+ iree_task_list_t* list);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_SUBMISSION_H_
diff --git a/runtime/src/iree/task/task.c b/runtime/src/iree/task/task.c
new file mode 100644
index 0000000..4f3593d
--- /dev/null
+++ b/runtime/src/iree/task/task.c
@@ -0,0 +1,826 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/task.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+#include "iree/task/list.h"
+#include "iree/task/pool.h"
+#include "iree/task/post_batch.h"
+#include "iree/task/scope.h"
+#include "iree/task/submission.h"
+#include "iree/task/task_impl.h"
+#include "iree/task/tuning.h"
+
+//==============================================================================
+// Task bookkeeping
+//==============================================================================
+
+void iree_task_initialize(iree_task_type_t type, iree_task_scope_t* scope,
+ iree_task_t* out_task) {
+ // NOTE: only clears the header, not the task body.
+ memset(out_task, 0, sizeof(*out_task));
+ out_task->scope = scope;
+ out_task->affinity_set = iree_task_affinity_for_any_worker();
+ out_task->type = type;
+}
+
+void iree_task_set_cleanup_fn(iree_task_t* task,
+ iree_task_cleanup_fn_t cleanup_fn) {
+ task->cleanup_fn = cleanup_fn;
+}
+
+void iree_task_set_completion_task(iree_task_t* task,
+ iree_task_t* completion_task) {
+ IREE_ASSERT(!task->completion_task);
+ task->completion_task = completion_task;
+ iree_atomic_fetch_add_int32(&completion_task->pending_dependency_count, 1,
+ iree_memory_order_seq_cst);
+}
+
+bool iree_task_is_ready(iree_task_t* task) {
+ if (iree_atomic_load_int32(&task->pending_dependency_count,
+ iree_memory_order_relaxed) > 0) {
+ // At least one dependency is still pending.
+ return false;
+ }
+ return true;
+}
+
+static void iree_task_try_set_status(iree_atomic_intptr_t* permanent_status,
+ iree_status_t new_status) {
+ if (IREE_UNLIKELY(iree_status_is_ok(new_status))) return;
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, "failed: ");
+ IREE_TRACE_ZONE_APPEND_TEXT(
+ z0, iree_status_code_string(iree_status_code(new_status)));
+
+ iree_status_t old_status = iree_ok_status();
+ if (!iree_atomic_compare_exchange_strong_intptr(
+ permanent_status, (intptr_t*)&old_status, (intptr_t)new_status,
+ iree_memory_order_seq_cst, iree_memory_order_seq_cst)) {
+ // Previous status was not OK; drop our new status.
+ IREE_IGNORE_ERROR(new_status);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static void iree_task_cleanup(iree_task_t* task,
+ iree_status_code_t status_code) {
+ // Call the (optional) cleanup function.
+ // NOTE: this may free the memory of the task itself!
+ iree_task_pool_t* pool = task->pool;
+ iree_task_cleanup_fn_t cleanup_fn = task->cleanup_fn;
+ if (cleanup_fn) {
+ cleanup_fn(task, status_code);
+ }
+
+ // Return the task to the pool it was allocated from.
+ // Some tasks are allocated as part of arenas/ringbuffers and won't have a
+ // pool as they'll be cleaned up as part of a larger operation.
+ if (pool) {
+ iree_task_pool_release(pool, task);
+ }
+}
+
+static void iree_task_barrier_discard(iree_task_barrier_t* task,
+ iree_task_list_t* discard_worklist);
+
+void iree_task_discard(iree_task_t* task, iree_task_list_t* discard_worklist) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // This models a BFS discard in our non-recursive approach.
+ // We must ensure that we only discard each task once and that we discard the
+ // tasks in the appropriate order: if we had a DAG of A -> B, C -> D we must
+ // discard respecting the same topological ordering.
+
+ IREE_ASSERT_EQ(0, iree_atomic_load_int32(&task->pending_dependency_count,
+ iree_memory_order_acquire));
+
+ // Almost all tasks will have a completion task; some may have additional
+ // dependent tasks (like barriers) that will be handled below.
+ const bool completion_task_ready =
+ task->completion_task &&
+ iree_atomic_fetch_sub_int32(
+ &task->completion_task->pending_dependency_count, 1,
+ iree_memory_order_acq_rel) == 1;
+ if (completion_task_ready) {
+ iree_task_list_push_back(discard_worklist, task->completion_task);
+ }
+
+ iree_task_scope_t* end_scope = NULL;
+ switch (task->type) {
+ default:
+ case IREE_TASK_TYPE_NOP:
+ case IREE_TASK_TYPE_CALL:
+ break;
+ case IREE_TASK_TYPE_BARRIER:
+ iree_task_barrier_discard((iree_task_barrier_t*)task, discard_worklist);
+ break;
+ case IREE_TASK_TYPE_FENCE:
+ end_scope = task->scope; // need to clean up the task first
+ break;
+ case IREE_TASK_TYPE_WAIT:
+ case IREE_TASK_TYPE_DISPATCH:
+ break;
+ }
+
+ iree_task_cleanup(task, IREE_STATUS_ABORTED);
+ // NOTE: task is invalidated here and cannot be used!
+ task = NULL;
+
+ if (end_scope) {
+ iree_task_scope_end(end_scope);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static void iree_task_retire(iree_task_t* task,
+ iree_task_submission_t* pending_submission,
+ iree_status_t status) {
+ IREE_ASSERT_EQ(0, iree_atomic_load_int32(&task->pending_dependency_count,
+ iree_memory_order_acquire));
+
+ // Decrement the pending count on the completion task, if any.
+ iree_task_t* completion_task = task->completion_task;
+ task->completion_task = NULL;
+
+ if (iree_status_is_ok(status)) {
+ // Task completed successfully.
+ iree_task_cleanup(task, IREE_STATUS_OK);
+ bool completion_task_ready =
+ completion_task &&
+ iree_atomic_fetch_sub_int32(&completion_task->pending_dependency_count,
+ 1, iree_memory_order_acq_rel) == 1;
+ if (completion_task_ready) {
+ // This was the last pending dependency and the completion task is ready
+ // to run.
+ iree_task_submission_enqueue(pending_submission, completion_task);
+ }
+ } else {
+ // Task failed: notify the scope.
+ iree_task_scope_t* scope = task->scope;
+ iree_task_scope_fail(scope, status);
+ status = iree_ok_status(); // consumed by the fail
+
+ // We need to carefully clean up the task: if we go discarding fences we'll
+ // end up waking waiters before we're done. To ensure this doesn't happen
+ // we retain the scope until we've finished cleaning things up.
+ iree_task_scope_begin(scope);
+ iree_task_cleanup(task, IREE_STATUS_ABORTED);
+
+ bool completion_task_ready =
+ completion_task &&
+ iree_atomic_fetch_sub_int32(&completion_task->pending_dependency_count,
+ 1, iree_memory_order_acq_rel) == 1;
+ if (completion_task_ready) {
+ // This was the last pending dependency and we know that we can safely
+ // abort the completion task by discarding.
+ iree_task_list_t discard_worklist;
+ iree_task_list_initialize(&discard_worklist);
+ iree_task_discard(completion_task, &discard_worklist);
+ iree_task_list_discard(&discard_worklist);
+ } else if (completion_task) {
+ // One or more pending dependencies are not yet satisfied and the
+ // completion task must stay alive. We can mark it as aborted, though,
+ // so that it knows not to execute when it is ready to run.
+ // TODO(benvanik): make this atomic? we only ever add bits and it's safe
+ // for it to run if we got this far.
+ completion_task->flags |= IREE_TASK_FLAG_ABORTED;
+ }
+
+ // Unlock the scope; it may immediately be freed before this returns!
+ iree_task_scope_end(scope);
+ }
+
+ // NOTE: task is invalidated here and cannot be used!
+ task = NULL;
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_NOP
+//==============================================================================
+
+void iree_task_nop_initialize(iree_task_scope_t* scope,
+ iree_task_nop_t* out_task) {
+ iree_task_initialize(IREE_TASK_TYPE_NOP, scope, &out_task->header);
+}
+
+void iree_task_nop_retire(iree_task_nop_t* task,
+ iree_task_submission_t* pending_submission) {
+ iree_task_retire(&task->header, pending_submission, iree_ok_status());
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_CALL
+//==============================================================================
+
+// Returns an XXBBGGRR color (red in the lowest bits).
+// Must not be 0 (tracy will ignore).
+static uint32_t iree_math_ptr_to_xrgb(const void* ptr) {
+ // This is just a simple hack to give us a unique(ish) per-pointer color.
+ // It's only to make it easier to distinguish which tiles are from the same
+ // dispatch.
+ uint64_t ptr64 = (uintptr_t)ptr;
+ return (uint32_t)ptr64 ^ (uint32_t)(ptr64 >> 32);
+}
+
+void iree_task_call_initialize(iree_task_scope_t* scope,
+ iree_task_call_closure_t closure,
+ iree_task_call_t* out_task) {
+ iree_task_initialize(IREE_TASK_TYPE_CALL, scope, &out_task->header);
+ out_task->closure = closure;
+ iree_atomic_store_intptr(&out_task->status, 0, iree_memory_order_release);
+}
+
+void iree_task_call_execute(iree_task_call_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_SET_COLOR(z0,
+ iree_math_ptr_to_xrgb(task->closure.user_context));
+
+ if (IREE_LIKELY(
+ !iree_any_bit_set(task->header.flags, IREE_TASK_FLAG_ABORTED))) {
+ // Execute the user callback.
+ // Note that this may enqueue more nested tasks, including tasks that
+ // prevent this task from retiring.
+ iree_status_t status = task->closure.fn(task->closure.user_context,
+ &task->header, pending_submission);
+ if (!iree_status_is_ok(status)) {
+ // Stash the failure status on the task.
+ // If there's still pending dependencies we won't be able to discard
+ // immediately and need to keep the status around until they all complete.
+ iree_task_try_set_status(&task->status, status);
+ status = iree_ok_status(); // consumed by try_set_status
+
+ // TODO(benvanik): discard pending_submission? As we may have pending work
+ // from multiple scopes it's dangerous to discard all. We could filter
+ // based on scope, though, and if we did that we (probably) wouldn't need
+ // to handle the permanent status on the task and could discard
+ // immediately.
+ }
+ }
+
+ // Check to see if there are no pending dependencies before retiring; the
+ // dependency count can go up if new nested tasks were enqueued.
+ if (iree_atomic_load_int32(&task->header.pending_dependency_count,
+ iree_memory_order_acquire) == 0) {
+ iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr(
+ &task->status, 0, iree_memory_order_seq_cst);
+ iree_task_retire(&task->header, pending_submission, status);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_BARRIER
+//==============================================================================
+
+void iree_task_barrier_initialize(iree_task_scope_t* scope,
+ iree_host_size_t dependent_task_count,
+ iree_task_t* const* dependent_tasks,
+ iree_task_barrier_t* out_task) {
+ iree_task_initialize(IREE_TASK_TYPE_BARRIER, scope, &out_task->header);
+ out_task->dependent_task_count = dependent_task_count;
+ out_task->dependent_tasks = dependent_tasks;
+ for (iree_host_size_t i = 0; i < out_task->dependent_task_count; ++i) {
+ iree_task_t* dependent_task = out_task->dependent_tasks[i];
+ iree_atomic_fetch_add_int32(&dependent_task->pending_dependency_count, 1,
+ iree_memory_order_relaxed);
+ }
+}
+
+void iree_task_barrier_initialize_empty(iree_task_scope_t* scope,
+ iree_task_barrier_t* out_task) {
+ iree_task_initialize(IREE_TASK_TYPE_BARRIER, scope, &out_task->header);
+ out_task->dependent_task_count = 0;
+ out_task->dependent_tasks = NULL;
+}
+
+void iree_task_barrier_set_dependent_tasks(
+ iree_task_barrier_t* task, iree_host_size_t dependent_task_count,
+ iree_task_t* const* dependent_tasks) {
+ task->dependent_task_count = dependent_task_count;
+ task->dependent_tasks = dependent_tasks;
+ for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) {
+ iree_task_t* dependent_task = task->dependent_tasks[i];
+ iree_atomic_fetch_add_int32(&dependent_task->pending_dependency_count, 1,
+ iree_memory_order_relaxed);
+ }
+}
+
+static void iree_task_barrier_discard(iree_task_barrier_t* task,
+ iree_task_list_t* discard_worklist) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Discard all of the tasks after the barrier.
+ // Note that we need to ensure we only enqueue them for discard after all of
+ // their dependencies have been met - otherwise we'll double-discard.
+ for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) {
+ iree_task_t* dependent_task = task->dependent_tasks[i];
+ const bool dependent_task_ready =
+ iree_atomic_fetch_sub_int32(&dependent_task->pending_dependency_count,
+ 1, iree_memory_order_acq_rel) == 1;
+ if (dependent_task_ready) {
+      // The dependent task has retired and can now be discarded.
+ iree_task_list_push_back(discard_worklist, dependent_task);
+ }
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_task_barrier_retire(iree_task_barrier_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // NOTE: we walk in reverse so that we enqueue in LIFO order.
+ for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) {
+ iree_task_t* dependent_task =
+ task->dependent_tasks[task->dependent_task_count - i - 1];
+ if (iree_atomic_fetch_sub_int32(&dependent_task->pending_dependency_count,
+ 1, iree_memory_order_acq_rel) == 1) {
+ // The dependent task has retired and can now be made ready.
+ iree_task_submission_enqueue(pending_submission, dependent_task);
+ }
+ }
+
+ iree_task_retire(&task->header, pending_submission, iree_ok_status());
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_FENCE
+//==============================================================================
+
+void iree_task_fence_initialize(iree_task_scope_t* scope,
+ iree_wait_primitive_t signal_handle,
+ iree_task_fence_t* out_task) {
+ iree_task_initialize(IREE_TASK_TYPE_FENCE, scope, &out_task->header);
+ out_task->signal_handle = signal_handle;
+ iree_task_scope_begin(scope);
+}
+
+void iree_task_fence_retire(iree_task_fence_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Need to wait until after we clean up the task before ending the scope.
+ // This way anyone waiting on the scope to go idle will be able to ensure the
+ // scope is actually idle - otherwise it may try to free the task memory
+ // while we are still using it.
+ iree_task_scope_t* end_scope = task->header.scope;
+
+ // TODO(benvanik): better API that doesn't require wrapping or requiring that
+ // iree_event_t is an iree_wait_handle_t.
+ iree_wait_handle_t signal_handle = {
+ .type = task->signal_handle.type,
+ .value = task->signal_handle.value,
+ };
+ iree_event_set(&signal_handle);
+
+ iree_task_retire(&task->header, pending_submission, iree_ok_status());
+
+ if (end_scope) {
+ iree_task_scope_end(end_scope);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_WAIT
+//==============================================================================
+
+// Initializes a wait task in |out_task| that blocks dependents until
+// |wait_source| resolves or |deadline_ns| is exceeded. No cancellation flag is
+// attached by default; see iree_task_wait_set_wait_any for wait-any groups.
+void iree_task_wait_initialize(iree_task_scope_t* scope,
+                               iree_wait_source_t wait_source,
+                               iree_time_t deadline_ns,
+                               iree_task_wait_t* out_task) {
+  iree_task_initialize(IREE_TASK_TYPE_WAIT, scope, &out_task->header);
+  out_task->wait_source = wait_source;
+  out_task->deadline_ns = deadline_ns;
+  out_task->cancellation_flag = NULL;
+}
+
+// Initializes a wait task that sleeps until |deadline_ns| is reached.
+// The delay itself is encoded as the wait source; the wait carries no hard
+// deadline of its own (IREE_TIME_INFINITE_FUTURE).
+void iree_task_wait_initialize_delay(iree_task_scope_t* scope,
+                                     iree_time_t deadline_ns,
+                                     iree_task_wait_t* out_task) {
+  iree_task_wait_initialize(scope, iree_wait_source_delay(deadline_ns),
+                            IREE_TIME_INFINITE_FUTURE, out_task);
+}
+
+// Marks |task| as part of a wait-any group sharing |cancellation_flag|.
+// The flag is latched by whichever wait in the group resolves first so the
+// remaining waits can be cancelled (see IREE_TASK_FLAG_WAIT_ANY).
+void iree_task_wait_set_wait_any(iree_task_wait_t* task,
+                                 iree_atomic_int32_t* cancellation_flag) {
+  task->header.flags |= IREE_TASK_FLAG_WAIT_ANY;
+  task->cancellation_flag = cancellation_flag;
+}
+
+// Retires a wait task with the given |status| (ok, deadline exceeded, etc),
+// clearing the completion flag so pooled tasks can be reused.
+void iree_task_wait_retire(iree_task_wait_t* task,
+                           iree_task_submission_t* pending_submission,
+                           iree_status_t status) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  task->header.flags &= ~IREE_TASK_FLAG_WAIT_COMPLETED;  // reset for future use
+
+  // TODO(benvanik): allow deinit'ing the wait handle (if transient/from the
+  // executor event pool).
+  iree_task_retire(&task->header, pending_submission, status);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_DISPATCH_* utilities
+//==============================================================================
+
+// Returns a packed XXRRGGBB color (red in bits 16-23; the implementation packs
+// (r << 16) | (g << 8) | b). Must not be 0 (tracy will ignore).
+static uint32_t iree_task_tile_to_color(
+    const iree_task_tile_context_t* tile_context);
+
+#if defined(IREE_TASK_TRACING_PER_TILE_COLORS)
+
+// TODO(#4017): optimize this to compute entire slices at once and fold in the
+// work grid location code.
+// Converts 8-bit HSV to a packed XXRRGGBB value using integer-only math.
+// TODO(#4017): optimize this to compute entire slices at once and fold in the
+// work grid location code.
+static uint32_t iree_math_hsv_to_xrgb(const uint8_t h, const uint8_t s,
+                                      const uint8_t v) {
+  // NOTE: this is matching with tracy's TracyColor.cpp implementation so that
+  // our colors fit nicely in the UI.
+  const uint8_t reg = h / 43;
+  const uint8_t rem = (h - (reg * 43)) * 6;
+  const uint8_t p = (v * (255 - s)) >> 8;
+  const uint8_t q = (v * (255 - ((s * rem) >> 8))) >> 8;
+  const uint8_t t = (v * (255 - ((s * (255 - rem)) >> 8))) >> 8;
+
+  // Select the RGB permutation for the hue sextant |reg|.
+  // clang-format off
+  uint8_t r, g, b;
+  switch (reg) {
+    case 0: r = v; g = t; b = p; break;
+    case 1: r = q; g = v; b = p; break;
+    case 2: r = p; g = v; b = t; break;
+    case 3: r = p; g = q; b = v; break;
+    case 4: r = t; g = p; b = v; break;
+    default: r = v; g = p; b = q; break;
+  }
+  // clang-format on
+
+  uint32_t xrgb = (r << 16) | (g << 8) | b;
+  xrgb |= (xrgb ? 0 : 1);  // ensure never zero (tracy treats 0 as "no color")
+  return xrgb;
+}
+
+// Maps a tile's workgroup xyz within the dispatch grid to a stable color so
+// work distribution is visible in the profiler UI.
+static uint32_t iree_task_tile_to_color(
+    const iree_task_tile_context_t* tile_context) {
+  // TODO(#4017): optimize such that it's always on when tracing is
+  // enabled by amortizing the cost across the entire slice.
+
+  // Picked to try to make it easy to see gradients from tiles along the same x,
+  // y, and z (in that order). x is the fastest changing dimension and as such
+  // should all have the same hue, while z is the slowest changing dimension and
+  // should have different hues.
+  uint8_t h = (tile_context->workgroup_xyz[1] /
+               (float)(tile_context->workgroup_count[1])) *
+              255;
+  // Scramble hues with a golden-ratio multiplicative hash so adjacent rows get
+  // visually distinct colors.
+  h = (h * 11400714819323198485ull) & 0xFF;
+  uint8_t s = 100 - (tile_context->workgroup_xyz[2] /
+                     (float)(tile_context->workgroup_count[2])) *
+                        100;
+  uint8_t v = (tile_context->workgroup_xyz[0] /
+               (float)(tile_context->workgroup_count[0])) *
+                  50 +
+              50;
+  return iree_math_hsv_to_xrgb(h, s, v);
+}
+
+#else
+
+// Per-tile coloring disabled: fall back to tracy's default zone colors.
+static uint32_t iree_task_tile_to_color(
+    const iree_task_tile_context_t* tile_context) {
+  return 0;  // use default tracy colors
+}
+
+#endif // IREE_TASK_TRACING_PER_TILE_COLORS
+
+// Accumulates |source| statistics into |target|.
+// Currently a no-op placeholder so that call sites (shard/dispatch/scope
+// roll-ups) stay in place until counters are implemented.
+void iree_task_dispatch_statistics_merge(
+    const iree_task_dispatch_statistics_t* source,
+    iree_task_dispatch_statistics_t* target) {
+  // TODO(benvanik): statistics.
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_DISPATCH
+//==============================================================================
+
+// Initializes the fields common to direct and indirect dispatches.
+// The workgroup count union is left for the caller to populate (inline value
+// or indirection pointer).
+static void iree_task_dispatch_initialize_base(
+    iree_task_scope_t* scope, iree_task_dispatch_closure_t closure,
+    const uint32_t workgroup_size[3], iree_task_dispatch_t* out_task) {
+  iree_task_initialize(IREE_TASK_TYPE_DISPATCH, scope, &out_task->header);
+  out_task->closure = closure;
+  memcpy(out_task->workgroup_size, workgroup_size,
+         sizeof(out_task->workgroup_size));
+  out_task->local_memory_size = 0;
+  iree_atomic_store_intptr(&out_task->status, 0, iree_memory_order_release);
+  memset(&out_task->statistics, 0, sizeof(out_task->statistics));
+
+  // Assign a process-unique id used to correlate trace zones across workers
+  // (tracing builds only).
+  IREE_TRACE({
+    static iree_atomic_int64_t next_dispatch_id = IREE_ATOMIC_VAR_INIT(0);
+    out_task->dispatch_id = iree_atomic_fetch_add_int64(
+        &next_dispatch_id, 1ll, iree_memory_order_acq_rel);
+  });
+}
+
+// Initializes a direct dispatch with an inline |workgroup_count| value.
+void iree_task_dispatch_initialize(iree_task_scope_t* scope,
+                                   iree_task_dispatch_closure_t closure,
+                                   const uint32_t workgroup_size[3],
+                                   const uint32_t workgroup_count[3],
+                                   iree_task_dispatch_t* out_task) {
+  iree_task_dispatch_initialize_base(scope, closure, workgroup_size, out_task);
+  memcpy(out_task->workgroup_count.value, workgroup_count,
+         sizeof(out_task->workgroup_count.value));
+}
+
+// Initializes an indirect dispatch whose workgroup count is read from
+// |workgroup_count_ptr| (3x uint32_t) immediately before the dispatch is
+// issued; the pointed-to memory may be written up until that point.
+void iree_task_dispatch_initialize_indirect(
+    iree_task_scope_t* scope, iree_task_dispatch_closure_t closure,
+    const uint32_t workgroup_size[3], const uint32_t* workgroup_count_ptr,
+    iree_task_dispatch_t* out_task) {
+  iree_task_dispatch_initialize_base(scope, closure, workgroup_size, out_task);
+  out_task->header.flags |= IREE_TASK_FLAG_DISPATCH_INDIRECT;
+  out_task->workgroup_count.ptr = workgroup_count_ptr;
+}
+
+// Issues a ready dispatch: resolves the (possibly indirect) workgroup count,
+// carves the grid into shards, and posts the shards to workers via
+// |post_batch|. The dispatch itself retires only after all shards complete -
+// or immediately here when the grid is empty.
+void iree_task_dispatch_issue(iree_task_dispatch_t* dispatch_task,
+                              iree_task_pool_t* shard_task_pool,
+                              iree_task_submission_t* pending_submission,
+                              iree_task_post_batch_t* post_batch) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, dispatch_task->dispatch_id);
+
+  // Mark the dispatch as having been issued; the next time it retires it'll be
+  // because all work has completed.
+  dispatch_task->header.flags |= IREE_TASK_FLAG_DISPATCH_RETIRE;
+
+  // Fetch the workgroup count (directly or indirectly).
+  if (dispatch_task->header.flags & IREE_TASK_FLAG_DISPATCH_INDIRECT) {
+    // By the task being ready to execute we know any dependencies on the
+    // indirection buffer have been satisfied and its safe to read. We perform
+    // the indirection here and convert the dispatch to a direct one such that
+    // following code can read the value.
+    // TODO(benvanik): non-one-shot command buffers won't be able to do this as
+    // the intent is that they can be dynamic per execution.
+    const uint32_t* source_ptr = dispatch_task->workgroup_count.ptr;
+    memcpy(dispatch_task->workgroup_count.value, source_ptr,
+           sizeof(dispatch_task->workgroup_count.value));
+    dispatch_task->header.flags ^= IREE_TASK_FLAG_DISPATCH_INDIRECT;
+  }
+  const uint32_t* workgroup_count = dispatch_task->workgroup_count.value;
+
+  // Attach the resolved grid dimensions (e.g. "8x4x1") to the trace zone.
+  IREE_TRACE({
+    char xyz_string[32];
+    int xyz_string_length =
+        snprintf(xyz_string, IREE_ARRAYSIZE(xyz_string), "%ux%ux%u",
+                 workgroup_count[0], workgroup_count[1], workgroup_count[2]);
+    IREE_TRACE_ZONE_APPEND_TEXT_STRING_VIEW(z0, xyz_string, xyz_string_length);
+  });
+
+  // Setup the iteration space for shards to pull work from the complete grid.
+  iree_atomic_store_int32(&dispatch_task->tile_index, 0,
+                          iree_memory_order_relaxed);
+  dispatch_task->tile_count =
+      workgroup_count[0] * workgroup_count[1] * workgroup_count[2];
+
+  // Compute shard count - almost always worker_count unless we are a very small
+  // dispatch (1x1x1, etc).
+  iree_host_size_t worker_count = iree_task_post_batch_worker_count(post_batch);
+  iree_host_size_t shard_count =
+      iree_min(dispatch_task->tile_count, worker_count);
+
+  // Compute how many tiles we want each shard to reserve at a time from the
+  // larger grid. A higher number reduces overhead and improves locality while
+  // a lower number reduces maximum worst-case latency (coarser work stealing).
+  if (dispatch_task->tile_count <
+      worker_count * IREE_TASK_DISPATCH_MAX_TILES_PER_SHARD_RESERVATION) {
+    // Grid is small - allow it to be eagerly sliced up.
+    dispatch_task->tiles_per_reservation = 1;
+  } else {
+    dispatch_task->tiles_per_reservation =
+        IREE_TASK_DISPATCH_MAX_TILES_PER_SHARD_RESERVATION;
+  }
+
+  // Randomize starting worker.
+  iree_host_size_t worker_offset = iree_task_post_batch_select_worker(
+      post_batch, dispatch_task->header.affinity_set);
+  iree_host_size_t worker_index = worker_offset;
+
+  for (iree_host_size_t i = 0; i < shard_count; ++i) {
+    // Allocate and initialize the shard.
+    // NOTE(review): iree_task_dispatch_shard_allocate may return NULL if pool
+    // acquisition fails; the enqueue below assumes success - confirm the pool
+    // cannot fail to grow here.
+    iree_task_dispatch_shard_t* shard_task =
+        iree_task_dispatch_shard_allocate(dispatch_task, shard_task_pool);
+
+    // Enqueue on the worker selected for the task.
+    iree_task_post_batch_enqueue(post_batch, worker_index % worker_count,
+                                 &shard_task->header);
+    ++worker_index;
+  }
+
+  // NOTE: the dispatch is not retired until all shards complete. Upon the last
+  // shard completing the lucky worker will retire the task inline and
+  // potentially queue up more ready tasks that follow.
+  //
+  // The gotcha here is that it's possible for there to be zero shards within
+  // a dispatch (if, for example, an indirect dispatch had its workgroup counts
+  // set to zero to prevent it from running). We check for that here.
+  if (shard_count == 0) {
+    iree_task_dispatch_retire(dispatch_task, pending_submission);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Retires a dispatch after all of its shards have joined: rolls per-dispatch
+// statistics up into the owning scope and forwards any failure status that
+// shards recorded.
+void iree_task_dispatch_retire(iree_task_dispatch_t* dispatch_task,
+                               iree_task_submission_t* pending_submission) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, dispatch_task->dispatch_id);
+
+  // TODO(benvanik): attach statistics to the tracy zone.
+
+  // Merge the statistics from the dispatch into the scope so we can track all
+  // of the work without tracking all the dispatches at a global level.
+  iree_task_dispatch_statistics_merge(
+      &dispatch_task->statistics,
+      &dispatch_task->header.scope->dispatch_statistics);
+
+  // Consume the status of the dispatch that may have been set from a workgroup
+  // and notify the scope. We need to do this here so that each shard retires
+  // before we discard any subsequent tasks: otherwise a failure of one shard
+  // would discard the shared dispatch task (and potentially everything) while
+  // other shards were still running. We also want to avoid fine-grained
+  // synchronization across shards that would occur by each checking to see if
+  // any other has hit an error; failure in a dispatch should be so exceedingly
+  // rare that allowing some shards to complete after one encounters an error is
+  // not a problem.
+  iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr(
+      &dispatch_task->status, 0, iree_memory_order_seq_cst);
+
+  iree_task_retire(&dispatch_task->header, pending_submission, status);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_DISPATCH_SHARD
+//==============================================================================
+
+// Returns the dispatch this shard belongs to. Shards reuse the header's
+// completion_task pointer to link back to their parent dispatch (set in
+// iree_task_dispatch_shard_initialize).
+static inline iree_task_dispatch_t* iree_task_dispatch_shard_parent(
+    iree_task_dispatch_shard_t* task) {
+  return (iree_task_dispatch_t*)task->header.completion_task;
+}
+
+// Initializes a shard of |dispatch_task|. The dispatch is set as the shard's
+// completion task so the dispatch retires only after every shard completes.
+void iree_task_dispatch_shard_initialize(iree_task_dispatch_t* dispatch_task,
+                                         iree_task_dispatch_shard_t* out_task) {
+  iree_task_initialize(IREE_TASK_TYPE_DISPATCH_SHARD,
+                       dispatch_task->header.scope, &out_task->header);
+  iree_task_set_completion_task(&out_task->header, &dispatch_task->header);
+}
+
+// Acquires a shard task from |shard_task_pool| and initializes it for
+// |dispatch_task|. Returns NULL if pool acquisition fails (the failure status
+// is deliberately dropped via iree_status_ignore).
+iree_task_dispatch_shard_t* iree_task_dispatch_shard_allocate(
+    iree_task_dispatch_t* dispatch_task, iree_task_pool_t* shard_task_pool) {
+  iree_task_dispatch_shard_t* shard_task = NULL;
+  iree_status_t status =
+      iree_task_pool_acquire(shard_task_pool, (iree_task_t**)&shard_task);
+  if (!iree_status_is_ok(status)) {
+    iree_status_ignore(status);
+    return NULL;
+  }
+  iree_task_dispatch_shard_initialize(dispatch_task, shard_task);
+  // Record the owning pool so the shard is returned to it once it retires.
+  shard_task->header.pool = shard_task_pool;
+  return shard_task;
+}
+
+// Executes one shard of a dispatch: repeatedly reserves batches of tiles from
+// the dispatch's shared atomic tile_index and invokes the dispatch closure on
+// each tile until the grid is exhausted or a tile fails.
+void iree_task_dispatch_shard_execute(
+    iree_task_dispatch_shard_t* task, iree_cpu_processor_id_t processor_id,
+    iree_byte_span_t worker_local_memory,
+    iree_task_submission_t* pending_submission) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_task_dispatch_t* dispatch_task = iree_task_dispatch_shard_parent(task);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, dispatch_task->dispatch_id);
+  IREE_TRACE_ZONE_SET_COLOR(
+      z0, iree_math_ptr_to_xrgb(dispatch_task->closure.user_context));
+
+  // Map only the requested amount of worker local memory into the tile context.
+  // This ensures that how much memory is used by some executions does not
+  // inadvertently leak over into other executions.
+  if (IREE_UNLIKELY(dispatch_task->local_memory_size >
+                    worker_local_memory.data_length)) {
+    // Fail the dispatch as a whole; the shard itself still retires OK below.
+    iree_task_try_set_status(
+        &dispatch_task->status,
+        iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                         "dispatch requires %ub of local memory but only "
+                         "%zub is available per-worker",
+                         dispatch_task->local_memory_size,
+                         worker_local_memory.data_length));
+    iree_task_retire(&task->header, pending_submission, iree_ok_status());
+    IREE_TRACE_ZONE_END(z0);
+    return;
+  }
+  iree_byte_span_t local_memory = iree_make_byte_span(
+      worker_local_memory.data, dispatch_task->local_memory_size);
+
+  // Prepare context shared for all tiles in the shard.
+  iree_task_tile_context_t tile_context;
+  memcpy(&tile_context.workgroup_size, dispatch_task->workgroup_size,
+         sizeof(tile_context.workgroup_size));
+  memcpy(&tile_context.workgroup_count, dispatch_task->workgroup_count.value,
+         sizeof(tile_context.workgroup_count));
+  uint32_t workgroup_count_x = tile_context.workgroup_count[0];
+  uint32_t workgroup_count_y = tile_context.workgroup_count[1];
+  tile_context.local_memory = local_memory;
+
+  // We perform all our shard statistics work locally here and only push back to
+  // the dispatch at the end; this avoids contention from each shard trying to
+  // update the statistics together.
+  iree_task_dispatch_statistics_t shard_statistics;
+  memset(&shard_statistics, 0, sizeof(shard_statistics));
+  tile_context.statistics = &shard_statistics;
+
+  // Hint as to which processor we are running on.
+  tile_context.processor_id = processor_id;
+
+  // Loop over all tiles until they are all processed.
+  const uint32_t tile_count = dispatch_task->tile_count;
+  const uint32_t tiles_per_reservation = dispatch_task->tiles_per_reservation;
+  uint32_t tile_base = iree_atomic_fetch_add_int32(&dispatch_task->tile_index,
+                                                   tiles_per_reservation,
+                                                   iree_memory_order_relaxed);
+  while (tile_base < tile_count) {
+    // Exclusive end index of this reservation, clamped to the grid size.
+    const uint32_t tile_range =
+        iree_min(tile_base + tiles_per_reservation, tile_count);
+    for (uint32_t tile_index = tile_base; tile_index < tile_range;
+         ++tile_index) {
+      // TODO(benvanik): faster math here, especially knowing we pull off N
+      // sequential indices per reservation.
+      // Unflatten the linear tile index into xyz (x fastest-varying).
+      uint32_t tile_i = tile_index;
+      tile_context.workgroup_xyz[0] = tile_i % workgroup_count_x;
+      tile_i /= workgroup_count_x;
+      tile_context.workgroup_xyz[1] = tile_i % workgroup_count_y;
+      tile_i /= workgroup_count_y;
+      tile_context.workgroup_xyz[2] = tile_i;
+
+      IREE_TRACE_ZONE_BEGIN_NAMED(z_tile,
+                                  "iree_task_dispatch_shard_execute_tile");
+      IREE_TRACE_ZONE_SET_COLOR(z_tile, iree_task_tile_to_color(&tile_context));
+
+      // NOTE: these are useful for debugging but dramatically increase our
+      // cost here; only enable if needed for tracking work distribution:
+      IREE_TRACE_ZONE_APPEND_VALUE(z_tile, tile_context.workgroup_xyz[0]);
+      IREE_TRACE_ZONE_APPEND_VALUE(z_tile, tile_context.workgroup_xyz[1]);
+      IREE_TRACE_ZONE_APPEND_VALUE(z_tile, tile_context.workgroup_xyz[2]);
+      // IREE_TRACE_ZONE_APPEND_VALUE(z_tile, (uint64_t)task->closure.fn);
+
+      iree_status_t status =
+          dispatch_task->closure.fn(dispatch_task->closure.user_context,
+                                    &tile_context, pending_submission);
+
+      IREE_TRACE_ZONE_END(z_tile);
+
+      // If any tile fails we bail early from the loop. This doesn't match
+      // what an accelerator would do but saves some unneeded work.
+      // Note that other shards may have completed execution, be executing
+      // concurrently with this one, or still be pending - this does not
+      // have any influence on them and they may continue to execute even
+      // after we bail from here.
+      if (!iree_status_is_ok(status)) {
+        // Propagate failures to the dispatch task.
+        iree_task_try_set_status(&dispatch_task->status, status);
+        goto abort_shard;  // out of the while-for nest
+      }
+    }
+
+    // Try to grab the next slice of tiles.
+    tile_base = iree_atomic_fetch_add_int32(&dispatch_task->tile_index,
+                                            tiles_per_reservation,
+                                            iree_memory_order_relaxed);
+  }
+abort_shard:
+
+  // Push aggregate statistics up to the dispatch.
+  // Note that we may have partial information here if we errored out of the
+  // loop but that's still useful to know.
+  iree_task_dispatch_statistics_merge(&shard_statistics,
+                                      &dispatch_task->statistics);
+
+  // NOTE: even if an error was hit we retire OK - the error has already been
+  // propagated to the dispatch and it'll clean up after all shards are joined.
+  iree_task_retire(&task->header, pending_submission, iree_ok_status());
+  IREE_TRACE_ZONE_END(z0);
+}
diff --git a/runtime/src/iree/task/task.h b/runtime/src/iree/task/task.h
new file mode 100644
index 0000000..aeef180
--- /dev/null
+++ b/runtime/src/iree/task/task.h
@@ -0,0 +1,687 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_TASK_H_
+#define IREE_TASK_TASK_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomic_slist.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/cpu.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/task/affinity_set.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_task_list_t iree_task_list_t;
+typedef struct iree_task_pool_t iree_task_pool_t;
+typedef struct iree_task_scope_t iree_task_scope_t;
+typedef struct iree_task_submission_t iree_task_submission_t;
+
+//==============================================================================
+// Task header for internal tracking
+//==============================================================================
+
+// Specifies the type of a task and how executors handle it.
+enum iree_task_type_bits_t {
+  // Task is a no-op (performs no work) and exists for flexibility.
+  IREE_TASK_TYPE_NOP = 0u,
+
+  // Task will synchronously call a function before continuing.
+  IREE_TASK_TYPE_CALL = 1u,
+
+  // Task exists only as a barrier to join/fork tasks and has no executable
+  // payload.
+  IREE_TASK_TYPE_BARRIER = 2u,
+
+  // Task is a fence indicating that a certain point in the task graph has been
+  // reached. All tasks prior to this fence (by way of happens-before
+  // dependencies) are guaranteed to have retired.
+  IREE_TASK_TYPE_FENCE = 3u,
+
+  // Task is a wait on an external wait handle (fd, HANDLE, etc).
+  // Executors will wait on the handle until it is signaled and meets the
+  // specified condition prior to readying the dependent tasks.
+  IREE_TASK_TYPE_WAIT = 4u,
+
+  // Task is a 3D grid dispatch of zero or more tiles.
+  // Dispatches are issued when ready by being split into one shard per
+  // worker that should process the dispatch.
+  //
+  // If IREE_TASK_FLAG_DISPATCH_INDIRECT is set then the dispatch reads the
+  // workgroup count from a buffer immediately prior to fan-out instead of using
+  // the values embedded in the task structure.
+  //
+  // After a dispatch has been issued the IREE_TASK_FLAG_DISPATCH_RETIRE flag is
+  // set to indicate that when the dispatch becomes ready again it will be after
+  // all shards have completed.
+  IREE_TASK_TYPE_DISPATCH = 5u,
+
+  // Task is one of potentially many shards processing a larger dispatch grid.
+  // Each shard may have a preference as to which parts of grid it will focus
+  // on but is able to otherwise steal any available region directly from the
+  // shared dispatch coordination state. Shards retire once there are no more
+  // tiles remaining in the dispatch grid.
+  IREE_TASK_TYPE_DISPATCH_SHARD = 6u,
+};
+typedef uint8_t iree_task_type_t;
+
+// Task behavior modifiers stored in iree_task_t::flags.
+enum iree_task_flag_bits_t {
+  IREE_TASK_FLAG_NONE = 0u,
+
+  // Indicates that a wait task is part of a wait-any operation and the
+  // cancellation flag should be latched by any wait that resolves.
+  IREE_TASK_FLAG_WAIT_ANY = 1u << 0,
+
+  // The wait handle of the wait task has been acquired and the task can be
+  // waited on with system APIs.
+  IREE_TASK_FLAG_WAIT_EXPORTED = 1u << 1,
+
+  // The wait handle the task is specified to wait on has resolved and the task
+  // can now be considered complete.
+  IREE_TASK_FLAG_WAIT_COMPLETED = 1u << 2,
+
+  // The workgroup count for the dispatch is provided by way of a pointer to a
+  // list of 3 uint32_t values that will be sampled immediately prior to
+  // issuing of the dispatch. The contents of the pointer can be safely modified
+  // up until the last dependency has completed and the dispatch is about to be
+  // issued.
+  IREE_TASK_FLAG_DISPATCH_INDIRECT = 1u << 3,
+
+  // The dispatch has been issued and the task is waiting for one or more
+  // shards to complete. After they complete the dispatch will be readied and
+  // can be retired.
+  //
+  // Though added by the executor after issuing a dispatch users can also set
+  // this to indicate that all dispatch shards for a particular dispatch have
+  // been statically scheduled. Executors will then skip issuing the dispatch
+  // and instead wait until all shards complete, enabling IREE_TASK_TYPE_BARRIER
+  // behavior but without an additional task as dispatches are still required
+  // to store information for shards.
+  IREE_TASK_FLAG_DISPATCH_RETIRE = 1u << 4,
+
+  // An error occurred at or before the task and it has been aborted.
+  // Aborted tasks may continue to execute if they're already in-flight but must
+  // not begin execution after the flag has been set.
+  //
+  // The actual error that occurred is routed to the parent task scope as it
+  // happens and may be available for querying before all tasks have been
+  // cleaned up.
+  IREE_TASK_FLAG_ABORTED = 1u << 5,
+};
+typedef uint16_t iree_task_flags_t;
+
+typedef struct iree_task_t iree_task_t;
+
+// A function called to cleanup tasks.
+// Each task has its associated cleanup function called exactly once.
+// The provided |status_code| indicates the execution status of the task prior
+// to cleanup and will usually be IREE_STATUS_OK indicating the task was
+// successfully issued or IREE_STATUS_ABORTED if the task was discarded prior
+// to issuing.
+typedef void(IREE_API_PTR* iree_task_cleanup_fn_t)(
+    iree_task_t* task, iree_status_code_t status_code);
+
+// A task within the task system that runs on an executor.
+// Tasks have an iree_task_type_t that defines which parameters are valid and
+// how the executor is to treat the task. Dependency edges can be defined that
+// determine the execution order of tasks within the executors.
+struct iree_alignas(iree_max_align_t) iree_task_t {
+  // Intrusive pointer used to store tasks within iree_task_list_t and
+  // iree_atomic_task_list_t singly-linked lists. This must come first in the
+  // structure so that it is at the appropriate alignment.
+  iree_task_t* next_task;
+
+  // The scope this task is attributed to. Errors with the task will be
+  // propagated to the scope and errors in the scope will cause pending tasks to
+  // be skipped.
+  iree_task_scope_t* scope;
+
+  // Optional function to call to cleanup the task on completion.
+  // Will be called after the task has retired or if the task fails to issue
+  // (dependency failed, etc).
+  iree_task_cleanup_fn_t cleanup_fn;
+
+  // Optional task that will be notified when the task completes.
+  // The task will have its pending_dependency_count decremented and will be
+  // readied for execution when the count reaches 0.
+  iree_task_t* completion_task;
+
+  // Specifies which workers will be used to execute this task.
+  // Forked tasks will inherit their parent task affinity (possibly with some
+  // task-dependent rules) to partition workloads across workers with knowledge
+  // of the specific work being performed. For example, some dispatches can be
+  // limited to run on certain microarchitectures that workers have affinity
+  // with at the OS scheduler level (such as little.BIG topologies).
+  iree_task_affinity_set_t affinity_set;
+
+  // Total number of dependent tasks still outstanding. Decremented each time
+  // a dependent task completes. The task is considered ready to execute when
+  // this value reaches 0.
+  iree_atomic_int32_t pending_dependency_count;
+
+  // Optional pool the task should be returned to after it has resolved. If the
+  // task was allocated as part of a larger data structure (embedded within
+  // an arena for example) then this can be NULL to prevent the task system
+  // from interfering.
+  iree_task_pool_t* pool;
+
+  // Specifies the type of the task and how the executor handles it.
+  iree_task_type_t type;
+
+  // Task-specific flag bits.
+  iree_task_flags_t flags;
+};
+// Compile-time layout checks: the intrusive next_task pointer must sit at
+// offset 0 (required by the singly-linked list implementations above) and the
+// header must stay within a small fixed budget.
+static_assert(offsetof(iree_task_t, next_task) == 0,
+              "next_task intrusive pointer must be at offset 0");
+static_assert(sizeof(iree_task_t) <= 64,
+              "the task header greatly influences pool sizes due to alignment "
+              "requirements and should be kept tiny");
+
+// Initializes a task header with the given type.
+// Must be called on all tasks to ensure proper dependency tracking and list
+// state prior to enqueuing. Only the task header structure is initialized and
+// any additional data as part of the wrapping task type must be initialized by
+// the caller.
+void iree_task_initialize(iree_task_type_t type, iree_task_scope_t* scope,
+ iree_task_t* out_task);
+
+// Sets the optional function called when the task completes (whether successful
+// or not). The cleanup function will receive a status indicating whether the
+// cleanup is from expected execution as the task retires (IREE_STATUS_OK)
+// or because it was aborted (IREE_STATUS_ABORTED).
+void iree_task_set_cleanup_fn(iree_task_t* task,
+ iree_task_cleanup_fn_t cleanup_fn);
+
+// Sets up a dependency edge from |task| to |completion_task| such that when
+// |task| completes |completion_task| will be notified and have its
+// pending_dependency_count decremented.
+void iree_task_set_completion_task(iree_task_t* task,
+ iree_task_t* completion_task);
+
+// Returns true if the |task| is ready to execute immediately.
+// Though this is safe to call from any thread the test may have false-negatives
+// (ready tasks are not returned as ready) due to cross-thread synchronization
+// latency. Note that tasks may yield themselves during execution and switch
+// from ready to waiting (such as when an indirect dispatch needs to wait for
+// all tiles to complete).
+bool iree_task_is_ready(iree_task_t* task);
+
+// Discards the task and any dependent tasks.
+// Any dependent tasks that need to be discarded will be added to
+// |discard_worklist| for the caller to continue discarding.
+void iree_task_discard(iree_task_t* task, iree_task_list_t* discard_worklist);
+
+//==============================================================================
+// IREE_TASK_TYPE_NOP
+//==============================================================================
+
+// Task is a no-op (performs no work) and exists for flexibility.
+// NOP tasks can be used to link together task lists from multiple threads
+// where it may otherwise not be ideal to have heavy-weight concurrency
+// structures. NOP tasks can also be useful for neutering another task type
+// after it has already been recorded into a list such as when cancellations
+// occur.
+typedef iree_alignas(iree_max_align_t) struct {
+  // Task header: implementation detail, do not use.
+  iree_task_t header;
+} iree_task_nop_t;
+
+// Initializes a no-op task in |out_task| attributed to |scope|.
+void iree_task_nop_initialize(iree_task_scope_t* scope,
+                              iree_task_nop_t* out_task);
+
+//==============================================================================
+// IREE_TASK_TYPE_CALL
+//==============================================================================
+
+// Function invoked when a call task executes.
+// |task| is the call task being executed and follow-up tasks may be enqueued
+// onto |pending_submission|. A non-OK return is captured on the call task's
+// status (see iree_task_call_t).
+typedef iree_status_t(IREE_API_PTR* iree_task_call_closure_fn_t)(
+    void* user_context, iree_task_t* task,
+    iree_task_submission_t* pending_submission);
+
+// A function closure representing the function to call and its arguments.
+typedef struct iree_task_call_closure_t {
+  // Function called per tile invocation.
+  iree_task_call_closure_fn_t fn;
+
+  // Opaque pointer to a user-provided data structure.
+  // No lifetime management is performed by the task system and it is required
+  // that users ensure that the memory referenced is live until after the task
+  // has completed.
+  void* user_context;
+
+  // TODO(benvanik): cleanup function? right now assume arg is never freed.
+} iree_task_call_closure_t;
+
+// Binds a function pointer and the arguments it should be called with.
+// If the arguments represent pointers they must remain live until the task
+// has completed execution.
+static inline iree_task_call_closure_t iree_task_make_call_closure(
+    iree_task_call_closure_fn_t fn, void* user_context) {
+  iree_task_call_closure_t closure = {fn, user_context};
+  return closure;
+}
+
+// A task that will synchronously call a function from the executor and wait
+// for it to complete before continuing.
+//
+// Memory referenced by closure arguments must be kept valid until the function
+// executes (in general with the same lifetime as the task itself).
+typedef iree_alignas(iree_max_align_t) struct {
+  // Task header: implementation detail, do not use.
+  iree_task_t header;
+
+  // Function closure to call when the task is executed.
+  iree_task_call_closure_t closure;
+
+  // Resulting status from the call available once all nested tasks have
+  // completed (or would have completed). It's possible for a call to nest
+  // additional work under it and then return a failure; to ensure we don't
+  // discard the root call while the nested tasks are still executing we set the
+  // status here and wait for the nested tasks to complete. We'll try not to
+  // issue work that was enqueued while the call was executing but it's possible
+  // for work to come from other angles and we need to err on the side of
+  // safety.
+  iree_atomic_intptr_t status;
+} iree_task_call_t;
+
+// Initializes a call task in |out_task| that invokes |closure| when executed.
+void iree_task_call_initialize(iree_task_scope_t* scope,
+                               iree_task_call_closure_t closure,
+                               iree_task_call_t* out_task);
+
+//==============================================================================
+// IREE_TASK_TYPE_BARRIER
+//==============================================================================
+
+// A join point for fork/join-style scheduling.
+// References a set of dependent tasks that will be notified and possibly
+// readied when the barrier is reached.
+//
+// This allows for modeling one-to-many and many-to-many relationships. The base
+// task dependency system only models one-to-one and should be used if possible
+// to avoid the additional overhead of a barrier task both in memory and task
+// indirection/queuing.
+//
+// Example:
+//  * [A] -> Barrier -> [C, D]
+//    - A executes
+//    - Barrier is processed after A completes
+//    - C and D execute concurrently (in any order)
+//
+//  * [A, B] -> Barrier -> [C, D]
+//    - A and B execute concurrently (in any order)
+//    - Barrier is processed after both A and B complete
+//    - C and D execute concurrently
+//
+//  * [A] -> Barrier -> [B]
+//    - Don't do this and use the base task dependency instead; it'll work, but
+//      it's much better to avoid the additional barrier indirection when
+//      possible.
+typedef iree_alignas(iree_max_align_t) struct {
+  // Task header: implementation detail, do not use.
+  iree_task_t header;
+
+  // Number of valid tasks in the dependent_tasks list.
+  iree_host_size_t dependent_task_count;
+  // [0-dependent_task_count] tasks that will be notified when the barrier is
+  // reached. Each task will have its pending_dependency_count decremented and
+  // when the count reaches 0 be added to the ready list.
+  iree_task_t* const* dependent_tasks;
+} iree_task_barrier_t;
+
+// Initializes a barrier task in |out_task| that notifies |dependent_tasks|
+// when reached. The |dependent_tasks| list is referenced (not copied).
+void iree_task_barrier_initialize(iree_task_scope_t* scope,
+                                  iree_host_size_t dependent_task_count,
+                                  iree_task_t* const* dependent_tasks,
+                                  iree_task_barrier_t* out_task);
+
+// Initializes a barrier task with no dependents; they can be assigned later
+// with iree_task_barrier_set_dependent_tasks.
+void iree_task_barrier_initialize_empty(iree_task_scope_t* scope,
+                                        iree_task_barrier_t* out_task);
+
+// Replaces the barrier's dependent task list (referenced, not copied).
+void iree_task_barrier_set_dependent_tasks(
+    iree_task_barrier_t* task, iree_host_size_t dependent_task_count,
+    iree_task_t* const* dependent_tasks);
+
+//==============================================================================
+// IREE_TASK_TYPE_FENCE
+//==============================================================================
+
+// A fence indicating that a certain point in the task graph has been reached.
+// All tasks prior to this fence (by way of happens-before dependencies) are
+// guaranteed to have retired.
+//
+// When all of the dependencies of a fence have retired the fence will notify
+// the parent scope of the task by decrementing the pending_submissions count
+// and publishing an idle_notification if it was the last in-flight submission.
+//
+// An optional platform primitive may be provided to signal in a way determined
+// by the primitive type via iree_event_set.
+typedef iree_alignas(iree_max_align_t) struct {
+ // Task header: implementation detail, do not use.
+ iree_task_t header;
+
+ // An optional wait primitive to signal when the fence is hit.
+ // If iree_wait_primitive_immediate then the signal will be ignored.
+ iree_wait_primitive_t signal_handle;
+} iree_task_fence_t;
+
+// Initializes a fence in |out_task| that demarcates activity in a |scope|.
+// An optional unowned |signal_handle| can be provided that will be signaled
+// with iree_event_set when the fence is reached.
+void iree_task_fence_initialize(iree_task_scope_t* scope,
+ iree_wait_primitive_t signal_handle,
+ iree_task_fence_t* out_task);
+
+//==============================================================================
+// IREE_TASK_TYPE_WAIT
+//==============================================================================
+
+// A task representing either a delay until a point in time or a wait on a wait
+// source external to the task system.
+//
+// Waits are modeled in the task graph to enable reducing the number of times a
+// full system wait is required by only beginning the wait when the task
+// dependencies have completed. Wait sources will be eagerly queried and
+// exported to wait handles when the task system would otherwise go idle. All
+// wait sources from all pending wait tasks will be accumulated into a wait set
+// and waited on in a single syscall.
+//
+// Waits will block the completion task until the wait resolves successfully or
+// the deadline is reached or exceeded.
+//
+// Sleeps (where wait_source is iree_wait_source_delay) will delay the
+// completion task until the delay time is reached or exceeded and will do so
+// without triggering an IREE_STATUS_DEADLINE_EXCEEDED.
+//
+// Wait-all behavior can be modeled with multiple wait tasks joined on one task;
+// all of the waits must successfully resolve prior to the completion task being
+// issued. If any wait fails then the scope is failed.
+//
+// Wait-any behavior can be modeled with multiple wait tasks joined on one task
+// as with wait-all but with each sharing a cancellation flag and having the
+// IREE_TASK_FLAG_WAIT_ANY bit set. If any wait successfully resolves or fails
+// the flag will be set to cancel all sibling waits. The cancellation flag must
+// be owned by the completion task to ensure that it is live for the lifetime of
+// all wait tasks sharing it. In more sophisticated scenarios the cancellation
+// flag may be owned by anything in the system that can guarantee the lifetime,
+// enabling cancellation actions from external code.
+//
+// Non-failing deadlines can be implemented with a wait-any on one or more wait
+// sources as well as on a delay task: if the delay task is resolved before any
+// of the other waits they will be cancelled and the completion task will be
+// issued without an IREE_STATUS_DEADLINE_EXCEEDED being emitted.
+typedef iree_alignas(iree_max_align_t) struct {
+ // Task header: implementation detail, do not use.
+ iree_task_t header;
+
+ // The wait source that the task is waiting on.
+ // May be iree_wait_source_immediate if the wait is neutered or
+ // iree_wait_source_delay if this is a delay (sleep).
+ iree_wait_source_t wait_source;
+
+ // Deadline for the wait; if this time elapses the wait will be failed with
+ // IREE_STATUS_DEADLINE_EXCEEDED. May be IREE_TIME_INFINITE_FUTURE to indicate
+ // that the wait has no deadline.
+ iree_time_t deadline_ns;
+
+ // Optional pointer to a shared cancellation flag.
+ // Set to non-zero to have the wait cancel and issue the completion task as if
+ // it had successfully waited. No error will be raised and the completion task
+ // will need to handle the wake. This is used to model wait-any behavior where
+ // multiple waits can be issued but if any one resolves all waits are silently
+ // cancelled.
+ //
+ // The flag memory must remain valid until all waits sharing it have retired.
+ // For a wait-any it would commonly be stored on the completion task to ensure
+// that no wait tasks will be live when it is cleaned up.
+ //
+ // If omitted no cancellation behavior is enabled.
+ // If specified the wait task will check the flag prior to entering a system
+ // wait scope. Cancellation does not impact waits once the system is entered.
+ // If the IREE_TASK_FLAG_WAIT_ANY bit is set on the task the cancellation flag
+ // will be set to non-zero after it resolves in order to cancel the sibling
+ // waits in the wait-any operation.
+ iree_atomic_int32_t* cancellation_flag;
+} iree_task_wait_t;
+
+// Initializes |out_task| as a wait task on |wait_source|.
+// The wait will fail with IREE_STATUS_DEADLINE_EXCEEDED if |deadline_ns| is
+// exceeded prior to the wait resolving. If the wait fails (system error, etc)
+// the failure will be propagated to the |scope|.
+void iree_task_wait_initialize(iree_task_scope_t* scope,
+ iree_wait_source_t wait_source,
+ iree_time_t deadline_ns,
+ iree_task_wait_t* out_task);
+
+// Initializes |out_task| as a delay until the given |deadline_ns| is reached or
+// exceeded. The completion task will be issued instead of failing with an
+// IREE_STATUS_DEADLINE_EXCEEDED.
+void iree_task_wait_initialize_delay(iree_task_scope_t* scope,
+ iree_time_t deadline_ns,
+ iree_task_wait_t* out_task);
+
+// Sets the wait |task| to a cooperative wait-any mode by marking the
+// IREE_TASK_FLAG_WAIT_ANY bit and storing the |cancellation_flag|.
+// The cancellation flag must be kept live until after the wait task has
+// retired.
+void iree_task_wait_set_wait_any(iree_task_wait_t* task,
+ iree_atomic_int32_t* cancellation_flag);
+
+//==============================================================================
+// IREE_TASK_TYPE_DISPATCH_* structures
+//==============================================================================
+
+// Statistics tracked across an entire dispatch operation.
+// Each tile contributes to these statistics as they execute to provide an
+// aggregate set of statistics that can be reported to tracing/user queries.
+//
+// We want to keep this structure relatively compact as it does add overhead.
+// If statistics are used purely for interactive tracing then they can be
+// piped directly to the tracing tool using IREE_TRACE_* macros. If the
+// statistics are programmatically queried for benchmarks or reporting then
+// they belong here where we can efficiently move them around.
+//
+// If we find ourselves with a lot of hardware-specific counters (vs more
+// generic ones like 'l2 cache misses' or 'ipc') then we can sprinkle in some
+// #ifdefs.
+typedef struct iree_task_dispatch_statistics_t {
+ // TODO(benvanik): statistics counters.
+ // NOTE: each of these increases the command buffer storage requirements; we
+ // should always guard these with IREE_STATISTICS_ENABLE.
+ iree_atomic_int32_t reserved;
+} iree_task_dispatch_statistics_t;
+
+// Merges statistics from |source| to |target| atomically per-field.
+// As each field is updated independently and in a relaxed memory order it's
+// possible for statistics consumers to see a tear.
+void iree_task_dispatch_statistics_merge(
+ const iree_task_dispatch_statistics_t* source,
+ iree_task_dispatch_statistics_t* target);
+
+typedef struct iree_task_tile_storage_t {
+ // TODO(benvanik): coroutine storage.
+ // Ideally we'll be able to have a fixed coroutine storage size per dispatch
+ // (via @llvm.coro.size) such that we can preallocate all of the storage for
+ // a dispatch in one shot. If we need to do dynamic allocation we will need a
+ // ringbuffer or other kind of pool to allocate from on-demand.
+ uint32_t reserved;
+} iree_task_tile_storage_t;
+
+// Per-tile context provided to each dispatch function invocation in the grid.
+// This information is unique to the tile being dispatched and may contain
+// specific state about the calling thread/fiber/etc.
+//
+// If tile execution is suspended by hitting a coroutine suspend point then the
+// coroutine state will be stored within the tile context until the tile is
+// resumed.
+typedef iree_alignas(iree_max_align_t) struct {
+ // Workgroup ID for the current invocation.
+ uint32_t workgroup_xyz[3];
+ // Workgroup size for each invocation.
+ uint32_t workgroup_size[3];
+ // Total workgroup count for the task. Can be used in conjunction with the
+ // per-invocation workgroup_xyz and workgroup_size to compute offsets/indices.
+ uint32_t workgroup_count[3];
+ // TODO(benvanik): workgroup index to amortize calculating linear offsets.
+ // (like gl_GlobalInvocationID)
+
+ // Opaque ID of the processor executing the tile.
+ // May be slightly out of date or 0 if the processor could not be queried.
+ iree_cpu_processor_id_t processor_id;
+
+ // Tile-local memory that is pinned to each worker ensuring no cache
+ // thrashing. Aligned to at least the natural pointer size of the machine.
+ // Contents are (today) undefined upon entry.
+ iree_byte_span_t local_memory;
+
+ // Shared statistics counters for the dispatch shard.
+ iree_task_dispatch_statistics_t* statistics;
+} iree_task_tile_context_t;
+
+typedef struct iree_task_dispatch_t iree_task_dispatch_t;
+
+//==============================================================================
+// Dispatch function closures
+//==============================================================================
+
+typedef iree_status_t(IREE_API_PTR* iree_task_dispatch_closure_fn_t)(
+ void* user_context, const iree_task_tile_context_t* tile_context,
+ iree_task_submission_t* pending_submission);
+
+// A function closure representing the function to call and its arguments.
+typedef struct iree_task_dispatch_closure_t {
+ // Function called per tile invocation.
+ iree_task_dispatch_closure_fn_t fn;
+
+ // User-defined argument passed to task functions during invocation.
+ // Opaque pointer-sized values that could point to user data structures or
+ // contain embedded values. No lifetime management is performed by the task
+ // system and it is required that users ensure that the memory referenced is
+ // live until after the task has completed.
+ void* user_context;
+} iree_task_dispatch_closure_t;
+
+// Binds a function pointer and the arguments it should be called with.
+// If the arguments represent pointers they must remain live until the task
+// has completed execution.
+static inline iree_task_dispatch_closure_t iree_task_make_dispatch_closure(
+ iree_task_dispatch_closure_fn_t fn, void* user_context) {
+ iree_task_dispatch_closure_t closure = {fn, user_context};
+ return closure;
+}
+
+//==============================================================================
+// IREE_TASK_TYPE_DISPATCH
+//==============================================================================
+
+// An execution request across a tiled grid.
+// Dispatches are fork points where zero or more dispatch shard tasks are
+// spawned and processed prior to joining again on the dispatch completion task.
+//
+// The total workgroup count defines the [x,y,z] extents of the dispatch grid.
+// The count may either be embedded directly into the dispatch or provided as a
+// pointer to the workgroup_count[3] that will be read immediately prior to
+// forking. If any dimension of the workgroup count is zero then the dispatch is
+// skipped and the completion task will be readied immediately.
+//
+// Example:
+// dispatch([5, 1, 1])
+// forked into shards based on affinity/scheduling parameters:
+// -> dispatch_shard for core 0, processes [0-1, 1, 1]
+// -> dispatch_shard for core 1, processes [2-3, 1, 1]
+// -> dispatch_shard for core 2, processes [4, 1, 1]
+// completion_task run after all shards complete
+typedef iree_alignas(iree_max_align_t) struct iree_task_dispatch_t {
+ // Task header: implementation detail, do not use.
+ iree_task_t header;
+
+ // Function closure to call per tile.
+ iree_task_dispatch_closure_t closure;
+
+ // Workgroup size for each invocation. Passed on to tiles without
+ // modification and not used for scheduling.
+ uint32_t workgroup_size[3];
+
+ // 3D workgroup count used to tile the dispatch.
+ // [1,1,1] specifies a single invocation of the function. A value of 0 in
+ // any dimension will skip execution of the function.
+ union {
+ // Embedded immutable 3D workgroup count value.
+ uint32_t value[3];
+ // Pointer to the uint32_t[3] containing the 3D workgroup count.
+ // Sampled immediately prior to execution.
+ const uint32_t* ptr;
+ } workgroup_count;
+
+ // Optional transient shared memory size in bytes to allocate and pass into
+ // the iree_task_tile_context_t::local_memory of each invocation of the
+ // dispatch closure.
+ uint32_t local_memory_size;
+
+ // Resulting status from the dispatch available once all workgroups have
+ // completed (or would have completed). If multiple shards processing the
+ // workgroups hit an error the first will be taken and the result ignored. A
+ // dispatch with a non-ok status will mark the parent task scope as failing
+ // when it retires.
+ iree_atomic_intptr_t status;
+
+ // Statistics storage used for aggregating counters across all shards.
+ iree_task_dispatch_statistics_t statistics;
+
+ // The total number of tiles in the dispatch bounding tile_index.
+ uint32_t tile_count;
+
+ // Maximum number of tiles to fetch per tile reservation from the grid.
+ // Bounded by IREE_TASK_DISPATCH_MAX_TILES_PER_SHARD_RESERVATION and a
+ // reasonable number chosen based on the tile and shard counts.
+ uint32_t tiles_per_reservation;
+
+ // The tail tile index; the next reservation will start from here.
+ // This is used by shards to slice off the work to perform in their inner
+ // loop. Ideally we'd have no destructive interference with other shared data
+ // in this structure but the shared parts (status/statistics) are updated once
+ // per shard instead of once per slice and are less of a concern.
+ iree_atomic_int32_t tile_index;
+
+ // Incrementing process-lifetime dispatch identifier.
+ IREE_TRACE(int64_t dispatch_id;)
+} iree_task_dispatch_t;
+
+void iree_task_dispatch_initialize(iree_task_scope_t* scope,
+ iree_task_dispatch_closure_t closure,
+ const uint32_t workgroup_size[3],
+ const uint32_t workgroup_count[3],
+ iree_task_dispatch_t* out_task);
+
+void iree_task_dispatch_initialize_indirect(
+ iree_task_scope_t* scope, iree_task_dispatch_closure_t closure,
+ const uint32_t workgroup_size[3], const uint32_t* workgroup_count_ptr,
+ iree_task_dispatch_t* out_task);
+
+//==============================================================================
+// IREE_TASK_TYPE_DISPATCH_SHARD
+//==============================================================================
+
+typedef iree_alignas(iree_max_align_t) struct {
+ // Task header: implementation detail, do not use.
+ iree_task_t header;
+
+ // NOTE: the parent dispatch task this shard is applied to is in the
+ // header.completion_task field.
+} iree_task_dispatch_shard_t;
+
+void iree_task_dispatch_shard_initialize(iree_task_dispatch_t* dispatch_task,
+ iree_task_dispatch_shard_t* out_task);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_TASK_H_
diff --git a/runtime/src/iree/task/task_impl.h b/runtime/src/iree/task/task_impl.h
new file mode 100644
index 0000000..ee1b5a3
--- /dev/null
+++ b/runtime/src/iree/task/task_impl.h
@@ -0,0 +1,132 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_TASK_IMPL_H_
+#define IREE_TASK_TASK_IMPL_H_
+
+#include "iree/task/list.h"
+#include "iree/task/pool.h"
+#include "iree/task/post_batch.h"
+#include "iree/task/submission.h"
+#include "iree/task/task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//==============================================================================
+// IREE_TASK_TYPE_NOP
+//==============================================================================
+
+// Retires a no-op task.
+// No-op tasks don't *do* anything but must still be handled like any other
+// task in the system so dependent tasks are properly scheduled.
+void iree_task_nop_retire(iree_task_nop_t* task,
+ iree_task_submission_t* pending_submission);
+
+//==============================================================================
+// IREE_TASK_TYPE_CALL
+//==============================================================================
+
+// Executes and retires a user call.
+// May block the caller for an indeterminate amount of time and should only be
+// called from threads owned by or donated to the executor.
+//
+// Errors are propagated to the parent scope.
+void iree_task_call_execute(iree_task_call_t* task,
+ iree_task_submission_t* pending_submission);
+
+//==============================================================================
+// IREE_TASK_TYPE_BARRIER
+//==============================================================================
+
+// Retires a barrier task by notifying all dependent tasks.
+// May add zero or more tasks to the |pending_submission| if they are ready.
+//
+// Only called during coordination and expects the coordinator lock to be held.
+void iree_task_barrier_retire(iree_task_barrier_t* task,
+ iree_task_submission_t* pending_submission);
+
+//==============================================================================
+// IREE_TASK_TYPE_FENCE
+//==============================================================================
+
+// Retires a fence task by updating the scope state.
+//
+// Only called during coordination and expects the coordinator lock to be held.
+void iree_task_fence_retire(iree_task_fence_t* task,
+ iree_task_submission_t* pending_submission);
+
+//==============================================================================
+// IREE_TASK_TYPE_WAIT
+//==============================================================================
+
+// Returns true if the user-specified condition on the task is true.
+//
+// Only called during coordination and expects the coordinator lock to be held.
+bool iree_task_wait_check_condition(iree_task_wait_t* task);
+
+// Retires a wait when it has completed waiting (successfully or not).
+//
+// Only called during coordination and expects the coordinator lock to be held.
+void iree_task_wait_retire(iree_task_wait_t* task,
+ iree_task_submission_t* pending_submission,
+ iree_status_t status);
+
+//==============================================================================
+// IREE_TASK_TYPE_DISPATCH
+//==============================================================================
+
+// Schedules a dispatch by forking out to zero or more shards that will be
+// executed on workers. The shards are allocated from an executor-owned pool
+// and are generally not user-visible - they'll just see their dispatch begin
+// execution prior to the shards and end execution after the last shard
+// finishes.
+//
+// Only called during coordination and expects the coordinator lock to be held.
+void iree_task_dispatch_issue(iree_task_dispatch_t* dispatch_task,
+ iree_task_pool_t* shard_task_pool,
+ iree_task_submission_t* pending_submission,
+ iree_task_post_batch_t* post_batch);
+
+// Retires a dispatch when all issued shards have completed executing.
+//
+// Only called during coordination and expects the coordinator lock to be held.
+void iree_task_dispatch_retire(iree_task_dispatch_t* dispatch_task,
+ iree_task_submission_t* pending_submission);
+
+//==============================================================================
+// IREE_TASK_TYPE_DISPATCH_SHARD
+//==============================================================================
+
+// Allocates a dispatch shard task from the shared executor task pool.
+// The shard will be released back to the pool when it has completed execution.
+iree_task_dispatch_shard_t* iree_task_dispatch_shard_allocate(
+ iree_task_dispatch_t* dispatch_task, iree_task_pool_t* shard_task_pool);
+
+// Executes and retires a dispatch shard task.
+// May block the caller for an indeterminate amount of time and should only be
+// called from threads owned by or donated to the executor.
+//
+// |processor_id| is a guess as to which logical processor the shard is
+// executing on. It may be out of date or 0 if the processor could not be
+// queried.
+//
+// |worker_local_memory| is a block of memory exclusively available to the shard
+// during execution. Contents are undefined both before and after execution.
+//
+// Errors are propagated to the parent scope and the dispatch will fail once
+// all shards have completed.
+void iree_task_dispatch_shard_execute(
+ iree_task_dispatch_shard_t* task, iree_cpu_processor_id_t processor_id,
+ iree_byte_span_t worker_local_memory,
+ iree_task_submission_t* pending_submission);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_TASK_IMPL_H_
diff --git a/runtime/src/iree/task/task_test_barrier.cc b/runtime/src/iree/task/task_test_barrier.cc
new file mode 100644
index 0000000..135f63f
--- /dev/null
+++ b/runtime/src/iree/task/task_test_barrier.cc
@@ -0,0 +1,323 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <atomic>
+#include <cstdint>
+
+#include "iree/base/api.h"
+#include "iree/task/submission.h"
+#include "iree/task/task.h"
+#include "iree/task/testing/task_test.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+using iree::Status;
+using iree::StatusCode;
+using iree::testing::status::StatusIs;
+
+class TaskBarrierTest : public TaskTest {};
+
+enum {
+ TASK_A = 1 << 0,
+ TASK_B = 1 << 1,
+ TASK_C = 1 << 2,
+ TASK_D = 1 << 3,
+};
+
+// We track which tasks were successfully executed.
+struct TaskCtx {
+ std::atomic<uint32_t> tasks_called = {0};
+};
+
+#define MAKE_CALL_TASK_CLOSURE(task_ctx, task_id, status_code) \
+ iree_task_make_call_closure( \
+ [](void* user_context, iree_task_t* task, \
+ iree_task_submission_t* pending_submission) { \
+ IREE_TRACE_SCOPE(); \
+ auto* ctx = (TaskCtx*)user_context; \
+ EXPECT_EQ(0, (ctx->tasks_called & (task_id))); \
+ ctx->tasks_called |= (task_id); \
+ return iree_status_from_code(status_code); \
+ }, \
+ (void*)task_ctx)
+
+// Issues a standalone empty barrier:
+// { barrier }
+TEST_F(TaskBarrierTest, IssueStandalone) {
+ iree_task_barrier_t barrier_task;
+ iree_task_barrier_initialize_empty(&scope_, &barrier_task);
+ IREE_ASSERT_OK(
+ SubmitTasksAndWaitIdle(&barrier_task.header, &barrier_task.header));
+}
+
+// Issues a serialized sequence:
+// { a | barrier | b }
+TEST_F(TaskBarrierTest, IssueSequence) {
+ IREE_TRACE_SCOPE();
+ TaskCtx task_ctx;
+
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_A, IREE_STATUS_OK),
+ &task_a);
+ iree_task_call_t task_b;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_B, IREE_STATUS_OK),
+ &task_b);
+
+ iree_task_t* dependent_tasks[1] = {&task_b.header};
+ iree_task_barrier_t barrier_task;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(dependent_tasks),
+ dependent_tasks, &barrier_task);
+ iree_task_set_completion_task(&task_a.header, &barrier_task.header);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &task_b.header));
+ EXPECT_EQ(TASK_A | TASK_B, task_ctx.tasks_called);
+}
+
+// Issues a serialized sequence where task A fails:
+// { a | barrier | b }
+// B should not be run.
+TEST_F(TaskBarrierTest, IssueSequenceFailure) {
+ IREE_TRACE_SCOPE();
+ TaskCtx task_ctx;
+
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_A, IREE_STATUS_DATA_LOSS),
+ &task_a);
+ iree_task_call_t task_b;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_B, IREE_STATUS_OK),
+ &task_b);
+
+ iree_task_t* dependent_tasks[1] = {&task_b.header};
+ iree_task_barrier_t barrier_task;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(dependent_tasks),
+ dependent_tasks, &barrier_task);
+ iree_task_set_completion_task(&task_a.header, &barrier_task.header);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &task_b.header));
+ EXPECT_EQ(TASK_A, task_ctx.tasks_called);
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDataLoss));
+}
+
+// Issues a deeply serialized sequence where task A fails:
+// { a | barrier | b | barrier | c }
+// B and C should not be run.
+TEST_F(TaskBarrierTest, IssueDeepSequenceFailure) {
+ IREE_TRACE_SCOPE();
+ TaskCtx task_ctx;
+
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_A, IREE_STATUS_DATA_LOSS),
+ &task_a);
+ iree_task_call_t task_b;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_B, IREE_STATUS_OK),
+ &task_b);
+ iree_task_call_t task_c;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_C, IREE_STATUS_OK),
+ &task_c);
+
+ iree_task_t* dependent_tasks_0[1] = {&task_b.header};
+ iree_task_barrier_t barrier_task_0;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(dependent_tasks_0),
+ dependent_tasks_0, &barrier_task_0);
+ iree_task_set_completion_task(&task_a.header, &barrier_task_0.header);
+
+ iree_task_t* dependent_tasks_1[1] = {&task_c.header};
+ iree_task_barrier_t barrier_task_1;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(dependent_tasks_1),
+ dependent_tasks_1, &barrier_task_1);
+ iree_task_set_completion_task(&task_b.header, &barrier_task_1.header);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &task_c.header));
+ EXPECT_EQ(TASK_A, task_ctx.tasks_called);
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDataLoss));
+}
+
+// Issues a join:
+// { a, b, c | barrier | d }
+TEST_F(TaskBarrierTest, IssueJoin) {
+ IREE_TRACE_SCOPE();
+ TaskCtx task_ctx;
+
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_A, IREE_STATUS_OK),
+ &task_a);
+ iree_task_call_t task_b;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_B, IREE_STATUS_OK),
+ &task_b);
+ iree_task_call_t task_c;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_C, IREE_STATUS_OK),
+ &task_c);
+ iree_task_call_t task_d;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_D, IREE_STATUS_OK),
+ &task_d);
+
+ iree_task_t* dependent_tasks[1] = {&task_d.header};
+ iree_task_barrier_t barrier_task;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(dependent_tasks),
+ dependent_tasks, &barrier_task);
+ iree_task_set_completion_task(&task_a.header, &barrier_task.header);
+ iree_task_set_completion_task(&task_b.header, &barrier_task.header);
+ iree_task_set_completion_task(&task_c.header, &barrier_task.header);
+
+ iree_task_submission_t submission;
+ iree_task_submission_initialize(&submission);
+ iree_task_submission_enqueue(&submission, &task_a.header);
+ iree_task_submission_enqueue(&submission, &task_b.header);
+ iree_task_submission_enqueue(&submission, &task_c.header);
+ IREE_ASSERT_OK(SubmitAndWaitIdle(&submission, &task_d.header));
+ EXPECT_EQ(TASK_A | TASK_B | TASK_C | TASK_D, task_ctx.tasks_called);
+}
+
+// Issues a join where a dependent task B fails:
+// { a, b, c | barrier | d }
+// A, B, and C should all run but the barrier should fail and D should not.
+TEST_F(TaskBarrierTest, IssueJoinFailure) {
+ IREE_TRACE_SCOPE();
+ TaskCtx task_ctx;
+
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_A, IREE_STATUS_OK),
+ &task_a);
+ iree_task_call_t task_b;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_B, IREE_STATUS_DATA_LOSS),
+ &task_b);
+ iree_task_call_t task_c;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_C, IREE_STATUS_OK),
+ &task_c);
+ iree_task_call_t task_d;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_D, IREE_STATUS_OK),
+ &task_d);
+
+ iree_task_t* dependent_tasks[1] = {&task_d.header};
+ iree_task_barrier_t barrier_task;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(dependent_tasks),
+ dependent_tasks, &barrier_task);
+ iree_task_set_completion_task(&task_a.header, &barrier_task.header);
+ iree_task_set_completion_task(&task_b.header, &barrier_task.header);
+ iree_task_set_completion_task(&task_c.header, &barrier_task.header);
+
+ iree_task_submission_t submission;
+ iree_task_submission_initialize(&submission);
+ iree_task_submission_enqueue(&submission, &task_a.header);
+ iree_task_submission_enqueue(&submission, &task_b.header);
+ iree_task_submission_enqueue(&submission, &task_c.header);
+ IREE_ASSERT_OK(SubmitAndWaitIdle(&submission, &task_d.header));
+ EXPECT_EQ(TASK_A | TASK_B | TASK_C, task_ctx.tasks_called);
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDataLoss));
+}
+
+// Issues a fork:
+// { a | barrier | b, c, d | nop }
+TEST_F(TaskBarrierTest, IssueFork) {
+ IREE_TRACE_SCOPE();
+ TaskCtx task_ctx;
+
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_A, IREE_STATUS_OK),
+ &task_a);
+ iree_task_call_t task_b;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_B, IREE_STATUS_OK),
+ &task_b);
+ iree_task_call_t task_c;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_C, IREE_STATUS_OK),
+ &task_c);
+ iree_task_call_t task_d;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_D, IREE_STATUS_OK),
+ &task_d);
+
+ iree_task_t* dependent_tasks[3] = {
+ &task_b.header,
+ &task_c.header,
+ &task_d.header,
+ };
+ iree_task_barrier_t barrier_task;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(dependent_tasks),
+ dependent_tasks, &barrier_task);
+ iree_task_set_completion_task(&task_a.header, &barrier_task.header);
+
+ // Just to give us a tail task to wait on.
+ iree_task_nop_t nop_task;
+ iree_task_nop_initialize(&scope_, &nop_task);
+ iree_task_set_completion_task(&task_b.header, &nop_task.header);
+ iree_task_set_completion_task(&task_c.header, &nop_task.header);
+ iree_task_set_completion_task(&task_d.header, &nop_task.header);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &nop_task.header));
+ EXPECT_EQ(TASK_A | TASK_B | TASK_C | TASK_D, task_ctx.tasks_called);
+}
+
+// Issues a fork where task A fails:
+// { a (fails) | barrier | b, c, d | nop }
+// The barrier should fail and none of the subsequent tasks B, C, D should run.
+TEST_F(TaskBarrierTest, IssueForkFailure) {
+ IREE_TRACE_SCOPE();
+ TaskCtx task_ctx;
+
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_A, IREE_STATUS_DATA_LOSS),
+ &task_a);
+ iree_task_call_t task_b;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_B, IREE_STATUS_OK),
+ &task_b);
+ iree_task_call_t task_c;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_C, IREE_STATUS_OK),
+ &task_c);
+ iree_task_call_t task_d;
+ iree_task_call_initialize(
+ &scope_, MAKE_CALL_TASK_CLOSURE(&task_ctx, TASK_D, IREE_STATUS_OK),
+ &task_d);
+
+ iree_task_t* dependent_tasks[3] = {
+ &task_b.header,
+ &task_c.header,
+ &task_d.header,
+ };
+ iree_task_barrier_t barrier_task;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(dependent_tasks),
+ dependent_tasks, &barrier_task);
+ iree_task_set_completion_task(&task_a.header, &barrier_task.header);
+
+ // Just to give us a tail task to wait on.
+ iree_task_nop_t nop_task;
+ iree_task_nop_initialize(&scope_, &nop_task);
+ iree_task_set_completion_task(&task_b.header, &nop_task.header);
+ iree_task_set_completion_task(&task_c.header, &nop_task.header);
+ iree_task_set_completion_task(&task_d.header, &nop_task.header);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &nop_task.header));
+ EXPECT_EQ(TASK_A, task_ctx.tasks_called);
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDataLoss));
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/task_test_call.cc b/runtime/src/iree/task/task_test_call.cc
new file mode 100644
index 0000000..5572fac
--- /dev/null
+++ b/runtime/src/iree/task/task_test_call.cc
@@ -0,0 +1,333 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+
+#include "iree/base/api.h"
+#include "iree/task/submission.h"
+#include "iree/task/task.h"
+#include "iree/task/testing/task_test.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+using iree::Status;
+using iree::StatusCode;
+using iree::testing::status::StatusIs;
+
+class TaskCallTest : public TaskTest {};  // scope_ / submit helpers from TaskTest.
+
+// Tests issuing a single call and waiting for it to complete.
+TEST_F(TaskCallTest, Issue) {
+ IREE_TRACE_SCOPE();
+
+ struct TestCtx {
+ int did_call = 0;  // Incremented exactly once by the closure.
+ };
+ TestCtx ctx;
+
+ iree_task_call_t task;
+ iree_task_call_initialize(&scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+ EXPECT_EQ(0, ctx->did_call);
+ ++ctx->did_call;
+ return iree_ok_status();
+ },
+ (void*)&ctx),
+ &task);
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+ EXPECT_EQ(1, ctx.did_call);  // Closure ran exactly once.
+ IREE_EXPECT_OK(iree_task_scope_consume_status(&scope_));  // No failure recorded.
+}
+
+// Tests issuing a single call that returns a failure.
+// The failure should be propagated back on the task scope.
+TEST_F(TaskCallTest, IssueFailure) {
+ IREE_TRACE_SCOPE();
+
+ struct TestCtx {
+ int did_call = 0;
+ };
+ TestCtx ctx;
+
+ // Call successfully issues but fails with some user error.
+ iree_task_call_t task;
+ iree_task_call_initialize(&scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+ EXPECT_EQ(0, ctx->did_call);
+ ++ctx->did_call;
+ return iree_make_status(
+ IREE_STATUS_UNAUTHENTICATED, "whoops!");
+ },
+ (void*)&ctx),
+ &task);
+
+ // The task should still be cleaned up, even if it fails.
+ static int did_cleanup = 0;  // static: the cleanup lambda below is captureless.
+ did_cleanup = 0;
+ iree_task_set_cleanup_fn(
+ &task.header, +[](iree_task_t* task, iree_status_code_t status_code) {
+ IREE_TRACE_SCOPE();
+ EXPECT_EQ(status_code, IREE_STATUS_ABORTED);  // Cleanup sees ABORTED, not the call's own status.
+ ++did_cleanup;
+ });
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+
+ // Expect both the call to have been made and the task cleaned up.
+ // The scope has the failure status.
+ EXPECT_EQ(1, ctx.did_call);
+ EXPECT_EQ(1, did_cleanup);
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kUnauthenticated));  // Original status lands on the scope.
+}
+
+// Tests issuing chained calls where the first fails.
+// The failure should be propagated back on the task scope and the chained call
+// should be aborted.
+TEST_F(TaskCallTest, IssueFailureChained) {
+ IREE_TRACE_SCOPE();
+
+ struct TestCtx {
+ int did_call_a = 0;
+ int did_call_b = 0;  // Must stay 0: B is aborted after A fails.
+ };
+ TestCtx ctx;
+
+ // First call that will fail.
+ iree_task_call_t task_a;
+ iree_task_call_initialize(&scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+ EXPECT_EQ(0, ctx->did_call_a);
+ ++ctx->did_call_a;
+ // Force a failure.
+ return iree_make_status(
+ IREE_STATUS_UNAUTHENTICATED, "whoops!");
+ },
+ (void*)&ctx),
+ &task_a);
+ static int did_cleanup_a = 0;  // static: captureless cleanup lambda reads it.
+ did_cleanup_a = 0;
+ iree_task_set_cleanup_fn(
+ &task_a.header, +[](iree_task_t* task, iree_status_code_t status_code) {
+ // Expect that the cleanup gets a signal indicating the task failed.
+ IREE_TRACE_SCOPE();
+ EXPECT_EQ(status_code, IREE_STATUS_ABORTED);
+ ++did_cleanup_a;
+ });
+
+ // Second call that will be aborted after the first fails.
+ iree_task_call_t task_b;
+ iree_task_call_initialize(&scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ // This should never get called!
+ IREE_TRACE_SCOPE();
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+ EXPECT_EQ(0, ctx->did_call_b);
+ ++ctx->did_call_b;
+ return iree_ok_status();
+ },
+ (void*)&ctx),
+ &task_b);
+ static int did_cleanup_b = 0;
+ did_cleanup_b = 0;
+ iree_task_set_cleanup_fn(
+ &task_b.header, +[](iree_task_t* task, iree_status_code_t status_code) {
+ // Expect that the cleanup gets a signal indicating the task failed.
+ IREE_TRACE_SCOPE();
+ EXPECT_EQ(status_code, IREE_STATUS_ABORTED);
+ ++did_cleanup_b;
+ });
+
+ // A -> B
+ iree_task_set_completion_task(&task_a.header, &task_b.header);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &task_b.header));
+
+ // Expect that A was called but B was not, and both were cleaned up.
+ EXPECT_EQ(1, ctx.did_call_a);
+ EXPECT_EQ(1, did_cleanup_a);
+ EXPECT_EQ(0, ctx.did_call_b);  // B aborted, never executed.
+ EXPECT_EQ(1, did_cleanup_b);  // ...but its cleanup still ran.
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kUnauthenticated));
+}
+
+// Issues task_a which then issues a nested task_b and waits for it to complete
+// prior to progressing. This models dynamic parallelism:
+// http://developer.download.nvidia.com/GTC/PDF/GTC2012/PresentationPDF/S0338-GTC2012-CUDA-Programming-Model.pdf
+TEST_F(TaskCallTest, IssueNested) {
+ IREE_TRACE_SCOPE();
+
+ struct TestCtx {
+ std::atomic<int> did_call_a = {0};
+ std::atomic<int> did_call_b = {0};
+ std::atomic<bool> has_issued = {false};  // Distinguishes A's first call from its re-issue.
+ iree_task_call_t task_b;  // Storage for the nested task; must outlive its execution.
+ };
+ TestCtx ctx;
+
+ // task_a will get called twice: the first time it will schedule task_b and
+ // then it'll get called again when task_b completes. This is not the only way
+ // to do this: task_a could set it up so that a task_c ran after task_b
+ // completed instead of getting itself called twice. Both approaches have
+ // their uses.
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+
+ if (!ctx->has_issued) {
+ ctx->has_issued = true;
+ EXPECT_EQ(0, ctx->did_call_a);
+ ++ctx->did_call_a;
+ iree_task_call_initialize(
+ task->scope,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+ EXPECT_EQ(0, ctx->did_call_b);
+ ++ctx->did_call_b;
+ return iree_ok_status();
+ },
+ user_context),
+ &ctx->task_b);
+ iree_task_set_completion_task(&ctx->task_b.header, task);  // B re-triggers A.
+ iree_task_submission_enqueue(pending_submission,
+ &ctx->task_b.header);
+ } else {
+ EXPECT_EQ(1, ctx->did_call_a);  // Second invocation, after B completed.
+ ++ctx->did_call_a;
+ }
+
+ return iree_ok_status();
+ },
+ (void*)&ctx),
+ &task_a);
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &task_a.header));
+ EXPECT_EQ(2, ctx.did_call_a);  // Once to issue B, once after B completed.
+ EXPECT_EQ(1, ctx.did_call_b);
+ IREE_EXPECT_OK(iree_task_scope_consume_status(&scope_));
+}
+
+// Issues task_a which then issues a nested task_b and task_c; task_b fails and
+// it's expected that task_c completes before failing task_a.
+// Sibling tasks don't abort each other and as such we are guaranteed that C
+// will run: A -> [B fail, C ok] -> A fail
+TEST_F(TaskCallTest, IssueNestedFailure) {
+ IREE_TRACE_SCOPE();
+
+ struct TestCtx {
+ std::atomic<int> did_call_a = {0};
+ std::atomic<int> did_call_b = {0};
+ std::atomic<int> did_call_c = {0};
+ std::atomic<bool> has_issued = {false};
+ iree_task_call_t task_b;  // Nested task storage lives in the ctx.
+ iree_task_call_t task_c;
+ };
+ TestCtx ctx;
+
+ // task_a will get called only once due to the error: the pre-nesting call
+ // will schedule task_b/task_c and then the expected call after the tasks
+ // complete will not be made as task_b fails.
+ iree_task_call_t task_a;
+ iree_task_call_initialize(
+ &scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+
+ if (!ctx->has_issued) {
+ ctx->has_issued = true;
+ EXPECT_EQ(0, ctx->did_call_a);
+ ++ctx->did_call_a;
+
+ // task_b: (fails)
+ iree_task_call_initialize(
+ task->scope,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+ EXPECT_EQ(0, ctx->did_call_b);
+ ++ctx->did_call_b;
+ return iree_make_status(IREE_STATUS_DATA_LOSS, "uh oh");
+ },
+ user_context),
+ &ctx->task_b);
+ iree_task_set_completion_task(&ctx->task_b.header, task);
+ iree_task_submission_enqueue(pending_submission,
+ &ctx->task_b.header);
+
+ // task_c: (ok)
+ iree_task_call_initialize(
+ task->scope,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ auto* ctx = (TestCtx*)user_context;
+ EXPECT_TRUE(NULL != ctx);
+ EXPECT_EQ(0, ctx->did_call_c);
+ ++ctx->did_call_c;
+ return iree_ok_status();
+ },
+ user_context),
+ &ctx->task_c);
+ iree_task_set_completion_task(&ctx->task_c.header, task);
+ iree_task_submission_enqueue(pending_submission,
+ &ctx->task_c.header);
+ } else {
+ EXPECT_EQ(1, ctx->did_call_a);  // Should be unreachable: B's failure blocks the re-issue.
+ ++ctx->did_call_a;
+ }
+
+ return iree_ok_status();
+ },
+ (void*)&ctx),
+ &task_a);
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &task_a.header));
+ EXPECT_EQ(1, ctx.did_call_a);  // A only ran its issuing pass.
+ EXPECT_EQ(1, ctx.did_call_b);
+ EXPECT_EQ(1, ctx.did_call_c);  // Sibling C still ran despite B's failure.
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDataLoss));
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/task_test_dispatch.cc b/runtime/src/iree/task/task_test_dispatch.cc
new file mode 100644
index 0000000..3324b6c
--- /dev/null
+++ b/runtime/src/iree/task/task_test_dispatch.cc
@@ -0,0 +1,217 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+
+#include "iree/base/api.h"
+#include "iree/task/submission.h"
+#include "iree/task/task.h"
+#include "iree/task/testing/task_test.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+using iree::Status;
+using iree::StatusCode;
+using iree::testing::status::StatusIs;
+
+class GridCoverage {  // Counts per-workgroup invocations across a 3D dispatch grid.
+ public:
+ explicit GridCoverage(const uint32_t workgroup_count[3])
+ : workgroup_count_(workgroup_count[0] * workgroup_count[1] *
+ workgroup_count[2]),
+ storage_(new iree_atomic_int32_t[workgroup_count_]) {
+ for (iree_host_size_t i = 0; i < workgroup_count_; ++i) {
+ storage_[i] = IREE_ATOMIC_VAR_INIT(0);
+ }
+ }
+
+ bool Verify() {  // True iff every workgroup ran exactly once.
+ fflush(stdout);
+ for (iree_host_size_t i = 0; i < workgroup_count_; ++i) {
+ if (iree_atomic_load_int32(&storage_[i], iree_memory_order_seq_cst) !=
+ 1) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ static iree_status_t Tile(void* user_context,
+ const iree_task_tile_context_t* tile_context,
+ iree_task_submission_t* pending_submission) {
+ GridCoverage* coverage = reinterpret_cast<GridCoverage*>(user_context);
+ uint32_t slot =
+ tile_context->workgroup_xyz[2] * (tile_context->workgroup_count[1] *
+ tile_context->workgroup_count[0]) +
+ tile_context->workgroup_xyz[1] * tile_context->workgroup_count[0] +
+ tile_context->workgroup_xyz[0];  // Linearize (x,y,z) to a flat slot index.
+ iree_atomic_fetch_add_int32(&coverage->storage_[slot], 1,
+ iree_memory_order_seq_cst);
+
+ // Useful when testing large grids:
+ // printf("%u, %u, %u\n", tile_context->workgroup_xyz[0],
+ // tile_context->workgroup_xyz[1], tile_context->workgroup_xyz[2]);
+
+ return iree_ok_status();
+ }
+
+ private:
+ size_t workgroup_count_;  // Total flattened workgroup count (x*y*z).
+ std::unique_ptr<iree_atomic_int32_t[]> storage_;  // One counter per workgroup.
+};
+
+class TaskDispatchTest : public TaskTest {
+ public:
+ void DispatchAndVerifyGrid(const uint32_t workgroup_size[3],
+ const uint32_t workgroup_count[3],
+ uint32_t dispatch_flags) {
+ IREE_TRACE_SCOPE();
+ GridCoverage coverage(workgroup_count);
+ iree_task_dispatch_t task;
+ iree_task_dispatch_initialize(
+ &scope_,
+ iree_task_make_dispatch_closure(GridCoverage::Tile, (void*)&coverage),
+ workgroup_size, workgroup_count, &task);
+ task.header.flags |= dispatch_flags;  // Lets tests toggle extra dispatch flags.
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+ EXPECT_TRUE(coverage.Verify());  // Each workgroup must have run exactly once.
+ }
+};
+
+TEST_F(TaskDispatchTest, Issue000) {
+ IREE_TRACE_SCOPE();
+ const uint32_t kWorkgroupSize[3] = {1, 1, 1};
+ const uint32_t kWorkgroupCount[3] = {0, 0, 0};  // Empty grid: no tiles should run.
+ DispatchAndVerifyGrid(kWorkgroupSize, kWorkgroupCount, IREE_TASK_FLAG_NONE);
+}
+
+TEST_F(TaskDispatchTest, Issue120) {
+ IREE_TRACE_SCOPE();
+ const uint32_t kWorkgroupSize[3] = {1, 1, 1};
+ const uint32_t kWorkgroupCount[3] = {1, 2, 0};  // Zero in one dim still yields an empty grid.
+ DispatchAndVerifyGrid(kWorkgroupSize, kWorkgroupCount, IREE_TASK_FLAG_NONE);
+}
+
+TEST_F(TaskDispatchTest, Issue111) {
+ IREE_TRACE_SCOPE();
+ const uint32_t kWorkgroupSize[3] = {1, 1, 1};
+ const uint32_t kWorkgroupCount[3] = {1, 1, 1};  // Smallest non-empty grid.
+ DispatchAndVerifyGrid(kWorkgroupSize, kWorkgroupCount, IREE_TASK_FLAG_NONE);
+}
+
+TEST_F(TaskDispatchTest, Issue345) {
+ IREE_TRACE_SCOPE();
+ const uint32_t kWorkgroupSize[3] = {1, 1, 1};
+ const uint32_t kWorkgroupCount[3] = {3, 4, 5};  // 60 workgroups across all three dims.
+ DispatchAndVerifyGrid(kWorkgroupSize, kWorkgroupCount, IREE_TASK_FLAG_NONE);
+}
+
+TEST_F(TaskDispatchTest, IssueIndirect) {
+ IREE_TRACE_SCOPE();
+
+ static const uint32_t kWorkgroupSize[3] = {1, 1, 1};
+ static const uint32_t kWorkgroupCount[3] = {3, 4, 5};  // static: read by the captureless closure below.
+ uint32_t indirect_workgroup_count[3] = {0, 0, 0};  // Filled in at runtime by calculate_task.
+ GridCoverage coverage(kWorkgroupCount);
+
+ iree_task_call_t calculate_task;
+ iree_task_call_initialize(
+ &scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ uint32_t* indirect_workgroup_count_ptr = (uint32_t*)user_context;
+ for (size_t i = 0; i < IREE_ARRAYSIZE(kWorkgroupCount); ++i) {
+ indirect_workgroup_count_ptr[i] = kWorkgroupCount[i];
+ }
+ return iree_ok_status();
+ },
+ (void*)indirect_workgroup_count),
+ &calculate_task);
+
+ iree_task_dispatch_t dispatch_task;
+ iree_task_dispatch_initialize_indirect(
+ &scope_,
+ iree_task_make_dispatch_closure(GridCoverage::Tile, (void*)&coverage),
+ kWorkgroupSize, indirect_workgroup_count, &dispatch_task);  // Count is read after calculate_task runs.
+ iree_task_set_completion_task(&calculate_task.header, &dispatch_task.header);
+
+ IREE_ASSERT_OK(
+ SubmitTasksAndWaitIdle(&calculate_task.header, &dispatch_task.header));
+ EXPECT_TRUE(coverage.Verify());
+}
+
+TEST_F(TaskDispatchTest, IssueFailure) {
+ IREE_TRACE_SCOPE();
+
+ const uint32_t kWorkgroupSize[3] = {1, 1, 1};
+ const uint32_t kWorkgroupCount[3] = {64, 1, 1};
+
+ auto tile = [](void* user_context,
+ const iree_task_tile_context_t* tile_context,
+ iree_task_submission_t* pending_submission) -> iree_status_t {
+ IREE_TRACE_SCOPE();
+ return tile_context->workgroup_xyz[0] == 32
+ ? iree_make_status(IREE_STATUS_DATA_LOSS, "whoops!")
+ : iree_ok_status();  // Exactly one tile mid-grid fails.
+ };
+
+ iree_task_dispatch_t task;
+ iree_task_dispatch_initialize(&scope_,
+ iree_task_make_dispatch_closure(tile, NULL),
+ kWorkgroupSize, kWorkgroupCount, &task);
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDataLoss));  // Single-tile failure fails the dispatch.
+}
+
+TEST_F(TaskDispatchTest, IssueFailureChained) {
+ IREE_TRACE_SCOPE();
+
+ const uint32_t kWorkgroupSize[3] = {1, 1, 1};
+ const uint32_t kWorkgroupCount[3] = {64, 1, 1};
+
+ auto tile = [](void* user_context,
+ const iree_task_tile_context_t* tile_context,
+ iree_task_submission_t* pending_submission) -> iree_status_t {
+ return tile_context->workgroup_xyz[0] == 32
+ ? iree_make_status(IREE_STATUS_DATA_LOSS, "whoops!")
+ : iree_ok_status();
+ };
+
+ iree_task_dispatch_t dispatch_task;
+ iree_task_dispatch_initialize(
+ &scope_, iree_task_make_dispatch_closure(tile, NULL), kWorkgroupSize,
+ kWorkgroupCount, &dispatch_task);
+
+ int did_call = 0;
+ iree_task_call_t call_task;
+ iree_task_call_initialize(&scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ int* did_call_ptr = (int*)user_context;
+ ++(*did_call_ptr);
+ return iree_ok_status();
+ },
+ &did_call),
+ &call_task);
+ iree_task_set_completion_task(&dispatch_task.header, &call_task.header);  // dispatch -> call
+
+ IREE_ASSERT_OK(
+ SubmitTasksAndWaitIdle(&dispatch_task.header, &call_task.header));
+ EXPECT_EQ(0, did_call);  // Follow-on call was aborted by the dispatch failure.
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDataLoss));
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/task_test_fence.cc b/runtime/src/iree/task/task_test_fence.cc
new file mode 100644
index 0000000..0ed7828
--- /dev/null
+++ b/runtime/src/iree/task/task_test_fence.cc
@@ -0,0 +1,83 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/task.h"
+#include "iree/task/testing/task_test.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+using iree::Status;
+using iree::StatusCode;
+using iree::testing::status::StatusIs;
+
+class TaskFenceTest : public TaskTest {};  // Fixture; scope_ comes from TaskTest.
+
+// Tests a chain of fences A -> B -> C.
+TEST_F(TaskFenceTest, IssueChained) {
+ iree_task_fence_t task_a;
+ iree_task_fence_initialize(&scope_, iree_wait_primitive_immediate(), &task_a);
+
+ iree_task_fence_t task_b;
+ iree_task_fence_initialize(&scope_, iree_wait_primitive_immediate(), &task_b);
+ iree_task_set_completion_task(&task_a.header, &task_b.header);  // A -> B
+
+ iree_task_fence_t task_c;
+ iree_task_fence_initialize(&scope_, iree_wait_primitive_immediate(), &task_c);
+ iree_task_set_completion_task(&task_b.header, &task_c.header);  // B -> C
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &task_c.header));
+}
+
+// Tests that failures propagate through fences; task B should not be called.
+// A fails -> fence -> B
+TEST_F(TaskFenceTest, IssueChainedFailure) {
+ IREE_TRACE_SCOPE();
+
+ int did_call_a = 0;
+ iree_task_call_t task_a;
+ iree_task_call_initialize(&scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ int* did_call_ptr = (int*)user_context;
+ ++(*did_call_ptr);
+ return iree_make_status(IREE_STATUS_DATA_LOSS,
+ "whoops!");
+ },
+ &did_call_a),
+ &task_a);
+
+ iree_task_fence_t fence_task;
+ iree_task_fence_initialize(&scope_, iree_wait_primitive_immediate(),
+ &fence_task);
+ iree_task_set_completion_task(&task_a.header, &fence_task.header);  // A -> fence
+
+ int did_call_b = 0;
+ iree_task_call_t task_b;
+ iree_task_call_initialize(&scope_,
+ iree_task_make_call_closure(
+ [](void* user_context, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ IREE_TRACE_SCOPE();
+ int* did_call_ptr = (int*)user_context;
+ ++(*did_call_ptr);
+ return iree_ok_status();
+ },
+ &did_call_b),
+ &task_b);
+ iree_task_set_completion_task(&fence_task.header, &task_b.header);  // fence -> B
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task_a.header, &task_b.header));
+ EXPECT_EQ(1, did_call_a);
+ EXPECT_EQ(0, did_call_b);  // Failure propagated through the fence; B never ran.
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDataLoss));
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/task_test_nop.cc b/runtime/src/iree/task/task_test_nop.cc
new file mode 100644
index 0000000..8aeb539
--- /dev/null
+++ b/runtime/src/iree/task/task_test_nop.cc
@@ -0,0 +1,23 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/task.h"
+#include "iree/task/testing/task_test.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+class TaskNopTest : public TaskTest {};  // Fixture; scope_ comes from TaskTest.
+
+TEST_F(TaskNopTest, Issue) {  // A nop task should issue and retire cleanly.
+ IREE_TRACE_SCOPE();
+ iree_task_nop_t task;
+ iree_task_nop_initialize(&scope_, &task);
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/task_test_wait.cc b/runtime/src/iree/task/task_test_wait.cc
new file mode 100644
index 0000000..907e2fa
--- /dev/null
+++ b/runtime/src/iree/task/task_test_wait.cc
@@ -0,0 +1,297 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <atomic>
+#include <chrono>
+#include <thread>
+
+#include "iree/task/task.h"
+#include "iree/task/testing/task_test.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+using iree::Status;
+using iree::StatusCode;
+using iree::testing::status::StatusIs;
+
+// NOTE: we intentionally perform most signaling to/from C++ std::threads.
+// This models a real application that may be passing in handles tied to custom
+// or system primitives unrelated to the task system.
+
+class TaskWaitTest : public TaskTest {};  // Fixture; scope_ comes from TaskTest.
+
+// Issues a wait task on a handle that has already been signaled.
+// The poller will query the status of the handle and immediately retire the
+// task.
+TEST_F(TaskWaitTest, IssueSignaled) {
+ IREE_TRACE_SCOPE();
+
+ iree_event_t event;
+ iree_event_initialize(/*initial_state=*/true, &event);  // Pre-signaled.
+
+ iree_task_wait_t task;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event),
+ IREE_TIME_INFINITE_FUTURE, &task);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+ IREE_EXPECT_OK(iree_task_scope_consume_status(&scope_));
+
+ iree_event_deinitialize(&event);
+}
+
+// Issues a wait task on an unsignaled handle such that the poller must wait.
+// We'll spin up a thread that sets it a short time in the future and ensure
+// that the poller woke and retired the task.
+TEST_F(TaskWaitTest, IssueUnsignaled) {
+ IREE_TRACE_SCOPE();
+
+ iree_event_t event;
+ iree_event_initialize(/*initial_state=*/false, &event);
+
+ iree_task_wait_t task;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event),
+ IREE_TIME_INFINITE_FUTURE, &task);
+
+ // Spin up a thread that will signal the event after we start waiting on it.
+ std::atomic<bool> has_signaled = {false};
+ std::thread signal_thread([&]() {
+ IREE_TRACE_SCOPE();
+ std::this_thread::sleep_for(std::chrono::milliseconds(150));
+ EXPECT_FALSE(has_signaled);
+ has_signaled = true;
+ iree_event_set(&event);
+ });
+
+ EXPECT_FALSE(has_signaled);
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+ EXPECT_TRUE(has_signaled);  // Wait retired only after the thread signaled.
+ IREE_EXPECT_OK(iree_task_scope_consume_status(&scope_));
+
+ signal_thread.join();
+ iree_event_deinitialize(&event);
+}
+
+// Issues a wait task on a handle that will never be signaled.
+// We set the deadline in the near future and ensure that the poller correctly
+// fails the wait with a DEADLINE_EXCEEDED.
+TEST_F(TaskWaitTest, IssueTimeout) {
+ IREE_TRACE_SCOPE();
+
+ iree_event_t event;
+ iree_event_initialize(/*initial_state=*/false, &event);  // Never signaled.
+
+ iree_task_wait_t task;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event),
+ iree_time_now() + (150 * 1000000), &task);  // 150ms deadline (ns).
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDeadlineExceeded));
+
+ iree_event_deinitialize(&event);
+}
+
+// Issues a delay task that should wait until the requested time.
+// NOTE: this kind of test can be flaky - if we have issues we can bump the
+// sleep time up.
+TEST_F(TaskWaitTest, IssueDelay) {
+ IREE_TRACE_SCOPE();
+
+ iree_time_t start_time_ns = iree_time_now();
+
+ iree_task_wait_t task;
+ iree_task_wait_initialize_delay(&scope_, start_time_ns + (50 * 1000000),
+ &task);  // Sleep until 50ms from now.
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&task.header, &task.header));
+ IREE_EXPECT_OK(iree_task_scope_consume_status(&scope_));
+
+ iree_time_t end_time_ns = iree_time_now();
+ EXPECT_GE(end_time_ns - start_time_ns, 25 * 1000000);  // Loose 25ms bound to reduce flakes.
+}
+
+// Issues multiple waits that join on a single task. This models a wait-all.
+TEST_F(TaskWaitTest, WaitAll) {
+ IREE_TRACE_SCOPE();
+
+ iree_event_t event_a;
+ iree_event_initialize(/*initial_state=*/false, &event_a);
+ iree_task_wait_t task_a;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event_a),
+ IREE_TIME_INFINITE_FUTURE, &task_a);
+
+ iree_event_t event_b;
+ iree_event_initialize(/*initial_state=*/false, &event_b);
+ iree_task_wait_t task_b;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event_b),
+ IREE_TIME_INFINITE_FUTURE, &task_b);
+
+ iree_task_t* wait_tasks[] = {&task_a.header, &task_b.header};
+ iree_task_barrier_t barrier;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(wait_tasks), wait_tasks,
+ &barrier);  // Barrier starts both waits.
+
+ iree_task_fence_t fence;
+ iree_task_fence_initialize(&scope_, iree_wait_primitive_immediate(), &fence);
+ iree_task_set_completion_task(&task_a.header, &fence.header);  // Fence joins both waits (wait-all).
+ iree_task_set_completion_task(&task_b.header, &fence.header);
+
+ // Spin up a thread that will signal the event after we start waiting on it.
+ std::atomic<bool> has_signaled = {false};
+ std::thread signal_thread([&]() {
+ IREE_TRACE_SCOPE();
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ EXPECT_FALSE(has_signaled);
+ iree_event_set(&event_a);
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ has_signaled = true;  // Set only just before the LAST event is signaled.
+ iree_event_set(&event_b);
+ });
+
+ EXPECT_FALSE(has_signaled);
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&barrier.header, &fence.header));
+ EXPECT_TRUE(has_signaled);  // Fence retired only after both events fired.
+ IREE_EXPECT_OK(iree_task_scope_consume_status(&scope_));
+
+ signal_thread.join();
+ iree_event_deinitialize(&event_a);
+ iree_event_deinitialize(&event_b);
+}
+
+// Issues multiple waits that join on a single task but where one times out.
+TEST_F(TaskWaitTest, WaitAllTimeout) {
+ IREE_TRACE_SCOPE();
+
+ iree_event_t event_a;
+ iree_event_initialize(/*initial_state=*/true, &event_a);  // A already satisfied.
+ iree_task_wait_t task_a;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event_a),
+ IREE_TIME_INFINITE_FUTURE, &task_a);
+
+ iree_event_t event_b;
+ iree_event_initialize(/*initial_state=*/false, &event_b);  // B never signaled.
+ iree_task_wait_t task_b;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event_b),
+ iree_time_now() + (50 * 1000000), &task_b);  // 50ms deadline (ns).
+
+ iree_task_t* wait_tasks[] = {&task_a.header, &task_b.header};
+ iree_task_barrier_t barrier;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(wait_tasks), wait_tasks,
+ &barrier);
+
+ iree_task_fence_t fence;
+ iree_task_fence_initialize(&scope_, iree_wait_primitive_immediate(), &fence);
+ iree_task_set_completion_task(&task_a.header, &fence.header);
+ iree_task_set_completion_task(&task_b.header, &fence.header);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&barrier.header, &fence.header));
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDeadlineExceeded));  // B's timeout fails the join.
+
+ iree_event_deinitialize(&event_a);
+ iree_event_deinitialize(&event_b);
+}
+
+// Issues multiple waits that join on a single task in wait-any mode.
+// This means that if one wait finishes all other waits will be cancelled and
+// the completion task will continue.
+//
+// Here event_a is signaled but event_b is not.
+TEST_F(TaskWaitTest, WaitAny) {
+ IREE_TRACE_SCOPE();
+
+ // Flag shared between all waits in a group.
+ iree_atomic_int32_t cancellation_flag = IREE_ATOMIC_VAR_INIT(0);
+
+ iree_event_t event_a;
+ iree_event_initialize(/*initial_state=*/false, &event_a);
+ iree_task_wait_t task_a;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event_a),
+ IREE_TIME_INFINITE_FUTURE, &task_a);
+ iree_task_wait_set_wait_any(&task_a, &cancellation_flag);  // Shared flag groups the waits.
+
+ iree_event_t event_b;
+ iree_event_initialize(/*initial_state=*/false, &event_b);
+ iree_task_wait_t task_b;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event_b),
+ IREE_TIME_INFINITE_FUTURE, &task_b);
+ iree_task_wait_set_wait_any(&task_b, &cancellation_flag);
+
+ iree_task_t* wait_tasks[] = {&task_a.header, &task_b.header};
+ iree_task_barrier_t barrier;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(wait_tasks), wait_tasks,
+ &barrier);
+
+ iree_task_fence_t fence;
+ iree_task_fence_initialize(&scope_, iree_wait_primitive_immediate(), &fence);
+ iree_task_set_completion_task(&task_a.header, &fence.header);
+ iree_task_set_completion_task(&task_b.header, &fence.header);
+
+ // Spin up a thread that will signal the event after we start waiting on it.
+ std::atomic<bool> has_signaled = {false};
+ std::thread signal_thread([&]() {
+ IREE_TRACE_SCOPE();
+ // NOTE: we only signal event_a - event_b remains unsignaled.
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ EXPECT_FALSE(has_signaled);
+ has_signaled = true;
+ iree_event_set(&event_a);
+ });
+
+ EXPECT_FALSE(has_signaled);
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&barrier.header, &fence.header));
+ EXPECT_TRUE(has_signaled);  // One signal suffices; the other wait is cancelled.
+ IREE_EXPECT_OK(iree_task_scope_consume_status(&scope_));
+
+ signal_thread.join();
+ iree_event_deinitialize(&event_a);
+ iree_event_deinitialize(&event_b);
+}
+
+// Issues multiple waits that join on a single task in wait-any mode.
+// Here instead of signaling anything we cause event_a to timeout so that the
+// entire wait is cancelled.
+TEST_F(TaskWaitTest, WaitAnyTimeout) {
+ IREE_TRACE_SCOPE();
+
+ // Flag shared between all waits in a group.
+ iree_atomic_int32_t cancellation_flag = IREE_ATOMIC_VAR_INIT(0);
+
+ iree_event_t event_a;
+ iree_event_initialize(/*initial_state=*/false, &event_a);
+ iree_task_wait_t task_a;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event_a),
+ iree_time_now() + (50 * 1000000), &task_a);  // 50ms deadline (ns); nothing signals.
+ iree_task_wait_set_wait_any(&task_a, &cancellation_flag);
+
+ iree_event_t event_b;
+ iree_event_initialize(/*initial_state=*/false, &event_b);
+ iree_task_wait_t task_b;
+ iree_task_wait_initialize(&scope_, iree_event_await(&event_b),
+ IREE_TIME_INFINITE_FUTURE, &task_b);
+ iree_task_wait_set_wait_any(&task_b, &cancellation_flag);
+
+ iree_task_t* wait_tasks[] = {&task_a.header, &task_b.header};
+ iree_task_barrier_t barrier;
+ iree_task_barrier_initialize(&scope_, IREE_ARRAYSIZE(wait_tasks), wait_tasks,
+ &barrier);
+
+ iree_task_fence_t fence;
+ iree_task_fence_initialize(&scope_, iree_wait_primitive_immediate(), &fence);
+ iree_task_set_completion_task(&task_a.header, &fence.header);
+ iree_task_set_completion_task(&task_b.header, &fence.header);
+
+ IREE_ASSERT_OK(SubmitTasksAndWaitIdle(&barrier.header, &fence.header));
+ EXPECT_THAT(Status(iree_task_scope_consume_status(&scope_)),
+ StatusIs(StatusCode::kDeadlineExceeded));  // A's timeout cancels the whole group.
+
+ iree_event_deinitialize(&event_a);
+ iree_event_deinitialize(&event_b);
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/testing/BUILD b/runtime/src/iree/task/testing/BUILD
new file mode 100644
index 0000000..c355f6b
--- /dev/null
+++ b/runtime/src/iree/task/testing/BUILD
@@ -0,0 +1,33 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_runtime_cc_library(
+ name = "task_test",
+ testonly = 1,
+ hdrs = ["task_test.h"],
+ deps = [
+ "//runtime/src/iree/task",
+ "//runtime/src/iree/testing:gtest",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "test_util",
+ testonly = 1,
+ hdrs = ["test_util.h"],
+ deps = [
+ "//runtime/src/iree/task",
+ "//runtime/src/iree/testing:gtest",
+ ],
+)
diff --git a/runtime/src/iree/task/testing/CMakeLists.txt b/runtime/src/iree/task/testing/CMakeLists.txt
new file mode 100644
index 0000000..9dbd55d
--- /dev/null
+++ b/runtime/src/iree/task/testing/CMakeLists.txt
@@ -0,0 +1,37 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/task/testing/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ task_test
+ HDRS
+ "task_test.h"
+ DEPS
+ iree::task
+ iree::testing::gtest
+ TESTONLY
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ test_util
+ HDRS
+ "test_util.h"
+ DEPS
+ iree::task
+ iree::testing::gtest
+ TESTONLY
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/task/testing/task_test.h b/runtime/src/iree/task/testing/task_test.h
new file mode 100644
index 0000000..12068e6
--- /dev/null
+++ b/runtime/src/iree/task/testing/task_test.h
@@ -0,0 +1,77 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: the best kind of synchronization is no synchronization; always try to
+// design your algorithm so that you don't need anything from this file :)
+// See https://travisdowns.github.io/blog/2020/07/06/concurrency-costs.html
+
+#ifndef IREE_TASK_TESTING_TASK_TEST_H_
+#define IREE_TASK_TESTING_TASK_TEST_H_
+
+#include <memory>
+
+#include "iree/task/executor.h"
+#include "iree/task/scope.h"
+#include "iree/task/task.h"
+#include "iree/task/topology.h"
+#include "iree/testing/status_matchers.h"
+
+class TaskTest : public ::testing::Test {
+ protected:
+ virtual void SetUp() {
+ iree_task_topology_t topology;
+ iree_task_topology_initialize_from_group_count(8, &topology);
+ IREE_ASSERT_OK(
+ iree_task_executor_create(IREE_TASK_SCHEDULING_MODE_RESERVED, &topology,
+ /*worker_local_memory_size=*/(64 * 1024),
+ iree_allocator_system(), &executor_));
+ iree_task_topology_deinitialize(&topology);
+
+ iree_task_scope_initialize(iree_make_cstring_view("scope"), &scope_);
+ }
+
+ virtual void TearDown() {
+ iree_task_scope_deinitialize(&scope_);
+
+ iree_task_executor_release(executor_);
+ }
+
+ // Submits a sequence of tasks with |head_task| at the head and |tail_task| at
+ // the tail (they can be the same).
+ iree_status_t SubmitTasksAndWaitIdle(iree_task_t* head_task,
+ iree_task_t* tail_task) {
+ iree_task_fence_t* fence = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_task_executor_acquire_fence(executor_, &scope_, &fence));
+ iree_task_set_completion_task(tail_task, &fence->header);
+
+ iree_task_submission_t submission;
+ iree_task_submission_initialize(&submission);
+ iree_task_submission_enqueue(&submission, head_task);
+ iree_task_executor_submit(executor_, &submission);
+ iree_task_executor_flush(executor_);
+ return iree_task_scope_wait_idle(&scope_, IREE_TIME_INFINITE_FUTURE);
+ }
+
+ // Submits a DAG of tasks with |tail_task| at the tail (used just for idle
+ // detection).
+ iree_status_t SubmitAndWaitIdle(iree_task_submission_t* submission,
+ iree_task_t* tail_task) {
+ iree_task_fence_t* fence = NULL;
+ IREE_RETURN_IF_ERROR(
+ iree_task_executor_acquire_fence(executor_, &scope_, &fence));
+ iree_task_set_completion_task(tail_task, &fence->header);
+
+ iree_task_executor_submit(executor_, submission);
+ iree_task_executor_flush(executor_);
+ return iree_task_scope_wait_idle(&scope_, IREE_TIME_INFINITE_FUTURE);
+ }
+
+ iree_task_executor_t* executor_ = NULL;
+ iree_task_scope_t scope_;
+};
+
+#endif // IREE_TASK_TESTING_TASK_TEST_H_
diff --git a/runtime/src/iree/task/testing/test_util.h b/runtime/src/iree/task/testing/test_util.h
new file mode 100644
index 0000000..047882b
--- /dev/null
+++ b/runtime/src/iree/task/testing/test_util.h
@@ -0,0 +1,77 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: the best kind of synchronization is no synchronization; always try to
+// design your algorithm so that you don't need anything from this file :)
+// See https://travisdowns.github.io/blog/2020/07/06/concurrency-costs.html
+
+#ifndef IREE_TASK_TESTING_TEST_UTIL_H_
+#define IREE_TASK_TESTING_TEST_UTIL_H_
+
+#include <memory>
+
+#include "iree/task/list.h"
+#include "iree/task/pool.h"
+#include "iree/task/scope.h"
+#include "iree/testing/status_matchers.h"
+
+using TaskPoolPtr =
+ std::unique_ptr<iree_task_pool_t, void (*)(iree_task_pool_t*)>;
+static inline TaskPoolPtr AllocateNopPool() {
+ iree_task_pool_t* pool = new iree_task_pool_t();
+ IREE_CHECK_OK(iree_task_pool_initialize(iree_allocator_system(),
+ sizeof(iree_task_nop_t), 1024, pool));
+ return {pool, [](iree_task_pool_t* pool) {
+ iree_task_pool_deinitialize(pool);
+ delete pool;
+ }};
+}
+
+using TaskScopePtr =
+ std::unique_ptr<iree_task_scope_t, void (*)(iree_task_scope_t*)>;
+static inline TaskScopePtr AllocateScope(const char* name) {
+ iree_task_scope_t* scope = new iree_task_scope_t();
+ iree_task_scope_initialize(iree_make_cstring_view(name), scope);
+ return {scope, [](iree_task_scope_t* scope) {
+ iree_task_scope_deinitialize(scope);
+ delete scope;
+ }};
+}
+
+static inline iree_task_t* AcquireNopTask(TaskPoolPtr& pool,
+ TaskScopePtr& scope, uint16_t value) {
+ iree_task_t* task = NULL;
+ IREE_CHECK_OK(iree_task_pool_acquire(pool.get(), &task));
+ iree_task_initialize(IREE_TASK_TYPE_NOP, scope.get(), task);
+ task->flags = value;
+ return task;
+}
+
+static inline bool CheckListOrderFIFO(iree_task_list_t* list) {
+ iree_task_t* p = list->head;
+ if (!p) return true;
+ uint16_t value = p->flags;
+ p = p->next_task;
+ while (p) {
+ if (p->flags <= value) return false;
+ p = p->next_task;
+ }
+ return true;
+}
+
+static inline bool CheckListOrderLIFO(iree_task_list_t* list) {
+ iree_task_t* p = list->head;
+ if (!p) return true;
+ uint16_t value = p->flags;
+ p = p->next_task;
+ while (p) {
+ if (p->flags >= value) return false;
+ p = p->next_task;
+ }
+ return true;
+}
+
+#endif // IREE_TASK_TESTING_TEST_UTIL_H_
diff --git a/runtime/src/iree/task/topology.c b/runtime/src/iree/task/topology.c
new file mode 100644
index 0000000..57450b9
--- /dev/null
+++ b/runtime/src/iree/task/topology.c
@@ -0,0 +1,94 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/topology.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+
+void iree_task_topology_group_initialize(
+ uint8_t group_index, iree_task_topology_group_t* out_group) {
+ memset(out_group, 0, sizeof(*out_group));
+ out_group->group_index = group_index;
+ snprintf(out_group->name, IREE_ARRAYSIZE(out_group->name), "iree-worker-%u",
+ group_index);
+ iree_thread_affinity_set_any(&out_group->ideal_thread_affinity);
+ out_group->constructive_sharing_mask = IREE_TASK_TOPOLOGY_GROUP_MASK_ALL;
+}
+
+void iree_task_topology_initialize(iree_task_topology_t* out_topology) {
+ IREE_ASSERT_ARGUMENT(out_topology);
+ memset(out_topology, 0, sizeof(*out_topology));
+}
+
+void iree_task_topology_deinitialize(iree_task_topology_t* topology) {
+ IREE_ASSERT_ARGUMENT(topology);
+}
+
+iree_status_t iree_task_topology_parse(iree_string_view_t value,
+ iree_task_topology_t* out_topology) {
+ // TODO(benvanik): define a format that is generally useful alongside cpuinfo.
+ // Maybe colon-separated group-id values from thread affinities? Like:
+ // 0.0:0.2:0.4:0.8 to indicate cores 0,2,4,8 on group 0
+ // 0.0:0.1:1.0:1.1 to indicate cores 0,1 of both groups 0,1
+ // etc
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED);
+}
+
+bool iree_task_topology_format(const iree_task_topology_t* topology,
+ iree_host_size_t buffer_capacity, char* buffer,
+ iree_host_size_t* out_buffer_length) {
+ // TODO(benvanik): formatting to match parsing.
+ return false;
+}
+
+iree_host_size_t iree_task_topology_group_capacity(
+ const iree_task_topology_t* topology) {
+ return IREE_ARRAYSIZE(topology->groups);
+}
+
+iree_host_size_t iree_task_topology_group_count(
+ const iree_task_topology_t* topology) {
+ return topology->group_count;
+}
+
+const iree_task_topology_group_t* iree_task_topology_get_group(
+ const iree_task_topology_t* topology, iree_host_size_t group_index) {
+ if (group_index >= topology->group_count) return NULL;
+ return &topology->groups[group_index];
+}
+
+iree_status_t iree_task_topology_push_group(
+ iree_task_topology_t* topology, const iree_task_topology_group_t* group) {
+ if (topology->group_count + 1 > IREE_ARRAYSIZE(topology->groups)) {
+ return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+ "group capacity exceeded");
+ }
+ iree_task_topology_group_t* dst_group =
+ &topology->groups[topology->group_count];
+ memcpy(dst_group, group, sizeof(*group));
+ dst_group->group_index = topology->group_count++;
+ return iree_ok_status();
+}
+
+void iree_task_topology_initialize_from_group_count(
+ iree_host_size_t group_count, iree_task_topology_t* out_topology) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, group_count);
+
+ iree_task_topology_initialize(out_topology);
+ for (iree_host_size_t i = 0; i < group_count; ++i) {
+ iree_task_topology_group_t* group = &out_topology->groups[i];
+ iree_task_topology_group_initialize(i, group);
+ }
+ out_topology->group_count = group_count;
+
+ IREE_TRACE_ZONE_END(z0);
+}
diff --git a/runtime/src/iree/task/topology.h b/runtime/src/iree/task/topology.h
new file mode 100644
index 0000000..ca02352
--- /dev/null
+++ b/runtime/src/iree/task/topology.h
@@ -0,0 +1,133 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_TOPOLOGY_H_
+#define IREE_TASK_TOPOLOGY_H_
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/threading.h"
+#include "iree/task/tuning.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A bitmask indicating which other groups from 0 to N may constructively share
+// caches. For example, a value of 0b1100 indicates that group 2 and 3 share.
+typedef uint64_t iree_task_topology_group_mask_t;
+
+#define IREE_TASK_TOPOLOGY_GROUP_MASK_ALL UINT64_MAX
+#define IREE_TASK_TOPOLOGY_GROUP_BIT_COUNT \
+ (sizeof(iree_task_topology_group_mask_t) * 8)
+
+// Information about a particular group within the topology.
+// Groups may be of varying levels of granularity even within the same topology
+// based on how the topology is defined.
+typedef struct iree_task_topology_group_t {
+ // Group index within the topology matching a particular bit in
+ // iree_task_topology_group_mask_t.
+ uint8_t group_index;
+
+ // A name assigned to executor workers used for logging/tracing.
+ char name[15];
+
+ // Processor index in the cpuinfo set.
+ uint32_t processor_index;
+
+ // Ideal thread affinity for threads within this group.
+ // All threads within the group share the same affinity and this is what
+ // allows us to model Simultaneous Multi-Threading (SMT) (aka hyperthreading).
+ iree_thread_affinity_t ideal_thread_affinity;
+
+ // A bitmask of other group indices that share some level of the cache
+ // hierarchy. Workers of this group are more likely to constructively share
+ // some cache levels higher up with these other groups. For example, if the
+ // workers in a group all share an L2 cache then the groups indicated here may
+ // all share the same L3 cache.
+ iree_task_topology_group_mask_t constructive_sharing_mask;
+} iree_task_topology_group_t;
+
+// Initializes |out_group| with a |group_index| derived name.
+void iree_task_topology_group_initialize(uint8_t group_index,
+ iree_task_topology_group_t* out_group);
+
+// Task system topology information used to define the workers within an
+// executor.
+//
+// Topologies are used to statically configure task executors by defining the
+// total number of workers in the worker pool and how those workers map to
+// hardware compute resources.
+//
+// Users can allocate topologies, populate them with zero or more groups, and
+// then pass them to the executor to construct the desired configuration. To
+// ease testing and debugging topologies can be formatted as string values and
+// round tripped through flags, though obviously the value of such encodings are
+// machine-dependent.
+//
+// Several helper constructors are available that query the machine topology
+// and attempt to derive some (hopefully) useful task system topology from it.
+// We can add the more common heuristics over time to the core and leave the
+// edge cases for applications to construct.
+typedef struct iree_task_topology_t {
+ iree_host_size_t group_count;
+ iree_task_topology_group_t groups[IREE_TASK_EXECUTOR_MAX_WORKER_COUNT];
+} iree_task_topology_t;
+
+// Initializes an empty task topology.
+void iree_task_topology_initialize(iree_task_topology_t* out_topology);
+
+// Deinitializes a topology structure.
+void iree_task_topology_deinitialize(iree_task_topology_t* topology);
+
+// Parses a serialized topology in string form.
+iree_status_t iree_task_topology_parse(iree_string_view_t value,
+ iree_task_topology_t* out_topology);
+
+// Formats the topology as a string value that can be parsed with
+// iree_task_topology_parse.
+bool iree_task_topology_format(const iree_task_topology_t* topology,
+ iree_host_size_t buffer_capacity, char* buffer,
+ iree_host_size_t* out_buffer_length);
+
+// Returns the group capacity in the topology structure.
+iree_host_size_t iree_task_topology_group_capacity(
+ const iree_task_topology_t* topology);
+
+// Returns the total group count defined by the topology.
+iree_host_size_t iree_task_topology_group_count(
+ const iree_task_topology_t* topology);
+
+// Returns the group information for the given group index.
+const iree_task_topology_group_t* iree_task_topology_get_group(
+ const iree_task_topology_t* topology, iree_host_size_t group_index);
+
+// Pushes a new group onto the topology set.
+// The provided group data will be copied into the topology structure.
+iree_status_t iree_task_topology_push_group(
+ iree_task_topology_t* topology, const iree_task_topology_group_t* group);
+
+// Initializes a topology with the specified number of groups.
+// 0 is a valid value, indicating that only donated threads will be used to
+// perform work. Groups will have no specific affinity and rely on the OS
+// scheduler to ensure they are distributed in a meaningful way; this generally
+// works out as threads created within a process are usually rotated across
+// preferred processors by default.
+void iree_task_topology_initialize_from_group_count(
+ iree_host_size_t group_count, iree_task_topology_t* out_topology);
+
+// Initializes a topology with one group for each physical core in the machine.
+void iree_task_topology_initialize_from_physical_cores(
+ iree_host_size_t max_core_count, iree_task_topology_t* out_topology);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_TOPOLOGY_H_
diff --git a/runtime/src/iree/task/topology_cpuinfo.c b/runtime/src/iree/task/topology_cpuinfo.c
new file mode 100644
index 0000000..ca91263
--- /dev/null
+++ b/runtime/src/iree/task/topology_cpuinfo.c
@@ -0,0 +1,256 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdio.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/task/topology.h"
+
+// Initializes |out_topology| with a standardized behavior when cpuinfo is not
+// available (unsupported arch, failed to query, etc).
+static void iree_task_topology_initialize_fallback(
+ iree_host_size_t max_group_count, iree_task_topology_t* out_topology) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, max_group_count);
+ // TODO(benvanik): implement our own query... but that seems not so great.
+ // For now we default to a single group: if a user wants more then they can
+ // either get cpuinfo working for their platform or manually construct the
+ // topology themselves.
+ iree_host_size_t group_count = 1;
+ iree_task_topology_initialize_from_group_count(group_count, out_topology);
+ IREE_TRACE_ZONE_END(z0);
+}
+
+#if defined(IREE_TASK_CPUINFO_DISABLED)
+
+void iree_task_topology_initialize_from_physical_cores(
+ iree_host_size_t max_core_count, iree_task_topology_t* out_topology) {
+ iree_task_topology_initialize_fallback(max_core_count, out_topology);
+}
+
+#else
+
+#include <cpuinfo.h>
+
+static bool iree_task_topology_is_cpuinfo_available() {
+ return cpuinfo_initialize() && cpuinfo_get_cores_count() > 0;
+}
+
+// Returns the core of the calling thread or NULL if not supported.
+// We wrap this here because cpuinfo only returns non-NULL on linux.
+static const struct cpuinfo_core* iree_task_topology_get_current_core() {
+ const struct cpuinfo_core* current_core = cpuinfo_get_current_core();
+#if defined(IREE_PLATFORM_WINDOWS)
+ // TODO(benvanik): upstream into cpuinfo.
+ if (current_core == NULL) {
+ PROCESSOR_NUMBER processor_number;
+ GetCurrentProcessorNumberEx(&processor_number);
+ uint32_t processor_id =
+ cpuinfo_get_package(processor_number.Group)->processor_start +
+ processor_number.Number;
+ current_core = cpuinfo_get_processor(processor_id)->core;
+ }
+#endif // IREE_PLATFORM_WINDOWS
+ return current_core;
+}
+
+// Returns |core_id| rotated by the calling base core ID.
+// On many systems the kernel will have already assigned a randomized starting
+// core for thread distribution and we can just reuse that.
+static uint32_t iree_task_topology_rotate_from_base_core(uint32_t core_id) {
+ const struct cpuinfo_core* current_core =
+ iree_task_topology_get_current_core();
+ if (!current_core) {
+ return core_id; // don't modify if we don't know
+ }
+ uint32_t next_core_id =
+ (current_core->core_id + 1) % cpuinfo_get_cores_count();
+ return (next_core_id + core_id) % cpuinfo_get_cores_count();
+}
+
+// Sets a platform-specific iree_thread_affinity_t based on the cpuinfo
+// processor.
+static void iree_task_topology_set_affinity_from_processor(
+ const struct cpuinfo_processor* processor,
+ iree_thread_affinity_t* out_affinity) {
+ memset(out_affinity, 0, sizeof(*out_affinity));
+ out_affinity->specified = 1;
+
+ // Special bit to indicate that (if required) we want the entire core.
+ if (processor->core->processor_count > 1) {
+ out_affinity->smt = 1;
+ }
+
+ // cpuinfo #ifdefs the fields we need to extract the right platform IDs.
+ // We purposefully use the same exact macros they do there so that we don't
+ // have to worry about skew.
+
+#if defined(__MACH__) && defined(__APPLE__)
+ // TODO(benvanik): run on darwin to see how the l2 caches map. We ideally want
+ // a unique affinity ID per L2 cache.
+ // For now, we just use some random pointer bytes. It's just a tag used by
+ // the kernel to distribute the threads so the exact bits don't matter as long
+ // as they are unique per group we want isolated.
+ out_affinity->id = (uint32_t)(uintptr_t)processor;
+#elif defined(__linux__)
+ out_affinity->id = processor->linux_id;
+#elif defined(_WIN32) || defined(__CYGWIN__)
+ out_affinity->group = processor->windows_group_id;
+ out_affinity->id = processor->windows_processor_id;
+#else
+ // WASM? Unused today.
+ out_affinity->specified = 0;
+#endif // cpuinfo-like platform field
+}
+
+// Returns a bitset with all *processors* that share the same |cache|.
+static uint64_t iree_task_topology_calculate_cache_bits(
+ const struct cpuinfo_cache* cache) {
+ if (!cache) return 0;
+ uint64_t mask = 0;
+ for (uint32_t processor_i = 0; processor_i < cache->processor_count;
+ ++processor_i) {
+ uint32_t i = cache->processor_start + processor_i;
+ if (i < IREE_TASK_TOPOLOGY_GROUP_BIT_COUNT) {
+ mask |= 1ull << i;
+ }
+ }
+ return mask;
+}
+
+// Constructs a constructive sharing mask for all *processors* that share the
+// same cache as the specified |processor|.
+static uint64_t iree_task_topology_calculate_constructive_sharing_mask(
+ const struct cpuinfo_processor* processor) {
+ uint64_t mask = 0;
+ mask |= iree_task_topology_calculate_cache_bits(processor->cache.l1i);
+ mask |= iree_task_topology_calculate_cache_bits(processor->cache.l1d);
+ mask |= iree_task_topology_calculate_cache_bits(processor->cache.l2);
+ // TODO(benvanik): include L3 here too (for systems that have it)? Or use L3
+ // info purely for distribution and focus the group mask on lower-latency
+ // caches?
+ return mask;
+}
+
+// Populates |out_group| with the information from |core|.
+static void iree_task_topology_group_initialize_from_core(
+ uint32_t group_index, const struct cpuinfo_core* core,
+ iree_task_topology_group_t* out_group) {
+ iree_task_topology_group_initialize(group_index, out_group);
+
+ // Guess: always pick the first processor in a core.
+ // When pinning to threads we'll take into account whether the core is SMT
+ // and use all threads anyway so this alignment is just helpful for debugging.
+ uint32_t processor_i = core->processor_start;
+ out_group->processor_index = processor_i;
+
+ const struct cpuinfo_processor* processor =
+ cpuinfo_get_processor(processor_i);
+ iree_task_topology_set_affinity_from_processor(
+ processor, &out_group->ideal_thread_affinity);
+}
+
+// Fixes constructive_sharing_mask values such that they represent other chosen
+// topology groups instead of processor indices. We do this so that code using
+// the topology groups doesn't need to know anything about which physical
+// processor IDs a particular group is mapped to.
+static void iree_task_topology_fixup_constructive_sharing_masks(
+ iree_task_topology_t* topology) {
+ // O(n^2), but n is always <= 64 (and often <= 8).
+ for (iree_host_size_t i = 0; i < topology->group_count; ++i) {
+ iree_task_topology_group_t* group = &topology->groups[i];
+
+ // Compute the processors that we can constructively share with.
+ uint64_t constructive_sharing_mask =
+ iree_task_topology_calculate_constructive_sharing_mask(
+ cpuinfo_get_processor(group->processor_index));
+
+ iree_task_topology_group_mask_t group_mask = 0;
+ for (iree_host_size_t j = 0; j < topology->group_count; ++j) {
+ if (i == j) continue;
+ const iree_task_topology_group_t* other_group = &topology->groups[j];
+ uint64_t group_processor_bits =
+ iree_math_rotl_u64(1ull, other_group->processor_index);
+ if (constructive_sharing_mask & group_processor_bits) {
+ group_mask |= iree_math_rotl_u64(1ull, other_group->group_index);
+ }
+ }
+
+ group->constructive_sharing_mask = group_mask;
+ }
+}
+
+// Matches all cores.
+static bool iree_task_topology_core_filter_all(const struct cpuinfo_core* core,
+ uintptr_t user_data) {
+ return true;
+}
+
+// Returns true if the given |core| passes the filter and should be included.
+// |user_data| is the value passed alongside the filter function.
+typedef bool (*iree_task_topology_core_filter_t)(
+ const struct cpuinfo_core* core, uintptr_t user_data);
+
+// Initializes a topology with one group for each core that matches |filter_fn|.
+//
+// If cpuinfo is not available this falls back to the same behavior as
+// iree_task_topology_initialize_from_physical_cores.
+static void iree_task_topology_initialize_from_physical_cores_with_filter(
+ iree_task_topology_core_filter_t filter_fn, uintptr_t filter_fn_data,
+ iree_host_size_t max_core_count, iree_task_topology_t* out_topology) {
+ max_core_count = iree_min(max_core_count, IREE_TASK_TOPOLOGY_GROUP_BIT_COUNT);
+ if (!iree_task_topology_is_cpuinfo_available()) {
+ iree_task_topology_initialize_fallback(max_core_count, out_topology);
+ return;
+ }
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, max_core_count);
+
+ // Count cores that match the filter.
+ iree_host_size_t core_count = 0;
+ for (uint32_t i = 0; i < cpuinfo_get_cores_count(); i++) {
+ const struct cpuinfo_core* core = cpuinfo_get_core(i);
+ if (filter_fn(core, filter_fn_data)) ++core_count;
+ }
+ core_count = iree_min(core_count, max_core_count);
+
+ iree_task_topology_initialize(out_topology);
+
+ // Build each core up to the max allowed.
+ // TODO(benvanik): if our group_count <= core_count/2 then distribute better;
+ // for now we just do a straight-line through (cores 0-N) when instead we may
+ // want to take advantage of L3 cache info (half of groups on one L3 cache,
+ // half of groups on another, etc).
+ out_topology->group_count = core_count;
+ for (uint32_t core_i = 0, group_i = 0; group_i < out_topology->group_count;
+ ++core_i) {
+ // Rotate the core ID so that we avoid setting the affinity to the calling
+ // thread which we assume is something the user has plans for and doesn't
+ // want to have our workers stealing their time.
+ const struct cpuinfo_core* core =
+ cpuinfo_get_core(iree_task_topology_rotate_from_base_core(core_i));
+ if (filter_fn(core, filter_fn_data)) {
+ iree_task_topology_group_initialize_from_core(
+ group_i, core, &out_topology->groups[group_i]);
+ ++group_i;
+ }
+ }
+
+ iree_task_topology_fixup_constructive_sharing_masks(out_topology);
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_task_topology_initialize_from_physical_cores(
+ iree_host_size_t max_core_count, iree_task_topology_t* out_topology) {
+ iree_task_topology_initialize_from_physical_cores_with_filter(
+ iree_task_topology_core_filter_all, 0, max_core_count, out_topology);
+}
+
+#endif // IREE_TASK_CPUINFO_DISABLED
diff --git a/runtime/src/iree/task/topology_test.cc b/runtime/src/iree/task/topology_test.cc
new file mode 100644
index 0000000..446e824
--- /dev/null
+++ b/runtime/src/iree/task/topology_test.cc
@@ -0,0 +1,146 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/topology.h"
+
+#include <cstddef>
+
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+using namespace iree::testing::status;
+
+TEST(TopologyTest, Lifetime) {
+ iree_task_topology_t topology;
+ iree_task_topology_initialize(&topology);
+ EXPECT_GT(iree_task_topology_group_capacity(&topology), 0);
+ EXPECT_EQ(0, iree_task_topology_group_count(&topology));
+ iree_task_topology_deinitialize(&topology);
+}
+
+TEST(TopologyTest, Empty) {
+ iree_task_topology_t topology;
+ iree_task_topology_initialize(&topology);
+
+ EXPECT_EQ(0, iree_task_topology_group_count(&topology));
+ EXPECT_EQ(NULL, iree_task_topology_get_group(&topology, 0));
+ EXPECT_EQ(NULL, iree_task_topology_get_group(&topology, 100));
+
+ iree_task_topology_deinitialize(&topology);
+}
+
+TEST(TopologyTest, Parsing) {
+ // TODO(benvanik): implement parsing.
+}
+
+TEST(TopologyTest, Formatting) {
+ // TODO(benvanik): implement formatting.
+}
+
+TEST(TopologyTest, Construction) {
+ iree_task_topology_t topology;
+ iree_task_topology_initialize(&topology);
+
+ EXPECT_EQ(0, iree_task_topology_group_count(&topology));
+
+ for (iree_host_size_t i = 0; i < 8; ++i) {
+ iree_task_topology_group_t group;
+ iree_task_topology_group_initialize(i, &group);
+ IREE_EXPECT_OK(iree_task_topology_push_group(&topology, &group));
+ EXPECT_EQ(i + 1, iree_task_topology_group_count(&topology));
+ }
+ EXPECT_EQ(8, iree_task_topology_group_count(&topology));
+
+ for (iree_host_size_t i = 0; i < 8; ++i) {
+ const iree_task_topology_group_t* group =
+ iree_task_topology_get_group(&topology, i);
+ EXPECT_EQ(i, group->group_index);
+ }
+
+ iree_task_topology_deinitialize(&topology);
+}
+
+TEST(TopologyTest, MaxCapacity) {
+ iree_task_topology_t topology;
+ iree_task_topology_initialize(&topology);
+
+ EXPECT_EQ(0, iree_task_topology_group_count(&topology));
+
+ // Fill up to capacity.
+ for (iree_host_size_t i = 0; i < iree_task_topology_group_capacity(&topology);
+ ++i) {
+ iree_task_topology_group_t group;
+ iree_task_topology_group_initialize(i, &group);
+ IREE_EXPECT_OK(iree_task_topology_push_group(&topology, &group));
+ EXPECT_EQ(i + 1, iree_task_topology_group_count(&topology));
+ }
+ EXPECT_EQ(iree_task_topology_group_capacity(&topology),
+ iree_task_topology_group_count(&topology));
+
+ // Try adding one more - it should fail because we are at capacity.
+ iree_task_topology_group_t extra_group;
+ iree_task_topology_group_initialize(UINT8_MAX, &extra_group);
+ iree_status_t status = iree_task_topology_push_group(&topology, &extra_group);
+ EXPECT_TRUE(iree_status_is_resource_exhausted(status));
+ iree_status_ignore(status);
+
+ // Confirm that the only groups we have are the valid ones we added above.
+ for (iree_host_size_t i = 0; i < 8; ++i) {
+ const iree_task_topology_group_t* group =
+ iree_task_topology_get_group(&topology, i);
+ EXPECT_EQ(i, group->group_index);
+ }
+
+ iree_task_topology_deinitialize(&topology);
+}
+
+TEST(TopologyTest, FromGroupCount) {
+ static constexpr iree_host_size_t kGroupCount = 4;
+ iree_task_topology_t topology;
+ iree_task_topology_initialize(&topology);
+
+ iree_task_topology_initialize_from_group_count(kGroupCount, &topology);
+ EXPECT_LE(iree_task_topology_group_count(&topology),
+ iree_task_topology_group_capacity(&topology));
+ EXPECT_EQ(iree_task_topology_group_count(&topology), kGroupCount);
+ for (iree_host_size_t i = 0; i < kGroupCount; ++i) {
+ const iree_task_topology_group_t* group =
+ iree_task_topology_get_group(&topology, i);
+ EXPECT_EQ(i, group->group_index);
+ }
+
+ iree_task_topology_deinitialize(&topology);
+}
+
+// Verifies only that the |topology| is usable.
+// If we actually checked the contents here then we'd just be validating that
+// cpuinfo was working and the tests would become machine-dependent.
+static void EnsureTopologyValid(iree_host_size_t max_group_count,
+ iree_task_topology_t* topology) {
+ EXPECT_LE(iree_task_topology_group_count(topology),
+ iree_task_topology_group_capacity(topology));
+ EXPECT_LE(iree_task_topology_group_count(topology), max_group_count);
+ EXPECT_GE(iree_task_topology_group_count(topology), 1);
+ for (iree_host_size_t i = 0; i < iree_task_topology_group_count(topology);
+ ++i) {
+ const iree_task_topology_group_t* group =
+ iree_task_topology_get_group(topology, i);
+ EXPECT_EQ(i, group->group_index);
+ }
+}
+
+TEST(TopologyTest, FromPhysicalCores) {
+ static constexpr iree_host_size_t kMaxGroupCount = 4;
+ iree_task_topology_t topology;
+ iree_task_topology_initialize(&topology);
+ iree_task_topology_initialize_from_physical_cores(kMaxGroupCount, &topology);
+ EnsureTopologyValid(kMaxGroupCount, &topology);
+ iree_task_topology_deinitialize(&topology);
+}
+
+} // namespace
diff --git a/runtime/src/iree/task/tuning.h b/runtime/src/iree/task/tuning.h
new file mode 100644
index 0000000..dbe4bbf
--- /dev/null
+++ b/runtime/src/iree/task/tuning.h
@@ -0,0 +1,105 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_TUNING_H_
+#define IREE_TASK_TUNING_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Maximum number of workers that an executor can manage.
+// A 64 worker hard limit is based on us using uint64_t as a bitmask to select
+// workers. It's easy to go smaller (just use fewer bits) if it's known that
+// only <64 will ever be used (such as for devices with 2 cores).
+#define IREE_TASK_EXECUTOR_MAX_WORKER_COUNT (64)
+
+// Initial number of shard tasks that are allocated in the executor pool.
+// Increasing this number will decrease initial allocation storms in cases of
+// extremely wide concurrency regions (many dispatches running at the same time)
+// at the cost of a higher minimum memory consumption.
+#define IREE_TASK_EXECUTOR_INITIAL_SHARD_RESERVATION_PER_WORKER (4)
+
+// Maximum number of events retained by the executor event pool.
+#define IREE_TASK_EXECUTOR_EVENT_POOL_CAPACITY 64
+
+// Maximum number of simultaneous waits an executor may perform as part of a
+// wait-any operation. A larger value may enable better wake coalescing by the
+// kernel. This is only a count limiting wait tasks that have been scheduled and
+// been promoted to the root executor waiting list. There may be any number of
+// waits deeper in the pipeline so long as they don't all become ready
+// simultaneously.
+//
+// Realistically, though, if we have more than 64 outstanding **root** waits
+// it's hard to reason about if/when the executor queue could make forward
+// progress and indicates a possible error in task assignment.
+//
+// Also, the underlying iree_wait_set_t may not support more than 64 handles on
+// certain platforms without emulation. Trying to keep us on the fast-path
+// with a reasonable number seems fine for now until we have a need for more.
+//
+// NOTE: we reserve 1 wait handle for our own internal use. This allows us to
+// wake the coordination worker when new work is submitted from external
+// sources.
+#define IREE_TASK_EXECUTOR_MAX_OUTSTANDING_WAITS (64 - 1)
+
+// Amount of time that can remain in a delay task while still retiring.
+// This prevents additional system sleeps when the remaining time before the
+// deadline is less than the granularity the system is likely able to sleep for.
+// Some platforms may have as much as 10-15ms of potential slop and sleeping for
+// 1ms may result in 10-15ms.
+#define IREE_TASK_EXECUTOR_DELAY_SLOP_NS (1 /*ms*/ * 1000000)
+
+// Allows for dividing the total number of attempts that a worker will make to
+// steal tasks from other workers. By default all other workers will be
+// attempted while setting this to 2, for example, will try for only half of
+// the available workers.
+#define IREE_TASK_EXECUTOR_MAX_THEFT_ATTEMPTS_DIVISOR (1)
+
+// Maximum number of tasks that will be stolen in one go from another worker.
+//
+// Too few tasks will cause additional overhead as the worker repeatedly sips
+// away tasks and when it does get tasks it may suffer spatial locality cache
+// issues as it is effectively walking backwards in memory to both touch the
+// tasks and - a much larger impact - running tasks that themselves are walking
+// orders of magnitude more memory backwards.
+//
+// Too many tasks will cause additional latency on workers that may interfere
+// with higher level scheduling; for example, if a worker runs out of tasks and
+// immediately steals 8000 of them from another worker it's going to take until
+// those 8000 complete before any work that arrives specifically for the worker
+// is able to start processing.
+//
+// In real-time systems too few tasks is better (slightly more work for much
+// lower variance in execution) while in batch mode systems too many tasks is
+// better (as latencies don't matter so long as throughput is maximized).
+#define IREE_TASK_EXECUTOR_MAX_THEFT_TASK_COUNT \
+ IREE_TASK_EXECUTOR_MAX_WORKER_COUNT
+
+// Number of tiles that will be batched into a single reservation from the grid.
+// This is a maximum; if there are fewer tiles that would otherwise allow for
+// maximum parallelism then this may be ignored.
+//
+// The more tiles reserved at a time the higher the chance for latency to
+// increase as many reserved tiles are held up on one worker while another may
+// have otherwise been able to steal them and help finish them sooner.
+//
+// The fewer tiles reserved at a time the higher the chance for cache-locality
+// destroying behavior where multiple workers all stomp on the same cache lines
+// (as say worker 0 and worker 1 both fight over sequential tiles adjacent in
+// memory).
+#define IREE_TASK_DISPATCH_MAX_TILES_PER_SHARD_RESERVATION (8)
+
+// Whether to enable per-tile colors for each tile tracing zone based on the
+// tile grid xyz. Not cheap and can be disabled to reduce tracing overhead.
+// TODO(#4017): make per-tile color tracing fast enough to always have on.
+#define IREE_TASK_TRACING_PER_TILE_COLORS 1
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_TUNING_H_
diff --git a/runtime/src/iree/task/worker.c b/runtime/src/iree/task/worker.c
new file mode 100644
index 0000000..d12e9a2
--- /dev/null
+++ b/runtime/src/iree/task/worker.c
@@ -0,0 +1,386 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/task/worker.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "iree/base/internal/fpu_state.h"
+#include "iree/base/internal/math.h"
+#include "iree/base/tracing.h"
+#include "iree/task/executor_impl.h"
+#include "iree/task/post_batch.h"
+#include "iree/task/submission.h"
+#include "iree/task/task_impl.h"
+#include "iree/task/tuning.h"
+
+static int iree_task_worker_main(iree_task_worker_t* worker);
+
+iree_status_t iree_task_worker_initialize(
+ iree_task_executor_t* executor, iree_host_size_t worker_index,
+ const iree_task_topology_group_t* topology_group,
+ iree_byte_span_t local_memory, iree_prng_splitmix64_state_t* seed_prng,
+ iree_task_worker_t* out_worker) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ out_worker->executor = executor;
+ out_worker->worker_bit = iree_task_affinity_for_worker(worker_index);
+ out_worker->ideal_thread_affinity = topology_group->ideal_thread_affinity;
+ out_worker->constructive_sharing_mask =
+ topology_group->constructive_sharing_mask;
+ out_worker->max_theft_attempts =
+ executor->worker_count / IREE_TASK_EXECUTOR_MAX_THEFT_ATTEMPTS_DIVISOR;
+ iree_prng_minilcg128_initialize(iree_prng_splitmix64_next(seed_prng),
+ &out_worker->theft_prng);
+ out_worker->local_memory = local_memory;
+ out_worker->processor_id = 0;
+ out_worker->processor_tag = 0;
+
+ iree_task_worker_state_t initial_state = IREE_TASK_WORKER_STATE_RUNNING;
+ if (executor->scheduling_mode &
+ IREE_TASK_SCHEDULING_MODE_DEFER_WORKER_STARTUP) {
+ // User is favoring startup latency vs. initial scheduling latency. Our
+ // thread will be created suspended and not first scheduled until work
+ // arrives for it, (almost) ensuring no context switches and 10x+ lower
+ // blocking startup time.
+ initial_state = IREE_TASK_WORKER_STATE_SUSPENDED;
+ }
+ iree_atomic_store_int32(&out_worker->state, initial_state,
+ iree_memory_order_seq_cst);
+
+ iree_notification_initialize(&out_worker->wake_notification);
+ iree_notification_initialize(&out_worker->state_notification);
+ iree_atomic_task_slist_initialize(&out_worker->mailbox_slist);
+ iree_task_queue_initialize(&out_worker->local_task_queue);
+
+ iree_thread_create_params_t thread_params;
+ memset(&thread_params, 0, sizeof(thread_params));
+ thread_params.name = iree_make_cstring_view(topology_group->name);
+ thread_params.create_suspended =
+ initial_state == IREE_TASK_WORKER_STATE_SUSPENDED;
+ thread_params.priority_class = IREE_THREAD_PRIORITY_CLASS_NORMAL;
+ thread_params.initial_affinity = out_worker->ideal_thread_affinity;
+
+ // NOTE: if the thread creation fails we'll bail here and let the caller
+ // cleanup by calling deinitialize (which is safe because we zero init
+ // everything).
+ iree_status_t status = iree_thread_create(
+ (iree_thread_entry_t)iree_task_worker_main, out_worker, thread_params,
+ executor->allocator, &out_worker->thread);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+void iree_task_worker_request_exit(iree_task_worker_t* worker) {
+ if (!worker->thread) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // If the thread is already in the exiting/zombie state we don't need to do
+ // anything.
+ iree_task_worker_state_t prev_state =
+ (iree_task_worker_state_t)iree_atomic_exchange_int32(
+ &worker->state, IREE_TASK_WORKER_STATE_EXITING,
+ iree_memory_order_acq_rel);
+ switch (prev_state) {
+ case IREE_TASK_WORKER_STATE_SUSPENDED:
+ // Worker was suspended; resume it so that it can exit itself.
+ iree_thread_resume(worker->thread);
+ break;
+ case IREE_TASK_WORKER_STATE_ZOMBIE:
+ // Worker already exited; reset state to ZOMBIE.
+ iree_atomic_store_int32(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE,
+ iree_memory_order_seq_cst);
+ break;
+ default:
+ // Worker now set to EXITING and should exit soon.
+ break;
+ }
+
+ // Kick the worker in case it is waiting for work.
+ iree_notification_post(&worker->wake_notification, 1);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns true if the worker is in the zombie state (exited and awaiting
+// teardown).
+static bool iree_task_worker_is_zombie(iree_task_worker_t* worker) {
+ return iree_atomic_load_int32(&worker->state, iree_memory_order_seq_cst) ==
+ IREE_TASK_WORKER_STATE_ZOMBIE;
+}
+
+void iree_task_worker_await_exit(iree_task_worker_t* worker) {
+ if (!worker->thread) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_task_worker_request_exit(worker);
+ iree_notification_await(&worker->state_notification,
+ (iree_condition_fn_t)iree_task_worker_is_zombie,
+ worker, iree_infinite_timeout());
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_task_worker_deinitialize(iree_task_worker_t* worker) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Must have called request_exit/await_exit.
+ IREE_ASSERT_TRUE(iree_task_worker_is_zombie(worker));
+
+ iree_thread_release(worker->thread);
+ worker->thread = NULL;
+
+ // Release unfinished tasks by flushing the mailbox (which if we're here can't
+ // get anything more posted to it) and then discarding everything we still
+ // have a reference to.
+ iree_atomic_task_slist_discard(&worker->mailbox_slist);
+ iree_task_list_discard(&worker->local_task_queue.list);
+
+ iree_notification_deinitialize(&worker->wake_notification);
+ iree_notification_deinitialize(&worker->state_notification);
+ iree_atomic_task_slist_deinitialize(&worker->mailbox_slist);
+ iree_task_queue_deinitialize(&worker->local_task_queue);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+void iree_task_worker_post_tasks(iree_task_worker_t* worker,
+ iree_task_list_t* list) {
+ // Move the list into the mailbox. Note that the mailbox is LIFO and this list
+ // is concatenated with its current order preserved (which should be LIFO).
+ iree_atomic_task_slist_concat(&worker->mailbox_slist, list->head, list->tail);
+ memset(list, 0, sizeof(*list));
+}
+
+iree_task_t* iree_task_worker_try_steal_task(iree_task_worker_t* worker,
+ iree_task_queue_t* target_queue,
+ iree_host_size_t max_tasks) {
+ // Try to grab tasks from the worker; if more than one task is stolen then the
+ // first will be returned and the remaining will be added to the target queue.
+ iree_task_t* task = iree_task_queue_try_steal(
+ &worker->local_task_queue, target_queue,
+ /*max_tasks=*/IREE_TASK_EXECUTOR_MAX_THEFT_TASK_COUNT);
+ if (task) return task;
+
+ // If we still didn't steal any tasks then let's try the slist instead.
+ task = iree_atomic_task_slist_pop(&worker->mailbox_slist);
+ if (task) return task;
+
+ return NULL;
+}
+
+// Executes a task on a worker.
+// Only task types that are scheduled to workers are handled; all others must be
+// handled by the coordinator during scheduling.
+static void iree_task_worker_execute(
+ iree_task_worker_t* worker, iree_task_t* task,
+ iree_task_submission_t* pending_submission) {
+ // Execute the task and resolve the task and gather any tasks that are now
+ // ready for submission to the executor. They'll be scheduled the next time
+ // the coordinator runs.
+ //
+ // TODO(benvanik): think a bit more about this timing; this ensures we have
+ // BFS behavior at the cost of the additional merge overhead - it's probably
+ // worth it?
+ // TODO(benvanik): handle partial tasks and re-queuing.
+ switch (task->type) {
+ case IREE_TASK_TYPE_CALL: {
+ iree_task_call_execute((iree_task_call_t*)task, pending_submission);
+ break;
+ }
+ case IREE_TASK_TYPE_DISPATCH_SHARD: {
+ iree_task_dispatch_shard_execute(
+ (iree_task_dispatch_shard_t*)task, worker->processor_id,
+ worker->local_memory, pending_submission);
+ break;
+ }
+ default:
+ IREE_ASSERT_UNREACHABLE("incorrect task type for worker execution");
+ break;
+ }
+
+ // NOTE: task is invalidated above and must not be used!
+ task = NULL;
+}
+
+// Pumps the worker thread once, processing a single task.
+// Returns true if pumping should continue as there are more tasks remaining or
+// false if the caller should wait for more tasks to be posted.
+static bool iree_task_worker_pump_once(
+ iree_task_worker_t* worker, iree_task_submission_t* pending_submission) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Check the local work queue for any work we know we should start
+ // processing immediately. Other workers may try to steal some of this work
+ // if we take too long.
+ iree_task_t* task = iree_task_queue_pop_front(&worker->local_task_queue);
+
+ // Check the mailbox to see if we have incoming work that has been posted.
+ // We try to greedily move it to our local work list so that we can work
+ // with the full thread-local pending task list.
+ if (!task) {
+ // NOTE: there's a potential for theft pessimization if the queue runs too
+ // low and there's nothing there when a thief goes to grab some tasks. A
+ // standout there would indicate that we weren't scheduling very well in the
+ // first place (large uneven workloads for various workers, bad distribution
+ // in the face of heterogeneous multi-core architectures where some workers
+ // complete tasks faster than others, etc).
+ task = iree_task_queue_flush_from_lifo_slist(&worker->local_task_queue,
+ &worker->mailbox_slist);
+ }
+
+ // If we ran out of work assigned to this specific worker try to steal some
+ // from other workers that we hopefully share some of the cache hierarchy
+ // with. Their tasks will be moved from their local queue into ours and
+ // the first task in the queue is popped off and returned.
+ if (!task) {
+ task = iree_task_executor_try_steal_task(
+ worker->executor, worker->constructive_sharing_mask,
+ worker->max_theft_attempts, &worker->theft_prng,
+ &worker->local_task_queue);
+ }
+
+ // No tasks to run; let the caller know we want to wait for more.
+ if (!task) {
+ IREE_TRACE_ZONE_END(z0);
+ return false;
+ }
+
+ // Execute the task (may call out to arbitrary user code and may submit more
+ // tasks for execution).
+ iree_task_worker_execute(worker, task, pending_submission);
+
+ IREE_TRACE_ZONE_END(z0);
+ return true; // try again
+}
+
+// Updates the cached processor ID field in the worker.
+static void iree_task_worker_update_processor_id(iree_task_worker_t* worker) {
+ iree_cpu_requery_processor_id(&worker->processor_tag, &worker->processor_id);
+}
+
+// Alternates between pumping ready tasks in the worker queue and waiting
+// for more tasks to arrive. Only returns when the worker has been asked by
+// the executor to exit.
+static void iree_task_worker_pump_until_exit(iree_task_worker_t* worker) {
+ // Initial processor ID assignment. We normally refresh this upon waking from
+ // a wait but it's possible that there's already work pending and we want to
+ // be able to process it with the proper processor ID immediately.
+ iree_task_worker_update_processor_id(worker);
+
+ // Pump the thread loop to process more tasks.
+ while (true) {
+ // If we fail to find any work to do we'll wait at the end of this loop.
+ // In order to not miss any work that is enqueued after we've already
+ // checked a particular source we use an interruptable wait token that
+ // will prevent the wait from happening if anyone touches the data
+ // structures we use.
+ iree_wait_token_t wait_token =
+ iree_notification_prepare_wait(&worker->wake_notification);
+ iree_atomic_task_affinity_set_fetch_and(&worker->executor->worker_idle_mask,
+ ~worker->worker_bit,
+ iree_memory_order_seq_cst);
+
+ // Check state to see if we've been asked to exit.
+ if (iree_atomic_load_int32(&worker->state, iree_memory_order_seq_cst) ==
+ IREE_TASK_WORKER_STATE_EXITING) {
+ // Thread exit requested - cancel pumping.
+ iree_notification_cancel_wait(&worker->wake_notification);
+ // TODO(benvanik): complete tasks before exiting?
+ break;
+ }
+
+ // TODO(benvanik): we could try to update the processor ID here before we
+ // begin a new batch of work - assuming it's not too expensive.
+
+ iree_task_submission_t pending_submission;
+ iree_task_submission_initialize(&pending_submission);
+
+ while (iree_task_worker_pump_once(worker, &pending_submission)) {
+ // All work done ^, which will return false when the worker should wait.
+ }
+
+ bool schedule_dirty = false;
+ if (!iree_task_submission_is_empty(&pending_submission)) {
+ iree_task_executor_merge_submission(worker->executor,
+ &pending_submission);
+ schedule_dirty = true;
+ }
+
+ // We've finished all the work we have scheduled so set our idle flag.
+ // This ensures that if any other thread comes in and wants to give us
+ // work we will properly coordinate/wake below.
+ iree_atomic_task_affinity_set_fetch_or(&worker->executor->worker_idle_mask,
+ worker->worker_bit,
+ iree_memory_order_seq_cst);
+
+ // When we encounter a complete lack of work we can self-nominate to check
+ // the global work queue and distribute work to other threads. Only one
+ // coordinator can be running at a time so we also ensure that if another
+ // is doing its work we gracefully wait for it. It's fine to block in here
+ // as the next thing we'd have done is go idle anyway.
+
+ // First self-nominate; this *may* do something or just be ignored (if
+ // another worker is already coordinating).
+ iree_task_executor_coordinate(worker->executor, worker);
+
+ // If nothing has been enqueued since we started this loop (so even
+ // coordination didn't find anything) we go idle. Otherwise we fall
+ // through and try the loop again.
+ if (schedule_dirty ||
+ !iree_task_queue_is_empty(&worker->local_task_queue)) {
+ // Have more work to do; loop around to try another pump.
+ iree_notification_cancel_wait(&worker->wake_notification);
+ } else {
+ IREE_TRACE_ZONE_BEGIN_NAMED(z_wait,
+ "iree_task_worker_main_pump_wake_wait");
+ iree_notification_commit_wait(&worker->wake_notification, wait_token,
+ IREE_TIME_INFINITE_FUTURE);
+ IREE_TRACE_ZONE_END(z_wait);
+
+ // Woke from a wait - query the processor ID in case we migrated during
+ // the sleep.
+ iree_task_worker_update_processor_id(worker);
+ }
+
+ // Wait completed.
+ // Jump back up and try pumping any tasks that arrived.
+ continue;
+ }
+}
+
+// Thread entry point for each worker.
+static int iree_task_worker_main(iree_task_worker_t* worker) {
+ IREE_TRACE_ZONE_BEGIN(thread_zone);
+
+ // We cannot rely on the global process settings for FPU state.
+ // Be explicit here on what we need.
+ iree_fpu_state_push(IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO);
+
+ // Reset affinity (as it can change over time).
+ // TODO(benvanik): call this after waking in case CPU hotplugging happens.
+ iree_thread_request_affinity(worker->thread, worker->ideal_thread_affinity);
+
+ // Enter the running state immediately. Note that we could have been requested
+ // to exit while suspended/still starting up, so check that here before we
+ // mess with any data structures.
+ const bool should_run =
+ iree_atomic_exchange_int32(&worker->state, IREE_TASK_WORKER_STATE_RUNNING,
+ iree_memory_order_seq_cst) !=
+ IREE_TASK_WORKER_STATE_EXITING;
+ if (IREE_LIKELY(should_run)) {
+ // << work happens here >>
+ iree_task_worker_pump_until_exit(worker);
+ }
+
+ IREE_TRACE_ZONE_END(thread_zone);
+ iree_atomic_store_int32(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE,
+ iree_memory_order_seq_cst);
+ iree_notification_post(&worker->state_notification, IREE_ALL_WAITERS);
+ return 0;
+}
diff --git a/runtime/src/iree/task/worker.h b/runtime/src/iree/task/worker.h
new file mode 100644
index 0000000..6a7fc31
--- /dev/null
+++ b/runtime/src/iree/task/worker.h
@@ -0,0 +1,205 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TASK_WORKER_H_
+#define IREE_TASK_WORKER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/prng.h"
+#include "iree/base/internal/synchronization.h"
+#include "iree/base/internal/threading.h"
+#include "iree/base/tracing.h"
+#include "iree/task/affinity_set.h"
+#include "iree/task/executor.h"
+#include "iree/task/list.h"
+#include "iree/task/queue.h"
+#include "iree/task/task.h"
+#include "iree/task/topology.h"
+#include "iree/task/tuning.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Indicates the current state of a worker or, in the case of EXITING, the state
+// the worker should transition to.
+//
+// Transition graph:
+// SUSPENDED -> RUNNING (IDLE<->PROCESSING) -> EXITING -> ZOMBIE
+//
+// NOTE: state values are ordered such that </> comparisons can be used; ensure
+// that for example all states after resuming are > SUSPENDED and all states
+// before exiting are < EXITING.
+typedef enum iree_task_worker_state_e {
+ // Worker has been created in a suspended state and must be resumed to wake.
+ IREE_TASK_WORKER_STATE_SUSPENDED = 0,
+ // Worker is idle or actively processing tasks (either its own or others).
+ IREE_TASK_WORKER_STATE_RUNNING = 1,
+ // Worker should exit (or is exiting) and will soon enter the zombie state.
+ // Coordinators can request workers to exit by setting their state to this and
+ // then waking.
+ IREE_TASK_WORKER_STATE_EXITING = 2,
+ // Worker has exited and entered a 🧟 state (waiting for join).
+ // The thread handle is still valid and must be destroyed.
+ IREE_TASK_WORKER_STATE_ZOMBIE = 3,
+} iree_task_worker_state_t;
+
+// A worker within the executor pool.
+//
+// NOTE: fields in here are touched from multiple threads with lock-free
+// techniques. The alignment of the entire iree_task_worker_t as well as the
+// alignment and padding between particular fields is carefully (though perhaps
+// not yet correctly) selected; see the 'LAYOUT' comments below.
+typedef struct iree_task_worker_t {
+ // A LIFO mailbox used by coordinators to post tasks to this worker.
+ // As workers self-nominate to be coordinators and fan out dispatch shards
+ // they can directly emplace those shards into the workers that should execute
+ // them based on the work distribution policy. When workers go to look for
+ // more work after their local queue empties they will flush this list and
+ // move all of the tasks into their local queue and restart processing.
+ // LAYOUT: must be 64b away from local_task_queue.
+ iree_atomic_task_slist_t mailbox_slist;
+
+ // Current state of the worker (iree_task_worker_state_t).
+ // LAYOUT: frequent access; next to wake_notification as they are always
+ // accessed together.
+ iree_atomic_int32_t state;
+
+ // Notification signaled when the worker should wake (if it is idle).
+ // LAYOUT: next to state for similar access patterns; when posting other
+ // threads will touch mailbox_slist and then send a wake
+ // notification.
+ iree_notification_t wake_notification;
+
+ // Notification signaled when the worker changes any state.
+ iree_notification_t state_notification;
+
+ // Parent executor that can be used to access the global work queue or task
+ // pool. Executors always outlive the workers they own.
+ iree_task_executor_t* executor;
+
+ // Bit the worker represents in the various worker bitsets.
+ iree_task_affinity_set_t worker_bit;
+
+ // Ideal thread affinity for the worker thread.
+ iree_thread_affinity_t ideal_thread_affinity;
+
+ // A bitmask of other group indices that share some level of the cache
+ // hierarchy. Workers of this group are more likely to constructively share
+ // some cache levels higher up with these other groups. For example, if the
+ // workers in a group all share an L2 cache then the groups indicated here may
+ // all share the same L3 cache.
+ iree_task_affinity_set_t constructive_sharing_mask;
+
+ // Maximum number of attempts to make when trying to steal tasks from other
+ // workers. This could be 64 (try stealing from all workers) or just a handful
+ // (try stealing from these 3 other cores that share your L3 cache).
+ uint32_t max_theft_attempts;
+
+ // Rotation counter for work stealing (ensures we don't favor one victim).
+ // Only ever touched by the worker thread as it steals work.
+ iree_prng_minilcg128_state_t theft_prng;
+
+ // Thread handle of the worker. If the thread has exited the handle will
+ // remain valid so that the executor can query its state.
+ iree_thread_t* thread;
+
+ // Guess at the current processor ID.
+ // This is updated infrequently as it can be semi-expensive to determine
+ // (on some platforms at least 1 syscall involved). We always update it upon
+ // waking as idle waits are the most likely place the worker will be migrated
+ // across processors.
+ iree_cpu_processor_id_t processor_id;
+ // An opaque tag used to reduce the cost of processor ID queries.
+ iree_cpu_processor_tag_t processor_tag;
+
+ // Destructive interference padding between the mailbox and local task queue
+ // to ensure that the worker - who is pounding on local_task_queue - doesn't
+ // contend with submissions or coordinators dropping new tasks in the mailbox.
+ //
+ // Today we don't need this, however on 32-bit systems or if we adjust the
+ // size of iree_task_affinity_t/iree_task_affinity_set_t/etc we may need to
+ // add it back.
+ //
+ // NOTE: due to the layout requirements of this structure (to avoid cache
+ // interference) this is the only place padding should be added.
+ // uint8_t _padding[8];
+
+ // Pointer to local memory available for use exclusively by the worker.
+ // The base address should be aligned to avoid false sharing with other
+ // workers.
+ iree_byte_span_t local_memory;
+
+ // Worker-local FIFO queue containing the tasks that will be processed by the
+ // worker. This queue supports work-stealing by other workers if they run out
+ // of work of their own.
+ // LAYOUT: must be 64b away from mailbox_slist.
+ iree_task_queue_t local_task_queue;
+} iree_task_worker_t;
+static_assert(offsetof(iree_task_worker_t, mailbox_slist) +
+ sizeof(iree_atomic_task_slist_t) <
+ iree_hardware_constructive_interference_size,
+ "mailbox_slist must be in the first cache line");
+static_assert(offsetof(iree_task_worker_t, local_task_queue) >=
+ iree_hardware_constructive_interference_size,
+ "local_task_queue must be separated from mailbox_slist by "
+ "at least a cache line");
+
+// Initializes a worker by creating its thread and configuring it for receiving
+// tasks. Where supported the worker will be created in a suspended state so
+// that we aren't creating a thundering herd on startup:
+// https://en.wikipedia.org/wiki/Thundering_herd_problem
+iree_status_t iree_task_worker_initialize(
+ iree_task_executor_t* executor, iree_host_size_t worker_index,
+ const iree_task_topology_group_t* topology_group,
+ iree_byte_span_t local_memory, iree_prng_splitmix64_state_t* seed_prng,
+ iree_task_worker_t* out_worker);
+
+// Requests that the worker begin exiting (if it hasn't already).
+// If the worker is actively processing tasks it will wait until it has
+// completed all it can and is about to go idle prior to exiting.
+//
+// May be called from any thread (including the worker thread).
+void iree_task_worker_request_exit(iree_task_worker_t* worker);
+
+// Blocks the caller until |worker| has exited.
+//
+// May be called from any thread.
+void iree_task_worker_await_exit(iree_task_worker_t* worker);
+
+// Deinitializes a worker that has successfully exited.
+// The worker must be in the IREE_TASK_WORKER_STATE_ZOMBIE state.
+//
+// Expected shutdown sequence:
+// - request_exit on all workers
+// - await_exit on all workers
+// - deinitialize all workers
+void iree_task_worker_deinitialize(iree_task_worker_t* worker);
+
+// Posts a FIFO list of tasks to the worker mailbox. The target worker takes
+// ownership of the tasks and will be woken if it is currently idle.
+//
+// May be called from any thread (including the worker thread).
+void iree_task_worker_post_tasks(iree_task_worker_t* worker,
+ iree_task_list_t* list);
+
+// Tries to steal up to |max_tasks| from the back of the queue.
+// Returns NULL if no tasks are available and otherwise up to |max_tasks| tasks
+// that were at the tail of the worker FIFO will be moved to the |target_queue|
+// and the first of the stolen tasks is returned. While tasks from the FIFO
+// are preferred this may also steal tasks from the mailbox.
+iree_task_t* iree_task_worker_try_steal_task(iree_task_worker_t* worker,
+ iree_task_queue_t* target_queue,
+ iree_host_size_t max_tasks);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TASK_WORKER_H_
diff --git a/runtime/src/iree/testing/BUILD b/runtime/src/iree/testing/BUILD
new file mode 100644
index 0000000..31be851
--- /dev/null
+++ b/runtime/src/iree/testing/BUILD
@@ -0,0 +1,65 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Testing utilities for IREE.
+
+load("//iree:build_defs.oss.bzl", "iree_runtime_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_runtime_cc_library(
+ name = "benchmark",
+ srcs = [
+ "benchmark_full.cc",
+ ],
+ hdrs = [
+ "benchmark.h",
+ ],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base:tracing",
+ "@com_google_benchmark//:benchmark",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "benchmark_main",
+ testonly = True,
+ srcs = ["benchmark_main.c"],
+ deps = [
+ ":benchmark",
+ "//runtime/src/iree/base/internal:flags",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "gtest",
+ testonly = True,
+ hdrs = [
+ "gtest.h",
+ "status_matchers.h",
+ ],
+ deps = [
+ "//runtime/src/iree/base:cc",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+iree_runtime_cc_library(
+ name = "gtest_main",
+ testonly = True,
+ srcs = ["gtest_main.cc"],
+ tags = ["keep_dep"],
+ deps = [
+ ":gtest",
+ "//runtime/src/iree/base/internal:flags",
+ "@com_google_googletest//:gtest",
+ ],
+)
diff --git a/runtime/src/iree/testing/CMakeLists.txt b/runtime/src/iree/testing/CMakeLists.txt
new file mode 100644
index 0000000..be9935c
--- /dev/null
+++ b/runtime/src/iree/testing/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Doesn't use bazel_to_cmake because of special logic for the benchmark library.
+
+iree_add_all_subdirs()
+
+if(${IREE_ENABLE_THREADING})
+ iree_cc_library(
+ NAME
+ benchmark
+ HDRS
+ "benchmark.h"
+ SRCS
+ "benchmark_full.cc"
+ DEPS
+ benchmark
+ iree::base
+ iree::base::tracing
+ PUBLIC
+ )
+else()
+ iree_cc_library(
+ NAME
+ benchmark
+ HDRS
+ "benchmark.h"
+ SRCS
+ "benchmark_nop.c"
+ DEPS
+ iree::base
+ iree::base::tracing
+ PUBLIC
+ )
+endif()
+
+iree_cc_library(
+ NAME
+ benchmark_main
+ SRCS
+ "benchmark_main.c"
+ DEPS
+ ::benchmark
+ iree::base::internal::flags
+ TESTONLY
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ gtest
+ HDRS
+ "gtest.h"
+ "status_matchers.h"
+ DEPS
+ gmock
+ gtest
+ iree::base::cc
+ TESTONLY
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ gtest_main
+ SRCS
+ "gtest_main.cc"
+ DEPS
+ ::gtest
+ gmock
+ gtest
+ iree::base::internal::flags
+ TESTONLY
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/testing/benchmark.h b/runtime/src/iree/testing/benchmark.h
new file mode 100644
index 0000000..cc258d5
--- /dev/null
+++ b/runtime/src/iree/testing/benchmark.h
@@ -0,0 +1,147 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TESTING_BENCHMARK_H_
+#define IREE_TESTING_BENCHMARK_H_
+
+// This is a C API shim for a benchmark-like interface.
+// The intent is that we can write benchmarks that are portable to bare-metal
+// systems and use some simple tooling while also allowing them to run on
+// the full benchmark library with all its useful reporting and statistics.
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_benchmark_state_t
+//===----------------------------------------------------------------------===//
+
+// Benchmark state manipulator.
+// Passed to each benchmark during execution to control the benchmark state
+// or append information beyond just timing.
+typedef struct iree_benchmark_state_t {
+ // Internal implementation handle.
+ void* impl;
+
+ // Allocator that can be used for host allocations required during benchmark
+ // execution.
+ iree_allocator_t host_allocator;
+} iree_benchmark_state_t;
+
+// Returns a range argument with the given ordinal.
+int64_t iree_benchmark_get_range(iree_benchmark_state_t* state,
+ iree_host_size_t ordinal);
+
+// Returns true while the benchmark should keep running its step loop.
+//
+// Usage:
+// while (iree_benchmark_keep_running(state, 1000)) {
+// // process 1000 elements
+// }
+bool iree_benchmark_keep_running(iree_benchmark_state_t* state,
+ uint64_t batch_count);
+
+// Reports that the currently executing benchmark cannot be run.
+// Callers should return after calling as further benchmark-related calls may
+// fail.
+void iree_benchmark_skip(iree_benchmark_state_t* state, const char* message);
+
+// Suspends the benchmark timer until iree_benchmark_resume_timing is called.
+// This can be used to guard per-step code that is required to initialize the
+// work but not something that needs to be accounted for in the benchmark
+// timing. Introduces non-trivial overhead: only use this ~once per step when
+// then going on to perform large amounts of batch work in the step.
+void iree_benchmark_pause_timing(iree_benchmark_state_t* state);
+
+// Resumes the benchmark timer after a prior iree_benchmark_pause_timing.
+void iree_benchmark_resume_timing(iree_benchmark_state_t* state);
+
+// Sets a label string that will be displayed alongside the report line from the
+// currently executing benchmark.
+void iree_benchmark_set_label(iree_benchmark_state_t* state, const char* label);
+
+// Adds a 'bytes/s' label with the given value.
+//
+// REQUIRES: must only be called outside of the benchmark step loop.
+void iree_benchmark_set_bytes_processed(iree_benchmark_state_t* state,
+ int64_t bytes);
+
+// Adds an `items/s` label with the given value.
+//
+// REQUIRES: must only be called outside of the benchmark step loop.
+void iree_benchmark_set_items_processed(iree_benchmark_state_t* state,
+ int64_t items);
+
+//===----------------------------------------------------------------------===//
+// iree_benchmark_def_t
+//===----------------------------------------------------------------------===//
+
+enum iree_benchmark_flag_bits_t {
+ IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME = 1u << 0,
+
+ IREE_BENCHMARK_FLAG_USE_REAL_TIME = 1u << 1,
+ IREE_BENCHMARK_FLAG_USE_MANUAL_TIME = 1u << 2,
+};
+typedef uint32_t iree_benchmark_flags_t;
+
+typedef enum iree_benchmark_unit_e {
+ IREE_BENCHMARK_UNIT_MILLISECOND = 0,
+ IREE_BENCHMARK_UNIT_MICROSECOND,
+ IREE_BENCHMARK_UNIT_NANOSECOND,
+} iree_benchmark_unit_t;
+
+typedef struct iree_benchmark_def_t iree_benchmark_def_t;
+
+// A benchmark case definition.
+struct iree_benchmark_def_t {
+ // IREE_BENCHMARK_FLAG_* bitmask controlling benchmark behavior and reporting.
+ iree_benchmark_flags_t flags;
+
+ // Time unit used in display.
+ iree_benchmark_unit_t time_unit; // MILLISECOND by default
+
+ // Optional minimum duration the benchmark should run for in nanoseconds.
+ iree_duration_t minimum_duration_ns; // 0 if unspecified to autodetect
+ // Optional iteration count the benchmark should run for.
+ uint64_t iteration_count; // 0 if unspecified to autodetect
+
+ // TODO(benvanik): add range arguments.
+
+ // Runs the benchmark to completion.
+ // Implementations must call iree_benchmark_keep_running in a loop until it
+ // returns false.
+ iree_status_t (*run)(const iree_benchmark_def_t* benchmark_def,
+ iree_benchmark_state_t* benchmark_state);
+
+ // User-defined data accessible in the run function.
+ const void* user_data;
+};
+
+// Registers a benchmark with the given definition.
+void iree_benchmark_register(iree_string_view_t name,
+ const iree_benchmark_def_t* benchmark_def);
+
+//===----------------------------------------------------------------------===//
+// Benchmark infra management
+//===----------------------------------------------------------------------===//
+
+// Initializes the benchmark framework.
+// Must be called before any other iree_benchmark_* functions.
+void iree_benchmark_initialize(int* argc, char** argv);
+
+// Runs all registered benchmarks specified by the command line flags.
+// Must be called after iree_benchmark_initialize and zero or more benchmarks
+// have been registered with iree_benchmark_register.
+void iree_benchmark_run_specified(void);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_TESTING_BENCHMARK_H_
diff --git a/runtime/src/iree/testing/benchmark_full.cc b/runtime/src/iree/testing/benchmark_full.cc
new file mode 100644
index 0000000..c01abf0
--- /dev/null
+++ b/runtime/src/iree/testing/benchmark_full.cc
@@ -0,0 +1,190 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <utility>
+
+#include "benchmark/benchmark.h"
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/testing/benchmark.h"
+
+//===----------------------------------------------------------------------===//
+// iree_benchmark_state_t
+//===----------------------------------------------------------------------===//
+
+benchmark::State& GetBenchmarkState(iree_benchmark_state_t* state) {
+ return *(benchmark::State*)state->impl;
+}
+
+int64_t iree_benchmark_get_range(iree_benchmark_state_t* state,
+ iree_host_size_t ordinal) {
+ auto& s = GetBenchmarkState(state);
+ return s.range(ordinal);
+}
+
+bool iree_benchmark_keep_running(iree_benchmark_state_t* state,
+ uint64_t batch_count) {
+ auto& s = GetBenchmarkState(state);
+ return s.KeepRunningBatch(batch_count);
+}
+
+void iree_benchmark_skip(iree_benchmark_state_t* state, const char* message) {
+ auto& s = GetBenchmarkState(state);
+ s.SkipWithError(message);
+}
+
+void iree_benchmark_pause_timing(iree_benchmark_state_t* state) {
+ auto& s = GetBenchmarkState(state);
+ s.PauseTiming();
+}
+
+void iree_benchmark_resume_timing(iree_benchmark_state_t* state) {
+ auto& s = GetBenchmarkState(state);
+ s.ResumeTiming();
+}
+
+void iree_benchmark_set_label(iree_benchmark_state_t* state,
+ const char* label) {
+ auto& s = GetBenchmarkState(state);
+ s.SetLabel(label);
+}
+
+void iree_benchmark_set_bytes_processed(iree_benchmark_state_t* state,
+ int64_t bytes) {
+ auto& s = GetBenchmarkState(state);
+ s.SetBytesProcessed(bytes);
+}
+
+void iree_benchmark_set_items_processed(iree_benchmark_state_t* state,
+ int64_t items) {
+ auto& s = GetBenchmarkState(state);
+ s.SetItemsProcessed(items);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_benchmark_def_t
+//===----------------------------------------------------------------------===//
+
+static std::string StatusToString(iree_status_t status) {
+ if (iree_status_is_ok(status)) {
+ return "OK";
+ }
+ iree_host_size_t buffer_length = 0;
+ if (IREE_UNLIKELY(!iree_status_format(status, /*buffer_capacity=*/0,
+ /*buffer=*/NULL, &buffer_length))) {
+ return "<!>";
+ }
+ std::string result(buffer_length, '\0');
+ if (IREE_UNLIKELY(!iree_status_format(status, result.size() + 1,
+ const_cast<char*>(result.data()),
+ &buffer_length))) {
+ return "<!>";
+ }
+ return result;
+}
+
+static void iree_benchmark_run(const char* benchmark_name,
+ const iree_benchmark_def_t* benchmark_def,
+ benchmark::State& benchmark_state) {
+ IREE_TRACE_SCOPE_DYNAMIC(benchmark_name);
+ IREE_TRACE_FRAME_MARK();
+
+ iree_benchmark_state_t state;
+ memset(&state, 0, sizeof(state));
+ state.impl = &benchmark_state;
+ state.host_allocator = iree_allocator_system();
+
+ iree_status_t status = benchmark_def->run(benchmark_def, &state);
+ if (!iree_status_is_ok(status)) {
+ auto status_str = StatusToString(status);
+ iree_status_ignore(status);
+ benchmark_state.SkipWithError(status_str.c_str());
+ }
+}
+
+void iree_benchmark_register(iree_string_view_t name,
+ const iree_benchmark_def_t* benchmark_def) {
+ std::string name_str(name.data, name.size);
+ std::string prefixed_str = "BM_" + name_str;
+ iree_benchmark_def_t cloned_def = *benchmark_def;
+ auto* instance = benchmark::RegisterBenchmark(
+ prefixed_str.c_str(),
+ [name_str, cloned_def](benchmark::State& state) -> void {
+ iree_benchmark_run(name_str.c_str(), &cloned_def, state);
+ });
+
+ if (iree_all_bits_set(benchmark_def->flags,
+ IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME)) {
+ instance->MeasureProcessCPUTime();
+ }
+ if (iree_all_bits_set(benchmark_def->flags,
+ IREE_BENCHMARK_FLAG_USE_REAL_TIME)) {
+ instance->UseRealTime();
+ }
+ if (iree_all_bits_set(benchmark_def->flags,
+ IREE_BENCHMARK_FLAG_USE_MANUAL_TIME)) {
+ instance->UseManualTime();
+ }
+
+ if (benchmark_def->minimum_duration_ns != 0) {
+ instance->MinTime((double)benchmark_def->minimum_duration_ns * 1e-9);  // ns -> s; MinTime takes seconds
+ } else if (benchmark_def->iteration_count != 0) {
+ instance->Iterations(benchmark_def->iteration_count);
+ }
+
+ switch (benchmark_def->time_unit) {
+ default:
+ case IREE_BENCHMARK_UNIT_MILLISECOND:
+ instance->Unit(benchmark::kMillisecond);
+ break;
+ case IREE_BENCHMARK_UNIT_MICROSECOND:
+ instance->Unit(benchmark::kMicrosecond);
+ break;
+ case IREE_BENCHMARK_UNIT_NANOSECOND:
+ instance->Unit(benchmark::kNanosecond);
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Benchmark infra management
+//===----------------------------------------------------------------------===//
+
+void iree_benchmark_initialize(int* argc, char** argv) {
+ benchmark::Initialize(argc, argv);
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ // clang-format off
+ fprintf(stderr,
+"\x1b[31m"
+"===----------------------------------------------------------------------===\n"
+"\n"
+" ██ ██ █████ ██████ ███ ██ ██ ███ ██ ██████\n"
+" ██ ██ ██ ██ ██ ██ ████ ██ ██ ████ ██ ██\n"
+" ██ █ ██ ███████ ██████ ██ ██ ██ ██ ██ ██ ██ ██ ███\n"
+" ██ ███ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██\n"
+" ███ ███ ██ ██ ██ ██ ██ ████ ██ ██ ████ ██████\n"
+"\n"
+"===----------------------------------------------------------------------===\n"
+"\n"
+"Tracing is enabled and will skew your results!\n"
+"The timings involved here can be an order of magnitude off due to the tracing\n"
+"time sampling, recording, and instrumentation overhead. Disable tracing with\n"
+"IREE_ENABLE_RUNTIME_TRACING=OFF and rebuild.\n"
+"\x1b[0m"
+"\n"
+ );
+ fflush(stderr);
+ // clang-format on
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+}
+
+void iree_benchmark_run_specified(void) { benchmark::RunSpecifiedBenchmarks(); }
diff --git a/runtime/src/iree/testing/benchmark_main.c b/runtime/src/iree/testing/benchmark_main.c
new file mode 100644
index 0000000..860f4a6
--- /dev/null
+++ b/runtime/src/iree/testing/benchmark_main.c
@@ -0,0 +1,18 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/flags.h"
+#include "iree/testing/benchmark.h"
+
+int main(int argc, char** argv) {
+ // Pass through flags to benchmark (allowing --help to fall through).
+ iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK |
+ IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP,
+ &argc, &argv);
+ iree_benchmark_initialize(&argc, argv);
+ iree_benchmark_run_specified();
+ return 0;
+}
diff --git a/runtime/src/iree/testing/benchmark_nop.c b/runtime/src/iree/testing/benchmark_nop.c
new file mode 100644
index 0000000..65272e7
--- /dev/null
+++ b/runtime/src/iree/testing/benchmark_nop.c
@@ -0,0 +1,41 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/testing/benchmark.h"
+
+int64_t iree_benchmark_get_range(iree_benchmark_state_t* state,
+ iree_host_size_t ordinal) {
+ return 0;
+}
+
+bool iree_benchmark_keep_running(iree_benchmark_state_t* state,
+ uint64_t batch_count) {
+ return false;
+}
+
+void iree_benchmark_skip(iree_benchmark_state_t* state, const char* message) {}
+
+void iree_benchmark_pause_timing(iree_benchmark_state_t* state) {}
+
+void iree_benchmark_resume_timing(iree_benchmark_state_t* state) {}
+
+void iree_benchmark_set_label(iree_benchmark_state_t* state,
+ const char* label) {}
+
+void iree_benchmark_set_bytes_processed(iree_benchmark_state_t* state,
+ int64_t bytes) {}
+
+void iree_benchmark_set_items_processed(iree_benchmark_state_t* state,
+ int64_t items) {}
+
+void iree_benchmark_register(iree_string_view_t name,
+ const iree_benchmark_def_t* benchmark_def) {}
+
+void iree_benchmark_initialize(int* argc, char** argv) {}
+
+void iree_benchmark_run_specified(void) {}
diff --git a/runtime/src/iree/testing/gtest.h b/runtime/src/iree/testing/gtest.h
new file mode 100644
index 0000000..fbd6dc4
--- /dev/null
+++ b/runtime/src/iree/testing/gtest.h
@@ -0,0 +1,17 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TESTING_GTEST_H_
+#define IREE_TESTING_GTEST_H_
+
+#include "gmock/gmock-matchers.h" // IWYU pragma: export
+#include "gmock/gmock.h" // IWYU pragma: export
+#include "gtest/gtest-message.h" // IWYU pragma: export
+#include "gtest/gtest-spi.h" // IWYU pragma: export
+#include "gtest/gtest-test-part.h" // IWYU pragma: export
+#include "gtest/gtest.h" // IWYU pragma: export
+
+#endif // IREE_TESTING_GTEST_H_
diff --git a/runtime/src/iree/testing/gtest_main.cc b/runtime/src/iree/testing/gtest_main.cc
new file mode 100644
index 0000000..801aac8
--- /dev/null
+++ b/runtime/src/iree/testing/gtest_main.cc
@@ -0,0 +1,18 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/flags.h"
+#include "iree/testing/gtest.h"
+
+extern "C" int main(int argc, char** argv) {
+ // Pass through flags to gtest (allowing --help to fall through).
+ iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK |
+ IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP,
+ &argc, &argv);
+ ::testing::InitGoogleTest(&argc, argv);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/runtime/src/iree/testing/status_matchers.h b/runtime/src/iree/testing/status_matchers.h
new file mode 100644
index 0000000..1697e4c
--- /dev/null
+++ b/runtime/src/iree/testing/status_matchers.h
@@ -0,0 +1,369 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TESTING_STATUS_MATCHERS_H_
+#define IREE_TESTING_STATUS_MATCHERS_H_
+
+#include <memory>
+#include <string>
+
+#include "iree/base/status_cc.h" // IWYU pragma: export
+#include "iree/testing/gtest.h"
+
+namespace iree {
+
+namespace internal {
+
+// Implements a gMock matcher that checks that an iree::StatusOr<T> has an OK
+// status and that the contained T value matches another matcher.
+template <typename T>
+class IsOkAndHoldsMatcher
+ : public ::testing::MatcherInterface<const StatusOr<T> &> {
+ public:
+ template <typename MatcherT>
+ IsOkAndHoldsMatcher(MatcherT &&value_matcher)
+ : value_matcher_(::testing::SafeMatcherCast<const T &>(value_matcher)) {}
+
+ // From testing::MatcherInterface.
+ void DescribeTo(std::ostream *os) const override {
+ *os << "is OK and contains a value that ";
+ value_matcher_.DescribeTo(os);
+ }
+
+ // From testing::MatcherInterface.
+ void DescribeNegationTo(std::ostream *os) const override {
+ *os << "is not OK or contains a value that ";
+ value_matcher_.DescribeNegationTo(os);
+ }
+
+ // From testing::MatcherInterface.
+ bool MatchAndExplain(
+ const StatusOr<T> &status_or,
+ ::testing::MatchResultListener *listener) const override {
+ if (!status_or.ok()) {
+ *listener << "which is not OK";
+ return false;
+ }
+
+ ::testing::StringMatchResultListener value_listener;
+ bool is_a_match =
+ value_matcher_.MatchAndExplain(status_or.value(), &value_listener);
+ std::string value_explanation = value_listener.str();
+ if (!value_explanation.empty()) {
+ *listener << "which contains a value " << value_explanation;
+ }
+
+ return is_a_match;
+ }
+
+ private:
+ const ::testing::Matcher<const T &> value_matcher_;
+};
+
+// A polymorphic IsOkAndHolds() matcher.
+//
+// IsOkAndHolds() returns a matcher that can be used to process an IsOkAndHolds
+// expectation. However, the value type T is not provided when IsOkAndHolds() is
+// invoked. The value type is only inferable when the gUnit framework invokes
+// the matcher with a value. Consequently, the IsOkAndHolds() function must
+// return an object that is implicitly convertible to a matcher for StatusOr<T>.
+// gUnit refers to such an object as a polymorphic matcher, since it can be used
+// to match with more than one type of value.
+template <typename ValueMatcherT>
+class IsOkAndHoldsGenerator {
+ public:
+ explicit IsOkAndHoldsGenerator(ValueMatcherT value_matcher)
+ : value_matcher_(std::move(value_matcher)) {}
+
+ template <typename T>
+ operator ::testing::Matcher<const StatusOr<T> &>() const {
+ return ::testing::MakeMatcher(new IsOkAndHoldsMatcher<T>(value_matcher_));
+ }
+
+ private:
+ const ValueMatcherT value_matcher_;
+};
+
+// Implements a gMock matcher for checking error-code expectations on
+// iree::Status and iree::StatusOr objects.
+template <typename Enum, typename Matchee>
+class StatusMatcher : public ::testing::MatcherInterface<Matchee> {
+ public:
+ StatusMatcher(Enum code, std::string message)
+ : code_(code), message_(std::move(message)) {}
+
+ // From testing::MatcherInterface.
+ //
+ // Describes the expected error code.
+ void DescribeTo(std::ostream *os) const override {
+ *os << "error code " << StatusCodeToString(code_);
+ if (!message_.empty()) {
+ *os << "::'" << message_ << "'";
+ }
+ }
+
+ // From testing::MatcherInterface.
+ //
+ // Tests whether |matchee| has an error code that meets this matcher's
+ // expectation. If an error message string is specified in this matcher, it
+ // also tests that |matchee| has an error message that matches that
+ // expectation.
+ bool MatchAndExplain(
+ Matchee &matchee,
+ ::testing::MatchResultListener *listener) const override {
+ if (GetCode(matchee) != code_) {
+ *listener << "whose error code is "
+ << StatusCodeToString(GetCode(matchee)) << ": "
+ << GetMessage(matchee);
+ return false;
+ }
+ if (!message_.empty() && GetMessage(matchee) != message_) {
+ *listener << "whose error message is '" << GetMessage(matchee) << "'";
+ return false;
+ }
+ return true;
+ }
+
+ private:
+ template <typename T>
+ StatusCode GetCode(const T &matchee) const {
+ return GetCode(matchee.status());
+ }
+
+ StatusCode GetCode(const iree_status_code_t &status_code) const {
+ return static_cast<StatusCode>(status_code);
+ }
+
+ StatusCode GetCode(const iree_status_t &status) const {
+ return static_cast<StatusCode>(iree_status_code(status));
+ }
+
+ StatusCode GetCode(const Status &status) const { return status.code(); }
+
+ template <typename T>
+ std::string GetMessage(const T &matchee) const {
+ return GetMessage(matchee.status());
+ }
+
+ std::string GetMessage(const iree_status_t &status) const {
+ return Status::ToString(status);
+ }
+
+ std::string GetMessage(const Status &status) const {
+ return status.ToString();
+ }
+
+ // Expected error code.
+ const Enum code_;
+
+ // Expected error message (empty if none expected and verified).
+ const std::string message_;
+};
+
+// StatusMatcherGenerator is an intermediate object returned by
+// iree::testing::status::StatusIs().
+// It implements implicit type-cast operators to supported matcher types:
+// Matcher<const Status &> and Matcher<const StatusOr<T> &>. These typecast
+// operators create gMock matchers that test OK expectations on a status
+// container.
+template <typename Enum>
+class StatusIsMatcherGenerator {
+ public:
+ StatusIsMatcherGenerator(Enum code, std::string message)
+ : code_(code), message_(std::move(message)) {}
+
+ operator ::testing::Matcher<const StatusCode &>() const {
+ return ::testing::MakeMatcher(
+ new internal::StatusMatcher<Enum, const StatusCode &>(code_, message_));
+ }
+
+ operator ::testing::Matcher<const iree_status_t &>() const {
+ return ::testing::MakeMatcher(
+ new internal::StatusMatcher<Enum, const iree_status_t &>(code_,
+ message_));
+ }
+
+ operator ::testing::Matcher<const Status &>() const {
+ return ::testing::MakeMatcher(
+ new internal::StatusMatcher<Enum, const Status &>(code_, message_));
+ }
+
+ template <class T>
+ operator ::testing::Matcher<const StatusOr<T> &>() const {
+ return ::testing::MakeMatcher(
+ new internal::StatusMatcher<Enum, const StatusOr<T> &>(code_,
+ message_));
+ }
+
+ private:
+ // Expected error code.
+ const Enum code_;
+
+ // Expected error message (empty if none expected and verified).
+ const std::string message_;
+};
+
+// Implements a gMock matcher that checks whether a status container (e.g.
+// iree::Status or iree::StatusOr<T>) has an OK status.
+template <class T>
+class IsOkMatcherImpl : public ::testing::MatcherInterface<T> {
+ public:
+ IsOkMatcherImpl() = default;
+
+ // From testing::MatcherInterface.
+ //
+ // Describes the OK expectation.
+ void DescribeTo(std::ostream *os) const override { *os << "is OK"; }
+
+ // From testing::MatcherInterface.
+ //
+ // Describes the negative OK expectation.
+ void DescribeNegationTo(std::ostream *os) const override {
+ *os << "is not OK";
+ }
+
+ // From testing::MatcherInterface.
+ //
+ // Tests whether |status_container|'s OK value meets this matcher's
+ // expectation.
+ bool MatchAndExplain(
+ const T &status_container,
+ ::testing::MatchResultListener *listener) const override {
+ if (!::iree::IsOk(status_container)) {
+ *listener << "which is not OK";
+ return false;
+ }
+ return true;
+ }
+};
+
+// IsOkMatcherGenerator is an intermediate object returned by iree::IsOk().
+// It implements implicit type-cast operators to supported matcher types:
+// Matcher<const Status &> and Matcher<const StatusOr<T> &>. These typecast
+// operators create gMock matchers that test OK expectations on a status
+// container.
+class IsOkMatcherGenerator {
+ public:
+ operator ::testing::Matcher<const iree_status_t &>() const {
+ return ::testing::MakeMatcher(
+ new internal::IsOkMatcherImpl<const iree_status_t &>());
+ }
+
+ operator ::testing::Matcher<const Status &>() const {
+ return ::testing::MakeMatcher(
+ new internal::IsOkMatcherImpl<const Status &>());
+ }
+
+ template <class T>
+ operator ::testing::Matcher<const StatusOr<T> &>() const {
+ return ::testing::MakeMatcher(
+ new internal::IsOkMatcherImpl<const StatusOr<T> &>());
+ }
+};
+
+} // namespace internal
+
+namespace testing {
+namespace status {
+
+// Returns a gMock matcher that expects an iree::StatusOr<T> object to have an
+// OK status and for the contained T object to match |value_matcher|.
+//
+// Example:
+//
+// StatusOr<string> raven_speech_result = raven.Speak();
+// EXPECT_THAT(raven_speech_result, IsOkAndHolds(HasSubstr("nevermore")));
+//
+// If foo is an object of type T and foo_result is an object of type
+// StatusOr<T>, you can write:
+//
+// EXPECT_THAT(foo_result, IsOkAndHolds(foo));
+//
+// instead of:
+//
+// EXPECT_THAT(foo_result, IsOkAndHolds(Eq(foo)));
+template <typename ValueMatcherT>
+internal::IsOkAndHoldsGenerator<ValueMatcherT> IsOkAndHolds(
+ ValueMatcherT value_matcher) {
+ return internal::IsOkAndHoldsGenerator<ValueMatcherT>(value_matcher);
+}
+
+// Returns a gMock matcher that expects an iree::Status object to have the
+// given |code|.
+template <typename Enum>
+internal::StatusIsMatcherGenerator<Enum> StatusIs(Enum code) {
+ return internal::StatusIsMatcherGenerator<Enum>(code, "");
+}
+
+// Returns a gMock matcher that expects an iree::Status object to have the
+// given |code| and |message|.
+template <typename Enum>
+internal::StatusIsMatcherGenerator<Enum> StatusIs(Enum code,
+ std::string message) {
+ return internal::StatusIsMatcherGenerator<Enum>(code, std::move(message));
+}
+
+// Returns an internal::IsOkMatcherGenerator, which may be typecast to a
+// Matcher<iree::Status> or Matcher<iree::StatusOr<T>>. These gMock
+// matchers test that a given status container has an OK status.
+inline internal::IsOkMatcherGenerator IsOk() {
+ return internal::IsOkMatcherGenerator();
+}
+
+} // namespace status
+} // namespace testing
+
+// Macros for testing the results of functions that return iree::Status or
+// iree::StatusOr<T> (for any type T).
+#define IREE_EXPECT_OK(rexpr) \
+ EXPECT_THAT(rexpr, ::iree::testing::status::StatusIs(::iree::StatusCode::kOk))
+#define IREE_ASSERT_OK(rexpr) \
+ ASSERT_THAT(rexpr, ::iree::testing::status::StatusIs(::iree::StatusCode::kOk))
+#define IREE_EXPECT_STATUS_IS(expected_code, expr) \
+ EXPECT_THAT(expr, ::iree::testing::status::StatusIs( \
+ static_cast<::iree::StatusCode>(expected_code)))
+
+// Executes an expression that returns an iree::StatusOr<T>, and assigns the
+// contained variable to lhs if the error code is OK.
+// If the Status is non-OK, generates a test failure and returns from the
+// current function, which must have a void return type.
+//
+// Example: Assigning to an existing value
+// IREE_ASSERT_OK_AND_ASSIGN(ValueType value, MaybeGetValue(arg));
+//
+// The value assignment example might expand into:
+// StatusOr<ValueType> status_or_value = MaybeGetValue(arg);
+// IREE_ASSERT_OK(status_or_value.status());
+// ValueType value = status_or_value.value();
+#define IREE_ASSERT_OK_AND_ASSIGN(lhs, rexpr) \
+ IREE_ASSERT_OK_AND_ASSIGN_IMPL( \
+ IREE_STATUS_MACROS_CONCAT_NAME(_status_or_value, __COUNTER__), lhs, \
+ rexpr);
+
+#define IREE_ASSERT_OK_AND_ASSIGN_IMPL(statusor, lhs, rexpr) \
+ auto statusor = (rexpr); \
+ IREE_ASSERT_OK(statusor.status()); \
+ lhs = std::move(statusor.value())
+#define IREE_STATUS_MACROS_CONCAT_NAME(x, y) \
+ IREE_STATUS_MACROS_CONCAT_IMPL(x, y)
+#define IREE_STATUS_MACROS_CONCAT_IMPL(x, y) x##y
+
+// Implements the PrintTo() method for iree::StatusOr<T>. This method is
+// used by gUnit to print iree::StatusOr<T> objects for debugging. The
+// implementation relies on gUnit for printing values of T when a
+// iree::StatusOr<T> object is OK and contains a value.
+template <typename T>
+void PrintTo(const StatusOr<T> &statusor, std::ostream *os) {
+ if (!statusor.ok()) {
+ *os << statusor.status();
+ } else {
+ *os << "OK: " << ::testing::PrintToString(statusor.value());
+ }
+}
+
+} // namespace iree
+
+#endif // IREE_TESTING_STATUS_MATCHERS_H_
diff --git a/runtime/src/iree/testing/vulkan/CMakeLists.txt b/runtime/src/iree/testing/vulkan/CMakeLists.txt
new file mode 100644
index 0000000..1dd197f
--- /dev/null
+++ b/runtime/src/iree/testing/vulkan/CMakeLists.txt
@@ -0,0 +1,70 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+if(NOT "${IREE_HAL_DRIVER_VULKAN}" OR NOT "${IREE_BUILD_SAMPLES}")
+ return()
+endif()
+
+# This target statically links against Vulkan.
+# One way to achieve this is by installing the Vulkan SDK from
+# https://vulkan.lunarg.com/.
+# If Vulkan is not found, omit the target.
+include(FindVulkan)
+if(NOT Vulkan_FOUND)
+ message(VERBOSE "Could not find Vulkan, disabling Vulkan GUI programs")
+ return()
+endif()
+
+# vcpkg install imgui[vulkan-binding,sdl2-binding]
+find_package(imgui CONFIG QUIET)
+if(NOT imgui_FOUND)
+ message(VERBOSE "Could not find Dear ImGui, disabling Vulkan GUI programs")
+ return()
+endif()
+
+# vcpkg install sdl2[vulkan]
+find_package(SDL2 CONFIG QUIET)
+if(NOT SDL2_FOUND)
+ message(VERBOSE "Could not find SDL2, disabling Vulkan GUI programs")
+ return()
+endif()
+
+iree_cc_library(
+ NAME
+ vulkan_gui_util
+ HDRS
+ "vulkan_gui_util.h"
+ SRCS
+ "vulkan_gui_util.cc"
+ DEPS
+ imgui::imgui
+ iree::base
+ iree::base::logging
+ iree::hal::vulkan
+ SDL2::SDL2
+ Vulkan::Vulkan
+)
+
+iree_cc_binary(
+ NAME
+ iree-run-module-vulkan-gui
+ SRCS
+ "iree-run-module-vulkan-gui-main.cc"
+ DEPS
+ ::vulkan_gui_util
+ iree::base::cc
+ iree::base::internal::file_io
+ iree::base::internal::flags
+ iree::base::internal::main
+ iree::base::tracing
+ iree::hal::vulkan::registration
+ iree::modules::hal
+ iree::tools::utils::vm_util
+ iree::vm
+ iree::vm::bytecode_module
+ LINKOPTS
+ "${IREE_TARGET_GUI_LINKOPTS}"
+)
diff --git a/runtime/src/iree/testing/vulkan/iree-run-module-vulkan-gui-main.cc b/runtime/src/iree/testing/vulkan/iree-run-module-vulkan-gui-main.cc
new file mode 100644
index 0000000..bfb8816
--- /dev/null
+++ b/runtime/src/iree/testing/vulkan/iree-run-module-vulkan-gui-main.cc
@@ -0,0 +1,436 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Vulkan GUI utility functions.
+// NOTE: we need to pull this header in first to make sure Vulkan API
+// prototypes are defined so that we can statically link against them.
+#include "iree/testing/vulkan/vulkan_gui_util.h"
+
+// Other dependencies (helpers, etc.)
+#include "iree/base/internal/file_io.h"
+#include "iree/base/internal/flags.h"
+#include "iree/base/internal/main.h"
+#include "iree/base/status_cc.h"
+#include "iree/hal/vulkan/registration/driver_module.h"
+#include "iree/modules/hal/module.h"
+#include "iree/tools/utils/vm_util.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode_module.h"
+
+IREE_FLAG(string, module_file, "-",
+ "File containing the module to load that contains the entry "
+ "function. Defaults to stdin.");
+
+IREE_FLAG(string, entry_function, "",
+ "Name of a function contained in the module specified by input_file "
+ "to run.");
+
+static iree_status_t parse_function_input(iree_string_view_t flag_name,
+ void* storage,
+ iree_string_view_t value) {
+ auto* list = (std::vector<std::string>*)storage;
+ list->push_back(std::string(value.data, value.size));
+ return iree_ok_status();
+}
+static void print_function_input(iree_string_view_t flag_name, void* storage,
+ FILE* file) {
+ auto* list = (std::vector<std::string>*)storage;
+ if (list->empty()) {
+ fprintf(file, "# --%.*s=\n", (int)flag_name.size, flag_name.data);
+ } else {
+ for (size_t i = 0; i < list->size(); ++i) {
+ fprintf(file, "--%.*s=\"%s\"\n", (int)flag_name.size, flag_name.data,
+ list->at(i).c_str());
+ }
+ }
+}
+static std::vector<std::string> FLAG_function_inputs;
+IREE_FLAG_CALLBACK(
+ parse_function_input, print_function_input, &FLAG_function_inputs,
+ function_input,
+ "An input value or buffer of the format:\n"
+ " [shape]xtype=[value]\n"
+ " 2x2xi32=1 2 3 4\n"
+ "Optionally, brackets may be used to separate the element values:\n"
+ " 2x2xi32=[[1 2][3 4]]\n"
+ "Each occurrence of the flag indicates an input in the order they were\n"
+ "specified on the command line.");
+
+static VkAllocationCallbacks* g_Allocator = NULL;
+static VkInstance g_Instance = VK_NULL_HANDLE;
+static VkPhysicalDevice g_PhysicalDevice = VK_NULL_HANDLE;
+static VkDevice g_Device = VK_NULL_HANDLE;
+static uint32_t g_QueueFamily = (uint32_t)-1;
+static VkQueue g_Queue = VK_NULL_HANDLE;
+static VkPipelineCache g_PipelineCache = VK_NULL_HANDLE;
+static VkDescriptorPool g_DescriptorPool = VK_NULL_HANDLE;
+
+static ImGui_ImplVulkanH_Window g_MainWindowData;
+static uint32_t g_MinImageCount = 2;
+static bool g_SwapChainRebuild = false;
+static int g_SwapChainResizeWidth = 0;
+static int g_SwapChainResizeHeight = 0;
+
+namespace iree {
+namespace {
+
+void check_vk_result(VkResult err) {
+ if (err == 0) return;
+ IREE_LOG(FATAL) << "VkResult: " << err;
+}
+
+void CleanupVulkan() {
+ vkDestroyDescriptorPool(g_Device, g_DescriptorPool, g_Allocator);
+
+ vkDestroyDevice(g_Device, g_Allocator);
+ vkDestroyInstance(g_Instance, g_Allocator);
+}
+
+void CleanupVulkanWindow() {
+ ImGui_ImplVulkanH_DestroyWindow(g_Instance, g_Device, &g_MainWindowData,
+ g_Allocator);
+}
+
+iree_status_t GetModuleContentsFromFlags(iree_file_contents_t** out_contents) {
+ IREE_TRACE_SCOPE0("GetModuleContentsFromFlags");
+ auto module_file = std::string(FLAG_module_file);
+ if (module_file == "-") {
+ return iree_stdin_read_contents(iree_allocator_system(), out_contents);
+ } else {
+ return iree_file_read_contents(module_file.c_str(), iree_allocator_system(),
+ out_contents);
+ }
+}
+
+// Runs the current IREE bytecode module and renders its result to a window
+// using ImGui.
+Status RunModuleAndUpdateImGuiWindow(
+ iree_hal_device_t* device, iree_vm_context_t* context,
+ iree_vm_function_t function, const std::string& function_name,
+ const vm::ref<iree_vm_list_t>& function_inputs,
+ const std::string& window_title) {
+ vm::ref<iree_vm_list_t> outputs;
+ IREE_RETURN_IF_ERROR(iree_vm_list_create(/*element_type=*/nullptr, 16,
+ iree_allocator_system(), &outputs));
+
+ IREE_LOG(INFO) << "EXEC @" << function_name;
+ IREE_RETURN_IF_ERROR(iree_vm_invoke(
+ context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr,
+ function_inputs.get(), outputs.get(), iree_allocator_system()));
+
+ std::ostringstream oss;
+ IREE_RETURN_IF_ERROR(PrintVariantList(outputs.get(), &oss));
+
+ outputs.reset();
+
+ ImGui::Begin(window_title.c_str(), /*p_open=*/nullptr,
+ ImGuiWindowFlags_AlwaysAutoResize);
+
+ ImGui::Text("Entry function:");
+ ImGui::Text("%s", function_name.c_str());
+ ImGui::Separator();
+
+ ImGui::Text("Invocation result:");
+ ImGui::Text("%s", oss.str().c_str());
+ ImGui::Separator();
+
+ // Framerate counter.
+ ImGui::Text("Application average %.3f ms/frame (%.1f FPS)",
+ 1000.0f / ImGui::GetIO().Framerate, ImGui::GetIO().Framerate);
+
+ ImGui::End();
+ return OkStatus();
+}
+} // namespace
+
+extern "C" int iree_main(int argc, char** argv) {
+ iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
+ IREE_CHECK_OK(iree_hal_vulkan_driver_module_register(
+ iree_hal_driver_registry_default()));
+
+ // --------------------------------------------------------------------------
+ // Create a window.
+ if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER) != 0) {
+ IREE_LOG(FATAL) << "Failed to initialize SDL";
+ return 1;
+ }
+
+ // Setup window
+ SDL_WindowFlags window_flags = (SDL_WindowFlags)( //
+ SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
+ SDL_Window* window = SDL_CreateWindow(
+ "IREE Samples - Vulkan Inference GUI", SDL_WINDOWPOS_CENTERED,
+ SDL_WINDOWPOS_CENTERED, 1280, 720, window_flags);
+ if (!window) {
+ IREE_LOG(FATAL) << "Failed to create SDL window";
+ return 1;
+ }
+
+ // Setup Vulkan
+ iree_hal_vulkan_features_t iree_vulkan_features =
+ static_cast<iree_hal_vulkan_features_t>(
+ IREE_HAL_VULKAN_FEATURE_ENABLE_VALIDATION_LAYERS |
+ IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS);
+ std::vector<const char*> layers = GetInstanceLayers(iree_vulkan_features);
+ std::vector<const char*> extensions =
+ GetInstanceExtensions(window, iree_vulkan_features);
+ SetupVulkan(iree_vulkan_features, layers.data(), layers.size(),
+ extensions.data(), extensions.size(), g_Allocator, &g_Instance,
+ &g_QueueFamily, &g_PhysicalDevice, &g_Queue, &g_Device,
+ &g_DescriptorPool);
+
+ // Create Window Surface
+ VkSurfaceKHR surface;
+ VkResult err;
+ if (SDL_Vulkan_CreateSurface(window, g_Instance, &surface) == 0) {
+ printf("Failed to create Vulkan surface.\n");
+ return 1;
+ }
+
+ // Create Framebuffers
+ int w, h;
+ SDL_GetWindowSize(window, &w, &h);
+ ImGui_ImplVulkanH_Window* wd = &g_MainWindowData;
+ SetupVulkanWindow(wd, g_Allocator, g_Instance, g_QueueFamily,
+ g_PhysicalDevice, g_Device, surface, w, h, g_MinImageCount);
+
+ // Setup Dear ImGui context
+ IMGUI_CHECKVERSION();
+ ImGui::CreateContext();
+ ImGuiIO& io = ImGui::GetIO();
+ (void)io;
+
+ ImGui::StyleColorsDark();
+
+ // Setup Platform/Renderer bindings
+ ImGui_ImplSDL2_InitForVulkan(window);
+ ImGui_ImplVulkan_InitInfo init_info = {};
+ init_info.Instance = g_Instance;
+ init_info.PhysicalDevice = g_PhysicalDevice;
+ init_info.Device = g_Device;
+ init_info.QueueFamily = g_QueueFamily;
+ init_info.Queue = g_Queue;
+ init_info.PipelineCache = g_PipelineCache;
+ init_info.DescriptorPool = g_DescriptorPool;
+ init_info.Allocator = g_Allocator;
+ init_info.MinImageCount = g_MinImageCount;
+ init_info.ImageCount = wd->ImageCount;
+ init_info.CheckVkResultFn = check_vk_result;
+ ImGui_ImplVulkan_Init(&init_info, wd->RenderPass);
+
+ // Upload Fonts
+ {
+ // Use any command queue
+ VkCommandPool command_pool = wd->Frames[wd->FrameIndex].CommandPool;
+ VkCommandBuffer command_buffer = wd->Frames[wd->FrameIndex].CommandBuffer;
+
+ err = vkResetCommandPool(g_Device, command_pool, 0);
+ check_vk_result(err);
+ VkCommandBufferBeginInfo begin_info = {};
+ begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+ begin_info.flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+ err = vkBeginCommandBuffer(command_buffer, &begin_info);
+ check_vk_result(err);
+
+ ImGui_ImplVulkan_CreateFontsTexture(command_buffer);
+
+ VkSubmitInfo end_info = {};
+ end_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ end_info.commandBufferCount = 1;
+ end_info.pCommandBuffers = &command_buffer;
+ err = vkEndCommandBuffer(command_buffer);
+ check_vk_result(err);
+ err = vkQueueSubmit(g_Queue, 1, &end_info, VK_NULL_HANDLE);
+ check_vk_result(err);
+
+ err = vkDeviceWaitIdle(g_Device);
+ check_vk_result(err);
+ ImGui_ImplVulkan_DestroyFontUploadObjects();
+ }
+ // --------------------------------------------------------------------------
+
+ // --------------------------------------------------------------------------
+ // Setup IREE.
+
+ // Check API version.
+ iree_api_version_t actual_version;
+ iree_status_t status =
+ iree_api_version_check(IREE_API_VERSION_LATEST, &actual_version);
+ if (iree_status_is_ok(status)) {
+ IREE_LOG(INFO) << "IREE runtime API version " << actual_version;
+ } else {
+ IREE_LOG(FATAL) << "Unsupported runtime API version " << actual_version;
+ }
+
+ // Register HAL module types.
+ IREE_CHECK_OK(iree_hal_module_register_types());
+
+ // Create a runtime Instance.
+ iree_vm_instance_t* iree_instance = nullptr;
+ IREE_CHECK_OK(
+ iree_vm_instance_create(iree_allocator_system(), &iree_instance));
+
+ // Create IREE Vulkan Driver and Device, sharing our VkInstance/VkDevice.
+ IREE_LOG(INFO) << "Creating Vulkan driver/device";
+ // Load symbols from our static `vkGetInstanceProcAddr` for IREE to use.
+ iree_hal_vulkan_syms_t* iree_vk_syms = nullptr;
+ IREE_CHECK_OK(iree_hal_vulkan_syms_create(
+ reinterpret_cast<void*>(&vkGetInstanceProcAddr), iree_allocator_system(),
+ &iree_vk_syms));
+ // Create the driver sharing our VkInstance.
+ iree_hal_driver_t* iree_vk_driver = nullptr;
+ iree_string_view_t driver_identifier = iree_make_cstring_view("vulkan");
+ iree_hal_vulkan_driver_options_t driver_options;
+ driver_options.api_version = VK_API_VERSION_1_2;
+ driver_options.requested_features = static_cast<iree_hal_vulkan_features_t>(
+ IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS);
+ IREE_CHECK_OK(iree_hal_vulkan_driver_create_using_instance(
+ driver_identifier, &driver_options, iree_vk_syms, g_Instance,
+ iree_allocator_system(), &iree_vk_driver));
+ // Create a device sharing our VkDevice and queue. This makes capturing with
+ // vendor tools easier because we will have sync compute residing in the
+ // rendered frame.
+ iree_string_view_t device_identifier = iree_make_cstring_view("vulkan");
+ iree_hal_vulkan_queue_set_t compute_queue_set;
+ compute_queue_set.queue_family_index = g_QueueFamily;
+ compute_queue_set.queue_indices = 1 << 0;
+ iree_hal_vulkan_queue_set_t transfer_queue_set;
+ transfer_queue_set.queue_indices = 0;
+ iree_hal_device_t* iree_vk_device = nullptr;
+ IREE_CHECK_OK(iree_hal_vulkan_wrap_device(
+ device_identifier, &driver_options.device_options, iree_vk_syms,
+ g_Instance, g_PhysicalDevice, g_Device, &compute_queue_set,
+ &transfer_queue_set, iree_allocator_system(), &iree_vk_device));
+ // Create a HAL module using the HAL device.
+ iree_vm_module_t* hal_module = nullptr;
+ IREE_CHECK_OK(iree_hal_module_create(iree_vk_device, iree_allocator_system(),
+ &hal_module));
+
+ // Load bytecode module from embedded data.
+  IREE_LOG(INFO) << "Loading IREE bytecode module...";
+ iree_file_contents_t* flatbuffer_contents = NULL;
+ IREE_CHECK_OK(iree::GetModuleContentsFromFlags(&flatbuffer_contents));
+ iree_vm_module_t* bytecode_module = nullptr;
+ IREE_CHECK_OK(iree_vm_bytecode_module_create(
+ flatbuffer_contents->const_buffer,
+ iree_file_contents_deallocator(flatbuffer_contents),
+ iree_allocator_system(), &bytecode_module));
+
+ // Allocate a context that will hold the module state across invocations.
+ iree_vm_context_t* iree_context = nullptr;
+ std::vector<iree_vm_module_t*> modules = {hal_module, bytecode_module};
+ IREE_CHECK_OK(iree_vm_context_create_with_modules(
+ iree_instance, IREE_VM_CONTEXT_FLAG_NONE, modules.data(), modules.size(),
+ iree_allocator_system(), &iree_context));
+ IREE_LOG(INFO) << "Context with modules is ready for use";
+
+ // Lookup the entry point function.
+ std::string entry_function = FLAG_entry_function;
+ iree_vm_function_t main_function;
+ IREE_CHECK_OK(bytecode_module->lookup_function(
+ bytecode_module->self, IREE_VM_FUNCTION_LINKAGE_EXPORT,
+ iree_string_view_t{entry_function.data(), entry_function.size()},
+ &main_function));
+ iree_string_view_t main_function_name = iree_vm_function_name(&main_function);
+ IREE_LOG(INFO) << "Resolved main function named '"
+ << std::string(main_function_name.data,
+ main_function_name.size)
+ << "'";
+
+ vm::ref<iree_vm_list_t> main_function_inputs;
+ IREE_CHECK_OK(ParseToVariantList(
+ iree_hal_device_allocator(iree_vk_device),
+ iree::span<const std::string>{FLAG_function_inputs.data(),
+ FLAG_function_inputs.size()},
+ &main_function_inputs));
+
+ const std::string window_title = std::string(FLAG_module_file);
+ // --------------------------------------------------------------------------
+
+ // --------------------------------------------------------------------------
+ // Main loop.
+ bool done = false;
+ while (!done) {
+ SDL_Event event;
+
+ while (SDL_PollEvent(&event)) {
+ if (event.type == SDL_QUIT) {
+ done = true;
+ }
+
+ ImGui_ImplSDL2_ProcessEvent(&event);
+ if (event.type == SDL_QUIT) done = true;
+ if (event.type == SDL_WINDOWEVENT &&
+ event.window.event == SDL_WINDOWEVENT_RESIZED &&
+ event.window.windowID == SDL_GetWindowID(window)) {
+ g_SwapChainResizeWidth = (int)event.window.data1;
+ g_SwapChainResizeHeight = (int)event.window.data2;
+ g_SwapChainRebuild = true;
+ }
+ }
+
+ if (g_SwapChainRebuild) {
+ g_SwapChainRebuild = false;
+ ImGui_ImplVulkan_SetMinImageCount(g_MinImageCount);
+ ImGui_ImplVulkanH_CreateOrResizeWindow(
+ g_Instance, g_PhysicalDevice, g_Device, &g_MainWindowData,
+ g_QueueFamily, g_Allocator, g_SwapChainResizeWidth,
+ g_SwapChainResizeHeight, g_MinImageCount);
+ g_MainWindowData.FrameIndex = 0;
+ }
+
+ // Start the Dear ImGui frame
+ ImGui_ImplVulkan_NewFrame();
+ ImGui_ImplSDL2_NewFrame(window);
+ ImGui::NewFrame();
+
+ // Custom window.
+ auto status = RunModuleAndUpdateImGuiWindow(
+ iree_vk_device, iree_context, main_function, entry_function,
+ main_function_inputs, window_title);
+ if (!status.ok()) {
+ IREE_LOG(FATAL) << status;
+ done = true;
+ continue;
+ }
+
+ // Rendering
+ ImGui::Render();
+ RenderFrame(wd, g_Device, g_Queue);
+
+ PresentFrame(wd, g_Queue);
+ }
+ // --------------------------------------------------------------------------
+
+ // --------------------------------------------------------------------------
+ // Cleanup
+ iree_vm_ref_release(main_function_inputs);
+
+ iree_vm_module_release(hal_module);
+ iree_vm_module_release(bytecode_module);
+ iree_vm_context_release(iree_context);
+ iree_hal_device_release(iree_vk_device);
+ iree_hal_driver_release(iree_vk_driver);
+ iree_hal_vulkan_syms_release(iree_vk_syms);
+ iree_vm_instance_release(iree_instance);
+
+ err = vkDeviceWaitIdle(g_Device);
+ check_vk_result(err);
+ ImGui_ImplVulkan_Shutdown();
+ ImGui_ImplSDL2_Shutdown();
+ ImGui::DestroyContext();
+
+ CleanupVulkanWindow();
+ CleanupVulkan();
+
+ SDL_DestroyWindow(window);
+ SDL_Quit();
+ // --------------------------------------------------------------------------
+
+ return 0;
+}
+
+} // namespace iree
diff --git a/runtime/src/iree/testing/vulkan/vulkan_gui_util.cc b/runtime/src/iree/testing/vulkan/vulkan_gui_util.cc
new file mode 100644
index 0000000..7569d94
--- /dev/null
+++ b/runtime/src/iree/testing/vulkan/vulkan_gui_util.cc
@@ -0,0 +1,426 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/testing/vulkan/vulkan_gui_util.h"
+
+#include <cstring>
+#include <set>
+
+#include "iree/base/api.h"
+#include "iree/base/logging.h"
+
+namespace iree {
+
+namespace {
+
+void check_vk_result(VkResult err) {
+ if (err == 0) return;
+ IREE_LOG(FATAL) << "VkResult: " << err;
+}
+
+// Returns the names of the Vulkan layers used for the given IREE
+// |extensibility_set| and |features|.
+std::vector<const char*> GetIreeLayers(
+ iree_hal_vulkan_extensibility_set_t extensibility_set,
+ iree_hal_vulkan_features_t features) {
+ iree_host_size_t required_count;
+ iree_hal_vulkan_query_extensibility_set(
+ features, extensibility_set, /*string_capacity=*/0,
+ /*out_string_values=*/NULL, &required_count);
+ std::vector<const char*> layers(required_count);
+ iree_hal_vulkan_query_extensibility_set(features, extensibility_set,
+ layers.size(), layers.data(),
+ &required_count);
+ return layers;
+}
+
+// Returns the names of the Vulkan extensions used for the given IREE
+// |extensibility_set| and |features|.
+std::vector<const char*> GetIreeExtensions(
+ iree_hal_vulkan_extensibility_set_t extensibility_set,
+ iree_hal_vulkan_features_t features) {
+ iree_host_size_t required_count;
+ iree_hal_vulkan_query_extensibility_set(
+ features, extensibility_set, /*string_capacity=*/0,
+ /*out_string_values=*/NULL, &required_count);
+ std::vector<const char*> extensions(required_count);
+ iree_hal_vulkan_query_extensibility_set(features, extensibility_set,
+ extensions.size(), extensions.data(),
+ &required_count);
+ return extensions;
+}
+
+// Returns the names of the Vulkan extensions used for the given IREE
+// |vulkan_features|.
+std::vector<const char*> GetDeviceExtensions(
+ VkPhysicalDevice physical_device,
+ iree_hal_vulkan_features_t vulkan_features) {
+ std::vector<const char*> iree_required_extensions = GetIreeExtensions(
+ IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED,
+ vulkan_features);
+ std::vector<const char*> iree_optional_extensions = GetIreeExtensions(
+ IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
+ vulkan_features);
+
+ uint32_t extension_count = 0;
+ check_vk_result(vkEnumerateDeviceExtensionProperties(
+ physical_device, nullptr, &extension_count, nullptr));
+ std::vector<VkExtensionProperties> extension_properties(extension_count);
+ check_vk_result(vkEnumerateDeviceExtensionProperties(
+ physical_device, nullptr, &extension_count, extension_properties.data()));
+
+ // Merge extensions lists, including optional and required for simplicity.
+ std::set<const char*> ext_set;
+ ext_set.insert("VK_KHR_swapchain");
+ ext_set.insert(iree_required_extensions.begin(),
+ iree_required_extensions.end());
+ for (int i = 0; i < iree_optional_extensions.size(); ++i) {
+ const char* optional_extension = iree_optional_extensions[i];
+ for (int j = 0; j < extension_count; ++j) {
+ if (strcmp(optional_extension, extension_properties[j].extensionName) ==
+ 0) {
+ ext_set.insert(optional_extension);
+ break;
+ }
+ }
+ }
+ std::vector<const char*> extensions(ext_set.begin(), ext_set.end());
+ return extensions;
+}
+
+} // namespace
+
+std::vector<const char*> GetInstanceLayers(
+ iree_hal_vulkan_features_t vulkan_features) {
+ // Query the layers that IREE wants / needs.
+ std::vector<const char*> required_layers = GetIreeLayers(
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_REQUIRED, vulkan_features);
+ std::vector<const char*> optional_layers = GetIreeLayers(
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_OPTIONAL, vulkan_features);
+
+ // Query the layers that are available on the Vulkan ICD.
+ uint32_t layer_property_count = 0;
+ check_vk_result(
+ vkEnumerateInstanceLayerProperties(&layer_property_count, NULL));
+ std::vector<VkLayerProperties> layer_properties(layer_property_count);
+ check_vk_result(vkEnumerateInstanceLayerProperties(&layer_property_count,
+ layer_properties.data()));
+
+ // Match between optional/required and available layers.
+ std::vector<const char*> layers;
+ for (const char* layer_name : required_layers) {
+ bool found = false;
+ for (const auto& layer_property : layer_properties) {
+ if (std::strcmp(layer_name, layer_property.layerName) == 0) {
+ found = true;
+ layers.push_back(layer_name);
+ break;
+ }
+ }
+ if (!found) {
+ IREE_LOG(FATAL) << "Required layer " << layer_name << " not available";
+ }
+ }
+ for (const char* layer_name : optional_layers) {
+ for (const auto& layer_property : layer_properties) {
+ if (std::strcmp(layer_name, layer_property.layerName) == 0) {
+ layers.push_back(layer_name);
+ break;
+ }
+ }
+ }
+
+ return layers;
+}
+
+std::vector<const char*> GetInstanceExtensions(
+ SDL_Window* window, iree_hal_vulkan_features_t vulkan_features) {
+ // Ask SDL for its list of required instance extensions.
+ uint32_t sdl_extensions_count = 0;
+ SDL_Vulkan_GetInstanceExtensions(window, &sdl_extensions_count, NULL);
+ std::vector<const char*> sdl_extensions(sdl_extensions_count);
+ SDL_Vulkan_GetInstanceExtensions(window, &sdl_extensions_count,
+ sdl_extensions.data());
+
+ std::vector<const char*> iree_required_extensions = GetIreeExtensions(
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_REQUIRED,
+ vulkan_features);
+ std::vector<const char*> iree_optional_extensions = GetIreeExtensions(
+ IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_OPTIONAL,
+ vulkan_features);
+
+ // Merge extensions lists, including optional and required for simplicity.
+ std::set<const char*> ext_set;
+ ext_set.insert(sdl_extensions.begin(), sdl_extensions.end());
+ ext_set.insert(iree_required_extensions.begin(),
+ iree_required_extensions.end());
+ ext_set.insert(iree_optional_extensions.begin(),
+ iree_optional_extensions.end());
+ std::vector<const char*> extensions(ext_set.begin(), ext_set.end());
+ return extensions;
+}
+
+void SetupVulkan(iree_hal_vulkan_features_t vulkan_features,
+ const char** instance_layers, uint32_t instance_layers_count,
+ const char** instance_extensions,
+ uint32_t instance_extensions_count,
+ const VkAllocationCallbacks* allocator, VkInstance* instance,
+ uint32_t* queue_family_index,
+ VkPhysicalDevice* physical_device, VkQueue* queue,
+ VkDevice* device, VkDescriptorPool* descriptor_pool) {
+ VkResult err;
+
+ // Create Vulkan Instance
+ {
+ VkInstanceCreateInfo create_info = {};
+ create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+ create_info.enabledLayerCount = instance_layers_count;
+ create_info.ppEnabledLayerNames = instance_layers;
+ create_info.enabledExtensionCount = instance_extensions_count;
+ create_info.ppEnabledExtensionNames = instance_extensions;
+ err = vkCreateInstance(&create_info, allocator, instance);
+ check_vk_result(err);
+ }
+
+ // Select GPU
+ {
+ uint32_t gpu_count;
+ err = vkEnumeratePhysicalDevices(*instance, &gpu_count, NULL);
+ check_vk_result(err);
+ IM_ASSERT(gpu_count > 0);
+
+ VkPhysicalDevice* gpus =
+ (VkPhysicalDevice*)malloc(sizeof(VkPhysicalDevice) * gpu_count);
+ err = vkEnumeratePhysicalDevices(*instance, &gpu_count, gpus);
+ check_vk_result(err);
+
+ // Use the first reported GPU for simplicity.
+ *physical_device = gpus[0];
+
+ VkPhysicalDeviceProperties properties;
+ vkGetPhysicalDeviceProperties(*physical_device, &properties);
+ IREE_LOG(INFO) << "Selected Vulkan device: " << properties.deviceName;
+ free(gpus);
+ }
+
+ // Select queue family. We want a single queue with graphics and compute for
+ // simplicity, but we could also discover and use separate queues for each.
+ {
+ uint32_t count;
+ vkGetPhysicalDeviceQueueFamilyProperties(*physical_device, &count, NULL);
+ VkQueueFamilyProperties* queues = (VkQueueFamilyProperties*)malloc(
+ sizeof(VkQueueFamilyProperties) * count);
+ vkGetPhysicalDeviceQueueFamilyProperties(*physical_device, &count, queues);
+ for (uint32_t i = 0; i < count; i++) {
+ if (queues[i].queueFlags &
+ (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) {
+ *queue_family_index = i;
+ break;
+ }
+ }
+ free(queues);
+ IM_ASSERT(*queue_family_index != (uint32_t)-1);
+ }
+
+ // Create Logical Device (with 1 queue)
+ {
+ std::vector<const char*> device_extensions =
+ GetDeviceExtensions(*physical_device, vulkan_features);
+ const float queue_priority[] = {1.0f};
+ VkDeviceQueueCreateInfo queue_info = {};
+ queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+ queue_info.queueFamilyIndex = *queue_family_index;
+ queue_info.queueCount = 1;
+ queue_info.pQueuePriorities = queue_priority;
+ VkDeviceCreateInfo create_info = {};
+ create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+ create_info.queueCreateInfoCount = 1;
+ create_info.pQueueCreateInfos = &queue_info;
+ create_info.enabledExtensionCount =
+ static_cast<uint32_t>(device_extensions.size());
+ create_info.ppEnabledExtensionNames = device_extensions.data();
+
+ // Enable timeline semaphores.
+ VkPhysicalDeviceFeatures2 features2;
+ memset(&features2, 0, sizeof(features2));
+ features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+ create_info.pNext = &features2;
+ VkPhysicalDeviceTimelineSemaphoreFeatures semaphore_features;
+ memset(&semaphore_features, 0, sizeof(semaphore_features));
+ semaphore_features.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
+ semaphore_features.pNext = features2.pNext;
+ features2.pNext = &semaphore_features;
+ semaphore_features.timelineSemaphore = VK_TRUE;
+
+ err = vkCreateDevice(*physical_device, &create_info, allocator, device);
+ check_vk_result(err);
+ vkGetDeviceQueue(*device, *queue_family_index, 0, queue);
+ }
+
+ // Create Descriptor Pool
+ {
+ VkDescriptorPoolSize pool_sizes[] = {
+ {VK_DESCRIPTOR_TYPE_SAMPLER, 1000},
+ {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1000},
+ {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1000},
+ {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1000},
+ {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1000},
+ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1000},
+ {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1000},
+ {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1000},
+ {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1000},
+ {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 1000},
+ {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1000}};
+ VkDescriptorPoolCreateInfo pool_info = {};
+ pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+ pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
+ pool_info.maxSets = 1000 * IREE_ARRAYSIZE(pool_sizes);
+ pool_info.poolSizeCount = (uint32_t)IREE_ARRAYSIZE(pool_sizes);
+ pool_info.pPoolSizes = pool_sizes;
+ err =
+ vkCreateDescriptorPool(*device, &pool_info, allocator, descriptor_pool);
+ check_vk_result(err);
+ }
+}
+
+void SetupVulkanWindow(ImGui_ImplVulkanH_Window* wd,
+ const VkAllocationCallbacks* allocator,
+ VkInstance instance, uint32_t queue_family_index,
+ VkPhysicalDevice physical_device, VkDevice device,
+ VkSurfaceKHR surface, int width, int height,
+ uint32_t min_image_count) {
+ wd->Surface = surface;
+
+ // Check for WSI support
+ VkBool32 res;
+ vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index,
+ wd->Surface, &res);
+ if (res != VK_TRUE) {
+ fprintf(stderr, "Error no WSI support on physical device 0\n");
+ exit(-1);
+ }
+
+ // Select Surface Format
+ const VkFormat requestSurfaceImageFormat[] = {
+ VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM,
+ VK_FORMAT_B8G8R8_UNORM, VK_FORMAT_R8G8B8_UNORM};
+ const VkColorSpaceKHR requestSurfaceColorSpace =
+ VK_COLORSPACE_SRGB_NONLINEAR_KHR;
+ wd->SurfaceFormat = ImGui_ImplVulkanH_SelectSurfaceFormat(
+ physical_device, wd->Surface, requestSurfaceImageFormat,
+ (size_t)IREE_ARRAYSIZE(requestSurfaceImageFormat),
+ requestSurfaceColorSpace);
+
+ // Select Present Mode
+#ifdef IMGUI_UNLIMITED_FRAME_RATE
+ VkPresentModeKHR present_modes[] = {VK_PRESENT_MODE_MAILBOX_KHR,
+ VK_PRESENT_MODE_IMMEDIATE_KHR,
+ VK_PRESENT_MODE_FIFO_KHR};
+#else
+ VkPresentModeKHR present_modes[] = {VK_PRESENT_MODE_FIFO_KHR};
+#endif
+ wd->PresentMode = ImGui_ImplVulkanH_SelectPresentMode(
+ physical_device, wd->Surface, &present_modes[0],
+ IREE_ARRAYSIZE(present_modes));
+
+ // Create SwapChain, RenderPass, Framebuffer, etc.
+ IM_ASSERT(min_image_count >= 2);
+ ImGui_ImplVulkanH_CreateOrResizeWindow(instance, physical_device, device, wd,
+ queue_family_index, allocator, width,
+ height, min_image_count);
+
+ // Set clear color.
+ ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f);
+ memcpy(&wd->ClearValue.color.float32[0], &clear_color, 4 * sizeof(float));
+}
+
+void RenderFrame(ImGui_ImplVulkanH_Window* wd, VkDevice device, VkQueue queue) {
+ VkResult err;
+
+ VkSemaphore image_acquired_semaphore =
+ wd->FrameSemaphores[wd->SemaphoreIndex].ImageAcquiredSemaphore;
+ VkSemaphore render_complete_semaphore =
+ wd->FrameSemaphores[wd->SemaphoreIndex].RenderCompleteSemaphore;
+ err = vkAcquireNextImageKHR(device, wd->Swapchain, UINT64_MAX,
+ image_acquired_semaphore, VK_NULL_HANDLE,
+ &wd->FrameIndex);
+ check_vk_result(err);
+
+ ImGui_ImplVulkanH_Frame* fd = &wd->Frames[wd->FrameIndex];
+ {
+ err = vkWaitForFences(
+ device, 1, &fd->Fence, VK_TRUE,
+ UINT64_MAX); // wait indefinitely instead of periodically checking
+ check_vk_result(err);
+
+ err = vkResetFences(device, 1, &fd->Fence);
+ check_vk_result(err);
+ }
+ {
+ err = vkResetCommandPool(device, fd->CommandPool, 0);
+ check_vk_result(err);
+ VkCommandBufferBeginInfo info = {};
+ info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+ info.flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+ err = vkBeginCommandBuffer(fd->CommandBuffer, &info);
+ check_vk_result(err);
+ }
+ {
+ VkRenderPassBeginInfo info = {};
+ info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+ info.renderPass = wd->RenderPass;
+ info.framebuffer = fd->Framebuffer;
+ info.renderArea.extent.width = wd->Width;
+ info.renderArea.extent.height = wd->Height;
+ info.clearValueCount = 1;
+ info.pClearValues = &wd->ClearValue;
+ vkCmdBeginRenderPass(fd->CommandBuffer, &info, VK_SUBPASS_CONTENTS_INLINE);
+ }
+
+ // Record Imgui Draw Data and draw funcs into command buffer
+ ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), fd->CommandBuffer);
+
+ // Submit command buffer
+ vkCmdEndRenderPass(fd->CommandBuffer);
+ {
+ VkPipelineStageFlags wait_stage =
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ VkSubmitInfo info = {};
+ info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ info.waitSemaphoreCount = 1;
+ info.pWaitSemaphores = &image_acquired_semaphore;
+ info.pWaitDstStageMask = &wait_stage;
+ info.commandBufferCount = 1;
+ info.pCommandBuffers = &fd->CommandBuffer;
+ info.signalSemaphoreCount = 1;
+ info.pSignalSemaphores = &render_complete_semaphore;
+
+ err = vkEndCommandBuffer(fd->CommandBuffer);
+ check_vk_result(err);
+ err = vkQueueSubmit(queue, 1, &info, fd->Fence);
+ check_vk_result(err);
+ }
+}
+
+void PresentFrame(ImGui_ImplVulkanH_Window* wd, VkQueue queue) {
+ VkSemaphore render_complete_semaphore =
+ wd->FrameSemaphores[wd->SemaphoreIndex].RenderCompleteSemaphore;
+ VkPresentInfoKHR info = {};
+ info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
+ info.waitSemaphoreCount = 1;
+ info.pWaitSemaphores = &render_complete_semaphore;
+ info.swapchainCount = 1;
+ info.pSwapchains = &wd->Swapchain;
+ info.pImageIndices = &wd->FrameIndex;
+ VkResult err = vkQueuePresentKHR(queue, &info);
+ check_vk_result(err);
+ wd->SemaphoreIndex =
+ (wd->SemaphoreIndex + 1) %
+ wd->ImageCount; // Now we can use the next set of semaphores
+}
+
+} // namespace iree
diff --git a/runtime/src/iree/testing/vulkan/vulkan_gui_util.h b/runtime/src/iree/testing/vulkan/vulkan_gui_util.h
new file mode 100644
index 0000000..2e7f158
--- /dev/null
+++ b/runtime/src/iree/testing/vulkan/vulkan_gui_util.h
@@ -0,0 +1,73 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_TESTING_VULKAN_VULKAN_GUI_UTIL_H_
+#define IREE_TESTING_VULKAN_VULKAN_GUI_UTIL_H_
+
+#include <SDL.h>
+#include <SDL_vulkan.h>
+#include <imgui.h>
+#include <imgui_impl_sdl.h>
+#include <imgui_impl_vulkan.h>
+#include <vulkan/vulkan.h>
+
+#include <vector>
+
+#include "iree/hal/vulkan/api.h"
+
+namespace iree {
+
+// Returns the names of the Vulkan instance layers needed for the given IREE
+// |vulkan_features|.
+std::vector<const char*> GetInstanceLayers(
+    iree_hal_vulkan_features_t vulkan_features);
+
+// Returns the names of the Vulkan instance extensions needed for the given IREE
+// |vulkan_features|.
+std::vector<const char*> GetInstanceExtensions(
+    SDL_Window* window, iree_hal_vulkan_features_t vulkan_features);
+
+// Initializes the Vulkan environment with the given |vulkan_features| and
+// layers/extensions, and writes various Vulkan handles. If errors occur, this
+// function asserts and aborts.
+//
+// This function creates Vulkan |instance|, selects a GPU and
+// |queue_family_index| with both graphics and compute bits, gets the
+// |physical_device|, creates a logical |device| from it, and creates a
+// |descriptor_pool|.
+void SetupVulkan(iree_hal_vulkan_features_t vulkan_features,
+                 const char** instance_layers, uint32_t instance_layers_count,
+                 const char** instance_extensions,
+                 uint32_t instance_extensions_count,
+                 const VkAllocationCallbacks* allocator, VkInstance* instance,
+                 uint32_t* queue_family_index,
+                 VkPhysicalDevice* physical_device, VkQueue* queue,
+                 VkDevice* device, VkDescriptorPool* descriptor_pool);
+
+// Sets up an ImGui Vulkan GUI window.
+//
+// This function creates surface, swapchain, framebuffer, and others in
+// preparation for rendering.
+void SetupVulkanWindow(ImGui_ImplVulkanH_Window* wd,
+                       const VkAllocationCallbacks* allocator,
+                       VkInstance instance, uint32_t queue_family_index,
+                       VkPhysicalDevice physical_device, VkDevice device,
+                       VkSurfaceKHR surface, int width, int height,
+                       uint32_t min_image_count);
+
+// Renders the next frame of the ImGui Vulkan GUI window.
+//
+// This function acquires next swapchain image, creates a command buffer
+// containing a render pass for the next frame, and finally submits to the
+// queue.
+void RenderFrame(ImGui_ImplVulkanH_Window* wd, VkDevice device, VkQueue queue);
+
+// Presents the next frame of the ImGui Vulkan GUI window.
+void PresentFrame(ImGui_ImplVulkanH_Window* wd, VkQueue queue);
+
+}  // namespace iree
+
+#endif  // IREE_TESTING_VULKAN_VULKAN_GUI_UTIL_H_
diff --git a/runtime/src/iree/vm/BUILD b/runtime/src/iree/vm/BUILD
new file mode 100644
index 0000000..34ededc
--- /dev/null
+++ b/runtime/src/iree/vm/BUILD
@@ -0,0 +1,335 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library", "iree_runtime_cc_test")
+load("//build_tools/bazel:iree_bytecode_module.bzl", "iree_bytecode_module")
+load("//build_tools/bazel:cc_binary_benchmark.bzl", "cc_binary_benchmark")
+# load("@llvm-project//mlir:tblgen.bzl", "gentbl_cc_library")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+#===------------------------------------------------------------------------===#
+# Public API
+#===------------------------------------------------------------------------===#
+
+# Aggregate target exposing the stable C API surface (api.h) over :impl.
+iree_runtime_cc_library(
+    name = "vm",
+    hdrs = [
+        "api.h",
+    ],
+    deps = [
+        ":impl",
+        "//runtime/src/iree/base",
+    ],
+)
+
+# C++ convenience wrappers over the C API.
+# TODO(benvanik): make these srcs and only expose an api_cc.h.
+iree_runtime_cc_library(
+    name = "cc",
+    hdrs = [
+        "native_module_cc.h",
+        "native_module_packing.h",
+        "ref_cc.h",
+    ],
+    deps = [
+        ":vm",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:cc",
+        "//runtime/src/iree/base:core_headers",
+        "//runtime/src/iree/base/internal:span",
+    ],
+)
+
+#===------------------------------------------------------------------------===#
+# Implementation
+#===------------------------------------------------------------------------===#
+
+# Core C implementation of the VM (buffers, lists, refs, modules, stack, etc).
+iree_runtime_cc_library(
+    name = "impl",
+    srcs = [
+        "buffer.c",
+        "builtin_types.c",
+        "context.c",
+        "instance.c",
+        "invocation.c",
+        "list.c",
+        "module.c",
+        "native_module.c",
+        "ref.c",
+        "shims.c",
+        "stack.c",
+    ],
+    hdrs = [
+        "buffer.h",
+        "builtin_types.h",
+        "context.h",
+        "instance.h",
+        "invocation.h",
+        "list.h",
+        "module.h",
+        "native_module.h",
+        "ref.h",
+        "shims.h",
+        "stack.h",
+        "type_def.h",
+        "value.h",
+    ],
+    deps = [
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:core_headers",
+        "//runtime/src/iree/base:tracing",
+        "//runtime/src/iree/base/internal",
+    ],
+)
+
+iree_runtime_cc_test(
+    name = "buffer_test",
+    srcs = ["buffer_test.cc"],
+    deps = [
+        ":cc",
+        ":impl",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/testing:gtest",
+        "//runtime/src/iree/testing:gtest_main",
+    ],
+)
+
+iree_runtime_cc_test(
+    name = "list_test",
+    srcs = ["list_test.cc"],
+    deps = [
+        ":cc",
+        ":impl",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/testing:gtest",
+        "//runtime/src/iree/testing:gtest_main",
+    ],
+)
+
+iree_runtime_cc_test(
+    name = "native_module_test",
+    srcs = ["native_module_test.cc"],
+    deps = [
+        ":cc",
+        ":impl",
+        ":native_module_test_hdrs",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:cc",
+        "//runtime/src/iree/testing:gtest",
+        "//runtime/src/iree/testing:gtest_main",
+    ],
+)
+
+# Test-module headers shared by native_module_test and the benchmark below.
+iree_runtime_cc_library(
+    name = "native_module_test_hdrs",
+    hdrs = [
+        "native_module_test.h",
+    ],
+    deps = [
+        ":impl",
+        "//runtime/src/iree/base",
+    ],
+)
+
+cc_binary_benchmark(
+    name = "native_module_benchmark",
+    srcs = ["native_module_benchmark.cc"],
+    deps = [
+        ":impl",
+        ":native_module_test_hdrs",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:logging",
+        "//runtime/src/iree/testing:benchmark_main",
+        "@com_google_benchmark//:benchmark",
+    ],
+)
+
+iree_runtime_cc_test(
+    name = "ref_test",
+    srcs = ["ref_test.cc"],
+    deps = [
+        ":cc",
+        ":impl",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/testing:gtest",
+        "//runtime/src/iree/testing:gtest_main",
+    ],
+)
+
+iree_runtime_cc_test(
+    name = "stack_test",
+    srcs = ["stack_test.cc"],
+    deps = [
+        ":impl",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/testing:gtest",
+        "//runtime/src/iree/testing:gtest_main",
+    ],
+)
+
+#===------------------------------------------------------------------------===#
+# Bytecode interpreter module
+#===------------------------------------------------------------------------===#
+
+# Interpreter that loads and dispatches FlatBuffer-encoded bytecode modules.
+iree_runtime_cc_library(
+    name = "bytecode_module",
+    srcs = [
+        "bytecode_disasm.c",
+        "bytecode_disasm.h",
+        "bytecode_dispatch.c",
+        "bytecode_dispatch_util.h",
+        "bytecode_module.c",
+        "bytecode_module_impl.h",
+        "generated/bytecode_op_table.h",
+    ],
+    hdrs = [
+        "bytecode_module.h",
+    ],
+    deps = [
+        ":ops",
+        ":vm",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:core_headers",
+        "//runtime/src/iree/base:tracing",
+        "//runtime/src/iree/base/internal",
+        "//runtime/src/iree/base/internal/flatcc:parsing",
+        "//runtime/src/iree/schemas:bytecode_module_def_c_fbs",
+    ],
+)
+
+# TODO(#357): Add a script to update bytecode_op_table.h.
+# gentbl_cc_library(
+#     name = "bytecode_op_table_gen",
+#     tbl_outs = [
+#         (["-gen-iree-vm-op-table-defs"], "bytecode_op_table.h"),
+#     ],
+#     tblgen = "//iree/tools:iree-tblgen",
+#     td_file = "//iree/compiler/Dialect/VM/IR:VMOps.td",
+#     td_srcs = [
+#         "//iree/compiler/Dialect/Util/IR:td_files",
+#         "//iree/compiler/Dialect/VM/IR:td_files",
+#         "@llvm-project//mlir:OpBaseTdFiles",
+#         "@llvm-project//mlir:include/mlir/IR/SymbolInterfaces.td",
+#         "@llvm-project//mlir:include/mlir/Interfaces/CallInterfaces.td",
+#         "@llvm-project//mlir:include/mlir/Interfaces/ControlFlowInterfaces.td",
+#         "@llvm-project//mlir:SideEffectTdFiles",
+#     ],
+# )
+
+# The following tests/benchmarks compile .mlir fixtures and therefore require
+# the compiler to be part of the build (see the matching endif below).
+iree_cmake_extra_content(
+    content = """
+if(${IREE_BUILD_COMPILER})
+""",
+    inline = True,
+)
+
+iree_runtime_cc_test(
+    name = "bytecode_module_test",
+    srcs = [
+        "bytecode_dispatch_test.cc",
+        "bytecode_module_test.cc",
+    ],
+    tags = [
+        # TODO(benvanik): Fix type casting errors for --config=android_arm.
+        "notap",
+    ],
+    deps = [
+        ":bytecode_module",
+        ":vm",
+        "//runtime/src/iree/base:cc",
+        "//runtime/src/iree/base:logging",
+        "//runtime/src/iree/testing:gtest",
+        "//runtime/src/iree/testing:gtest_main",
+        "//runtime/src/iree/vm/test:all_bytecode_modules_c",
+    ],
+)
+
+cc_binary_benchmark(
+    name = "bytecode_module_benchmark",
+    testonly = True,
+    srcs = ["bytecode_module_benchmark.cc"],
+    deps = [
+        ":bytecode_module",
+        ":bytecode_module_benchmark_module_c",
+        ":vm",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:logging",
+        "//runtime/src/iree/testing:benchmark_main",
+        "@com_google_benchmark//:benchmark",
+    ],
+)
+
+iree_bytecode_module(
+    name = "bytecode_module_benchmark_module",
+    testonly = True,
+    src = "bytecode_module_benchmark.mlir",
+    c_identifier = "iree_vm_bytecode_module_benchmark_module",
+    flags = ["-iree-vm-ir-to-bytecode-module"],
+    translate_tool = "//iree/tools:iree-translate",
+)
+
+cc_binary_benchmark(
+    name = "bytecode_module_size_benchmark",
+    srcs = ["bytecode_module_size_benchmark.cc"],
+    deps = [
+        ":bytecode_module",
+        ":bytecode_module_size_benchmark_module_c",
+        ":vm",
+        "//runtime/src/iree/base",
+    ],
+)
+
+iree_bytecode_module(
+    name = "bytecode_module_size_benchmark_module",
+    testonly = True,
+    src = "bytecode_module_size_benchmark.mlir",
+    c_identifier = "iree_vm_bytecode_module_size_benchmark_module",
+    flags = ["-iree-vm-ir-to-bytecode-module"],
+    translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_cmake_extra_content(
+    content = """
+endif()
+""",
+    inline = True,
+)
+
+#===------------------------------------------------------------------------===#
+# Common VM op implementations
+#===------------------------------------------------------------------------===#
+
+iree_runtime_cc_library(
+    name = "ops",
+    hdrs = [
+        "ops.h",
+    ],
+    deps = [
+        "//runtime/src/iree/base",
+    ],
+)
+
+# NOTE(review): header-only targets below appear to serve EmitC-generated
+# module sources (by name); confirm against the compiler's EmitC output.
+iree_runtime_cc_library(
+    name = "ops_emitc",
+    hdrs = [
+        "ops_emitc.h",
+    ],
+)
+
+iree_runtime_cc_library(
+    name = "shims_emitc",
+    hdrs = [
+        "shims_emitc.h",
+    ],
+    deps = [
+        ":impl",
+        "//runtime/src/iree/base:core_headers",
+    ],
+)
diff --git a/runtime/src/iree/vm/CMakeLists.txt b/runtime/src/iree/vm/CMakeLists.txt
new file mode 100644
index 0000000..ba43737
--- /dev/null
+++ b/runtime/src/iree/vm/CMakeLists.txt
@@ -0,0 +1,306 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/vm/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+ NAME
+ vm
+ HDRS
+ "api.h"
+ DEPS
+ ::impl
+ iree::base
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ cc
+ HDRS
+ "native_module_cc.h"
+ "native_module_packing.h"
+ "ref_cc.h"
+ DEPS
+ ::vm
+ iree::base
+ iree::base::cc
+ iree::base::core_headers
+ iree::base::internal::span
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ impl
+ HDRS
+ "buffer.h"
+ "builtin_types.h"
+ "context.h"
+ "instance.h"
+ "invocation.h"
+ "list.h"
+ "module.h"
+ "native_module.h"
+ "ref.h"
+ "shims.h"
+ "stack.h"
+ "type_def.h"
+ "value.h"
+ SRCS
+ "buffer.c"
+ "builtin_types.c"
+ "context.c"
+ "instance.c"
+ "invocation.c"
+ "list.c"
+ "module.c"
+ "native_module.c"
+ "ref.c"
+ "shims.c"
+ "stack.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::tracing
+ PUBLIC
+)
+
+iree_cc_test(
+ NAME
+ buffer_test
+ SRCS
+ "buffer_test.cc"
+ DEPS
+ ::cc
+ ::impl
+ iree::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ list_test
+ SRCS
+ "list_test.cc"
+ DEPS
+ ::cc
+ ::impl
+ iree::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ native_module_test
+ SRCS
+ "native_module_test.cc"
+ DEPS
+ ::cc
+ ::impl
+ ::native_module_test_hdrs
+ iree::base
+ iree::base::cc
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ native_module_test_hdrs
+ HDRS
+ "native_module_test.h"
+ DEPS
+ ::impl
+ iree::base
+ PUBLIC
+)
+
+iree_cc_binary_benchmark(
+ NAME
+ native_module_benchmark
+ SRCS
+ "native_module_benchmark.cc"
+ DEPS
+ ::impl
+ ::native_module_test_hdrs
+ benchmark
+ iree::base
+ iree::base::logging
+ iree::testing::benchmark_main
+ TESTONLY
+)
+
+iree_cc_test(
+ NAME
+ ref_test
+ SRCS
+ "ref_test.cc"
+ DEPS
+ ::cc
+ ::impl
+ iree::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_test(
+ NAME
+ stack_test
+ SRCS
+ "stack_test.cc"
+ DEPS
+ ::impl
+ iree::base
+ iree::testing::gtest
+ iree::testing::gtest_main
+)
+
+iree_cc_library(
+ NAME
+ bytecode_module
+ HDRS
+ "bytecode_module.h"
+ SRCS
+ "bytecode_disasm.c"
+ "bytecode_disasm.h"
+ "bytecode_dispatch.c"
+ "bytecode_dispatch_util.h"
+ "bytecode_module.c"
+ "bytecode_module_impl.h"
+ "generated/bytecode_op_table.h"
+ DEPS
+ ::ops
+ ::vm
+ iree::base
+ iree::base::core_headers
+ iree::base::internal
+ iree::base::internal::flatcc::parsing
+ iree::base::tracing
+ iree::schemas::bytecode_module_def_c_fbs
+ PUBLIC
+)
+
+if(${IREE_BUILD_COMPILER})
+
+iree_cc_test(
+ NAME
+ bytecode_module_test
+ SRCS
+ "bytecode_dispatch_test.cc"
+ "bytecode_module_test.cc"
+ DEPS
+ ::bytecode_module
+ ::vm
+ iree::base::cc
+ iree::base::logging
+ iree::testing::gtest
+ iree::testing::gtest_main
+ iree::vm::test::all_bytecode_modules_c
+ LABELS
+ "notap"
+)
+
+iree_cc_binary_benchmark(
+ NAME
+ bytecode_module_benchmark
+ SRCS
+ "bytecode_module_benchmark.cc"
+ DEPS
+ ::bytecode_module
+ ::bytecode_module_benchmark_module_c
+ ::vm
+ benchmark
+ iree::base
+ iree::base::logging
+ iree::testing::benchmark_main
+ TESTONLY
+)
+
+iree_bytecode_module(
+ NAME
+ bytecode_module_benchmark_module
+ SRC
+ "bytecode_module_benchmark.mlir"
+ C_IDENTIFIER
+ "iree_vm_bytecode_module_benchmark_module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ TESTONLY
+ PUBLIC
+)
+
+iree_cc_binary_benchmark(
+ NAME
+ bytecode_module_size_benchmark
+ SRCS
+ "bytecode_module_size_benchmark.cc"
+ DEPS
+ ::bytecode_module
+ ::bytecode_module_size_benchmark_module_c
+ ::vm
+ iree::base
+ TESTONLY
+)
+
+iree_bytecode_module(
+ NAME
+ bytecode_module_size_benchmark_module
+ SRC
+ "bytecode_module_size_benchmark.mlir"
+ C_IDENTIFIER
+ "iree_vm_bytecode_module_size_benchmark_module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ TESTONLY
+ PUBLIC
+)
+
+endif()
+
+iree_cc_library(
+ NAME
+ ops
+ HDRS
+ "ops.h"
+ DEPS
+ iree::base
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ ops_emitc
+ HDRS
+ "ops_emitc.h"
+ DEPS
+
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
+ shims_emitc
+ HDRS
+ "shims_emitc.h"
+ DEPS
+ ::impl
+ iree::base::core_headers
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/vm/api.h b/runtime/src/iree/vm/api.h
new file mode 100644
index 0000000..3f559f7
--- /dev/null
+++ b/runtime/src/iree/vm/api.h
@@ -0,0 +1,25 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_API_H_
+#define IREE_VM_API_H_
+
+// Umbrella header exporting the full public iree/vm C API; include this
+// rather than the individual headers below.
+#include "iree/base/api.h"
+#include "iree/vm/buffer.h"         // IWYU pragma: export
+#include "iree/vm/builtin_types.h"  // IWYU pragma: export
+#include "iree/vm/context.h"        // IWYU pragma: export
+#include "iree/vm/instance.h"       // IWYU pragma: export
+#include "iree/vm/invocation.h"     // IWYU pragma: export
+#include "iree/vm/list.h"           // IWYU pragma: export
+#include "iree/vm/module.h"         // IWYU pragma: export
+#include "iree/vm/native_module.h"  // IWYU pragma: export
+#include "iree/vm/ref.h"            // IWYU pragma: export
+#include "iree/vm/shims.h"          // IWYU pragma: export
+#include "iree/vm/stack.h"          // IWYU pragma: export
+#include "iree/vm/type_def.h"       // IWYU pragma: export
+#include "iree/vm/value.h"          // IWYU pragma: export
+
+#endif  // IREE_VM_API_H_
diff --git a/runtime/src/iree/vm/buffer.c b/runtime/src/iree/vm/buffer.c
new file mode 100644
index 0000000..d433a9f
--- /dev/null
+++ b/runtime/src/iree/vm/buffer.c
@@ -0,0 +1,309 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/buffer.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+
+// Singleton descriptor for the "vm.buffer" ref type; populated lazily by
+// iree_vm_buffer_register_types.
+static iree_vm_ref_type_descriptor_t iree_vm_buffer_descriptor = {0};
+
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_vm_buffer, iree_vm_buffer_t);
+
+// Resolves the (|offset|, |length|) subrange of |buffer| to a direct pointer
+// in |out_data|/|out_data_length|. |offset| and |length| are rounded toward
+// zero to the power-of-two |alignment| before the range is bounds-checked;
+// fails with OUT_OF_RANGE if the aligned range exceeds the buffer.
+static iree_status_t iree_vm_buffer_map(const iree_vm_buffer_t* buffer,
+                                        iree_host_size_t offset,
+                                        iree_host_size_t length,
+                                        iree_host_size_t alignment,
+                                        uint8_t** out_data,
+                                        iree_host_size_t* out_data_length) {
+  // Force alignment.
+  offset &= ~(alignment - 1);
+  length &= ~(alignment - 1);
+  const iree_host_size_t end = offset + length;
+  if (IREE_UNLIKELY(end > buffer->data.data_length)) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "out-of-bounds access detected (offset=%zu, "
+                            "length=%zu, alignment=%zu, buffer length=%zu)",
+                            offset, length, alignment,
+                            buffer->data.data_length);
+  }
+  *out_data = buffer->data.data + offset;
+  *out_data_length = length;
+  return iree_ok_status();
+}
+
+// Maps a subrange to a span of bytes within the |buffer| for read-only access.
+// |offset| and |length| must match the provided |alignment| (1, 2, 4, 8) and
+// will be rounded toward zero if they do not.
+static iree_status_t iree_vm_buffer_map_ro(const iree_vm_buffer_t* buffer,
+                                           iree_host_size_t offset,
+                                           iree_host_size_t length,
+                                           iree_host_size_t alignment,
+                                           iree_const_byte_span_t* out_span) {
+  // Always allowed regardless of access.
+  return iree_vm_buffer_map(buffer, offset, length, alignment,
+                            (uint8_t**)&out_span->data, &out_span->data_length);
+}
+
+// Maps a subrange to a span of bytes within the |buffer| for read/write access.
+// |offset| and |length| must match the provided |alignment| (1, 2, 4, 8) and
+// will be rounded toward zero if they do not.
+// Fails with PERMISSION_DENIED if the buffer lacks the MUTABLE access bit.
+static iree_status_t iree_vm_buffer_map_rw(const iree_vm_buffer_t* buffer,
+                                           iree_host_size_t offset,
+                                           iree_host_size_t length,
+                                           iree_host_size_t alignment,
+                                           iree_byte_span_t* out_span) {
+  // Buffer requires mutable access.
+  if (IREE_UNLIKELY(
+          !iree_all_bits_set(buffer->access, IREE_VM_BUFFER_ACCESS_MUTABLE))) {
+    return iree_make_status(
+        IREE_STATUS_PERMISSION_DENIED,
+        "buffer is read-only and cannot be mapped for mutation");
+  }
+  return iree_vm_buffer_map(buffer, offset, length, alignment, &out_span->data,
+                            &out_span->data_length);
+}
+
+// Initializes |out_buffer| in place to wrap |data| with the given |access|.
+// |allocator| is stored and later used by iree_vm_buffer_deinitialize to free
+// |data|.
+IREE_API_EXPORT void iree_vm_buffer_initialize(iree_vm_buffer_access_t access,
+                                               iree_byte_span_t data,
+                                               iree_allocator_t allocator,
+                                               iree_vm_buffer_t* out_buffer) {
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  iree_atomic_ref_count_init(&out_buffer->ref_object.counter);
+  out_buffer->access = access;
+  out_buffer->data = data;
+  out_buffer->allocator = allocator;
+}
+
+// Deinitializes a buffer previously set up with iree_vm_buffer_initialize,
+// returning its storage to the stored allocator. Aborts if live references
+// to the buffer still exist.
+IREE_API_EXPORT void iree_vm_buffer_deinitialize(iree_vm_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  iree_atomic_ref_count_abort_if_uses(&buffer->ref_object.counter);
+  iree_allocator_free(buffer->allocator, buffer->data.data);
+}
+
+// Allocates a new ref-counted buffer of |length| bytes from |allocator| using
+// a single allocation that holds both the handle and its payload.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_create(
+    iree_vm_buffer_access_t access, iree_host_size_t length,
+    iree_allocator_t allocator, iree_vm_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  *out_buffer = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // The actual buffer payload is prefixed with the buffer type so we need only
+  // a single allocation.
+  iree_host_size_t prefix_size = iree_sizeof_struct(**out_buffer);
+  iree_host_size_t total_size = prefix_size + length;
+
+  // Allocate combined [prefix | buffer] memory.
+  uint8_t* data_ptr = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, total_size, (void**)&data_ptr));
+
+  // Initialize the prefix buffer handle.
+  iree_vm_buffer_t* buffer = (iree_vm_buffer_t*)data_ptr;
+  memset(data_ptr, 0, prefix_size - sizeof(*buffer));  // padding
+  iree_byte_span_t target_span =
+      iree_make_byte_span(data_ptr + prefix_size, length);
+  iree_vm_buffer_initialize(access, target_span, allocator, buffer);
+
+  *out_buffer = buffer;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Ref-type destroy callback: releases the combined [prefix | data] allocation
+// produced by iree_vm_buffer_create/iree_vm_buffer_clone.
+static void iree_vm_buffer_destroy(void* ptr) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Buffers are stored as [prefix | data]; freeing the prefix is all we need
+  // to do to free it all.
+  iree_vm_buffer_t* buffer = (iree_vm_buffer_t*)ptr;
+  iree_allocator_free(buffer->allocator, buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Increments the buffer's reference count.
+IREE_API_EXPORT void iree_vm_buffer_retain(iree_vm_buffer_t* buffer) {
+  iree_vm_ref_object_retain(buffer, &iree_vm_buffer_descriptor);
+}
+
+// Decrements the buffer's reference count, destroying it when it hits zero.
+IREE_API_EXPORT void iree_vm_buffer_release(iree_vm_buffer_t* buffer) {
+  iree_vm_ref_object_release(buffer, &iree_vm_buffer_descriptor);
+}
+
+// Clones |length| bytes starting at |source_offset| of |source_buffer| into a
+// newly allocated buffer with the given |access|, using |allocator| for the
+// combined handle+payload allocation.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_clone(
+    iree_vm_buffer_access_t access, const iree_vm_buffer_t* source_buffer,
+    iree_host_size_t source_offset, iree_host_size_t length,
+    iree_allocator_t allocator, iree_vm_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  *out_buffer = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Try to map the source buffer first; no use continuing if we can't read the
+  // data to clone.
+  iree_const_byte_span_t source_span;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_vm_buffer_map_ro(source_buffer, source_offset, length, 1,
+                                &source_span));
+
+  // The actual buffer payload is prefixed with the buffer type so we need only
+  // a single allocation.
+  // NOTE(review): iree_vm_buffer_create computes this prefix via
+  // iree_sizeof_struct instead — confirm the two formulas stay in sync.
+  iree_host_size_t prefix_size =
+      iree_host_align(sizeof(iree_vm_buffer_t), iree_max_align_t);
+  iree_host_size_t total_size = prefix_size + source_span.data_length;
+
+  // Allocate combined [prefix | buffer] memory.
+  // NOTE: we are allocating without initialization here as we will be writing
+  // over all of it.
+  uint8_t* data_ptr = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc_uninitialized(allocator, total_size,
+                                              (void**)&data_ptr));
+
+  // Initialize the prefix buffer handle.
+  iree_vm_buffer_t* buffer = (iree_vm_buffer_t*)data_ptr;
+  memset(data_ptr, 0, prefix_size - sizeof(*buffer));  // padding
+  iree_byte_span_t target_span =
+      iree_make_byte_span(data_ptr + prefix_size, length);
+  iree_vm_buffer_initialize(access, target_span, allocator, buffer);
+
+  // Copy the data from the source buffer.
+  memcpy(target_span.data, source_span.data, target_span.data_length);
+
+  *out_buffer = buffer;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Returns the total byte capacity of |buffer|.
+IREE_API_EXPORT iree_host_size_t
+iree_vm_buffer_length(const iree_vm_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer->data.data_length;
+}
+
+// Copies |length| bytes from |source_buffer|+|source_offset| to
+// |target_buffer|+|target_offset|. Fails if either range is out of bounds or
+// the target buffer is not mutable.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_copy_bytes(
+    const iree_vm_buffer_t* source_buffer, iree_host_size_t source_offset,
+    const iree_vm_buffer_t* target_buffer, iree_host_size_t target_offset,
+    iree_host_size_t length) {
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  iree_const_byte_span_t source_span;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_map_ro(source_buffer, source_offset,
+                                             length, 1, &source_span));
+  iree_byte_span_t target_span;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_map_rw(target_buffer, target_offset,
+                                             length, 1, &target_span));
+  memcpy(target_span.data, source_span.data, length);
+  return iree_ok_status();
+}
+
+// Compares |length| bytes of the two buffers at the given offsets; sets
+// |out_result| to true when the ranges are byte-identical.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_compare_bytes(
+    const iree_vm_buffer_t* lhs_buffer, iree_host_size_t lhs_offset,
+    const iree_vm_buffer_t* rhs_buffer, iree_host_size_t rhs_offset,
+    iree_host_size_t length, bool* out_result) {
+  IREE_ASSERT_ARGUMENT(lhs_buffer);
+  IREE_ASSERT_ARGUMENT(rhs_buffer);
+  iree_const_byte_span_t lhs_span;
+  IREE_RETURN_IF_ERROR(
+      iree_vm_buffer_map_ro(lhs_buffer, lhs_offset, length, 1, &lhs_span));
+  iree_const_byte_span_t rhs_span;
+  IREE_RETURN_IF_ERROR(
+      iree_vm_buffer_map_ro(rhs_buffer, rhs_offset, length, 1, &rhs_span));
+  *out_result = memcmp(lhs_span.data, rhs_span.data, length) == 0;
+  return iree_ok_status();
+}
+
+// Fills |length| bytes at |target_offset| with the byte |value|; a thin
+// wrapper over iree_vm_buffer_fill_elements with 1-byte elements.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_fill_bytes(
+    const iree_vm_buffer_t* target_buffer, iree_host_size_t target_offset,
+    iree_host_size_t length, uint8_t value) {
+  return iree_vm_buffer_fill_elements(target_buffer, target_offset, length, 1,
+                                      &value);
+}
+
+// Splats |element_count| copies of the |element_length|-byte pattern at
+// |value| into |target_buffer| starting at |target_offset|. Only element
+// sizes of 1, 2, 4, or 8 bytes are supported; mapping uses |element_length|
+// as the alignment so the range is truncated to element boundaries.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_fill_elements(
+    const iree_vm_buffer_t* target_buffer, iree_host_size_t target_offset,
+    iree_host_size_t element_count, iree_host_size_t element_length,
+    const void* value) {
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  iree_byte_span_t span;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_map_rw(target_buffer, target_offset,
+                                             element_count * element_length,
+                                             element_length, &span));
+  switch (element_length) {
+    case 1: {
+      const uint8_t pattern_value = *(const uint8_t*)value;
+      memset(span.data, pattern_value, span.data_length);
+    } break;
+    case 2: {
+      const uint16_t pattern_value = *(const uint16_t*)value;
+      uint16_t* target_ptr = (uint16_t*)span.data;
+      for (iree_host_size_t i = 0; i < element_count; ++i) {
+        target_ptr[i] = pattern_value;
+      }
+    } break;
+    case 4: {
+      const uint32_t pattern_value = *(const uint32_t*)value;
+      uint32_t* target_ptr = (uint32_t*)span.data;
+      for (iree_host_size_t i = 0; i < element_count; ++i) {
+        target_ptr[i] = pattern_value;
+      }
+    } break;
+    case 8: {
+      const uint64_t pattern_value = *(const uint64_t*)value;
+      uint64_t* target_ptr = (uint64_t*)span.data;
+      for (iree_host_size_t i = 0; i < element_count; ++i) {
+        target_ptr[i] = pattern_value;
+      }
+    } break;
+    default:
+      return iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "invalid element length %d; expected one of [1, 2, 4, 8]",
+          (int)element_length);
+  }
+  return iree_ok_status();
+}
+
+// Reads |element_count| elements of |element_length| bytes each from
+// |source_buffer|+|source_offset| into the host pointer |target_ptr|.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_read_elements(
+    const iree_vm_buffer_t* source_buffer, iree_host_size_t source_offset,
+    void* target_ptr, iree_host_size_t element_count,
+    iree_host_size_t element_length) {
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  iree_const_byte_span_t source_span;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_map_ro(source_buffer, source_offset,
+                                             element_count * element_length,
+                                             element_length, &source_span));
+  memcpy(target_ptr, source_span.data, source_span.data_length);
+  return iree_ok_status();
+}
+
+// Writes |element_count| elements of |element_length| bytes each from the
+// host pointer |source_ptr| into |target_buffer|+|target_offset|. Requires
+// the target buffer to be mutable.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_write_elements(
+    const void* source_ptr, const iree_vm_buffer_t* target_buffer,
+    iree_host_size_t target_offset, iree_host_size_t element_count,
+    iree_host_size_t element_length) {
+  IREE_ASSERT_ARGUMENT(source_ptr);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  iree_byte_span_t target_span;
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_map_rw(target_buffer, target_offset,
+                                             element_count * element_length,
+                                             element_length, &target_span));
+  memcpy(target_span.data, source_ptr, target_span.data_length);
+  return iree_ok_status();
+}
+
+// Registers the "vm.buffer" ref type with the global ref-type registry.
+// Idempotent: calls after the first successful registration are no-ops.
+iree_status_t iree_vm_buffer_register_types(void) {
+  if (iree_vm_buffer_descriptor.type != IREE_VM_REF_TYPE_NULL) {
+    // Already registered.
+    return iree_ok_status();
+  }
+
+  iree_vm_buffer_descriptor.destroy = iree_vm_buffer_destroy;
+  iree_vm_buffer_descriptor.offsetof_counter =
+      offsetof(iree_vm_buffer_t, ref_object.counter);
+  iree_vm_buffer_descriptor.type_name = iree_make_cstring_view("vm.buffer");
+  return iree_vm_ref_register_type(&iree_vm_buffer_descriptor);
+}
diff --git a/runtime/src/iree/vm/buffer.h b/runtime/src/iree/vm/buffer.h
new file mode 100644
index 0000000..1667e49
--- /dev/null
+++ b/runtime/src/iree/vm/buffer.h
@@ -0,0 +1,191 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BUFFER_H_
+#define IREE_VM_BUFFER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/ref.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Describes where a byte buffer originates from, what guarantees can be made
+// about its lifetime and ownership, and how it may be accessed.
+// Note that buffers may always be read.
+enum iree_vm_buffer_access_bits_t {
+ // The guest is allowed to write to the buffer.
+ // If not specified the buffer is read-only.
+ IREE_VM_BUFFER_ACCESS_MUTABLE = 1u << 0,
+
+ // Buffer references memory in the module space (rodata or rwdata) that is
+ // guaranteed to be live for the lifetime of the module.
+ IREE_VM_BUFFER_ACCESS_ORIGIN_MODULE = 1u << 1,
+ // Buffer references memory created by the guest module code. It has a
+ // lifetime less than that of the module but is always tracked with proper
+ // references (a handle existing to the memory implies it is valid).
+ IREE_VM_BUFFER_ACCESS_ORIGIN_GUEST = 1u << 2,
+ // Buffer references external host memory with an unknown lifetime.
+ IREE_VM_BUFFER_ACCESS_ORIGIN_HOST = 1u << 3,
+};
+typedef uint32_t iree_vm_buffer_access_t;
+
+// A simple byte range with options for ownership and wrapping semantics.
+// The access flags indicate what access is allowed from the VM.
+// Buffers are fixed-length and may only contain primitive values.
+// For resizable lists with mixed element types and ref objects use
+// iree_vm_list_t.
+//
+// Note that because buffers are just bags of bytes endianness issues are very
+// likely depending on usage. In general IREE takes the stance that
+// little-endian is all that is practically relevant nowadays and big-endian
+// targets will need their own modules compiled with such a setting. This is to
+// avoid the significant amount of work trying to ensure cross-endian
+// correctness in things like packed .rodata, cross-device switching (host in
+// a different endianness than HAL device), etc.
+//
+// For stack-allocated buffers setup with iree_vm_buffer_initialize the
+// allocator provided will be used to free the data when the buffer is
+// deinitialized. It may be iree_allocator_null to indicate the data is unowned.
+//
+// For heap-allocated buffers created with iree_vm_buffer_create/clone/etc the
+// allocator is used to free the entire iree_vm_buffer_t and the co-allocated
+// buffer data that lives after it in memory.
+typedef struct iree_vm_buffer_t {
+ iree_vm_ref_object_t ref_object;
+ iree_vm_buffer_access_t access;
+ iree_byte_span_t data;
+ iree_allocator_t allocator;
+} iree_vm_buffer_t;
+
+// Initializes a buffer in-place with the given byte contents.
+// This can be used to avoid buffer allocation overhead when wrapping existing
+// buffers for API interop but buffer lifetime must be observed carefully by
+// the caller.
+//
+// Some systems may assume that the data is aligned to at least the natural
+// word size of the machine. If possible align to iree_max_align_t.
+//
+// |data| will be freed with |allocator| when the buffer is deinitialized.
+// If the data is not owned then iree_allocator_null can be used to no-op the
+// free.
+//
+// |access| can be used to control who (guest, host, etc) and how (read/write)
+// the buffer may be accessed. If the allocation being wrapped has its own
+// access requirements (read-only, etc) the caller must specify those flags.
+IREE_API_EXPORT void iree_vm_buffer_initialize(iree_vm_buffer_access_t access,
+ iree_byte_span_t data,
+ iree_allocator_t allocator,
+ iree_vm_buffer_t* out_buffer);
+
+// Deinitializes a buffer previously initialized in-place with
+// iree_vm_buffer_initialize. Invalid to call on a buffer that was allocated
+// on the heap via iree_vm_buffer_create. Aborts if there are still references
+// remaining.
+IREE_API_EXPORT void iree_vm_buffer_deinitialize(iree_vm_buffer_t* buffer);
+
+// Creates a new zero-initialized buffer of the given byte |length|.
+// The underlying storage buffer may be allocated larger to ensure alignment.
+// The allocated data will be aligned to iree_max_align_t.
+//
+// |access| can be used to control who (guest, host, etc) and how (read/write)
+// the buffer may be accessed.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_create(
+ iree_vm_buffer_access_t access, iree_host_size_t length,
+ iree_allocator_t allocator, iree_vm_buffer_t** out_buffer);
+
+// Retains the given |buffer| for the caller.
+IREE_API_EXPORT void iree_vm_buffer_retain(iree_vm_buffer_t* buffer);
+
+// Releases the given |buffer| from the caller.
+IREE_API_EXPORT void iree_vm_buffer_release(iree_vm_buffer_t* buffer);
+
+// Clones a range of bytes in |source| to a new buffer.
+// The allocated data will be aligned to iree_max_align_t.
+//
+// |access| can be used to control who (guest, host, etc) and how (read/write)
+// the buffer may be accessed. As this returns a newly allocated buffer the
+// new access may be more permissive than the source buffer.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_clone(
+ iree_vm_buffer_access_t access, const iree_vm_buffer_t* source_buffer,
+ iree_host_size_t source_offset, iree_host_size_t length,
+ iree_allocator_t allocator, iree_vm_buffer_t** out_buffer);
+
+// Returns the user-visible length of the buffer in bytes.
+IREE_API_EXPORT iree_host_size_t
+iree_vm_buffer_length(const iree_vm_buffer_t* buffer);
+
+// Returns the underlying data storage for the buffer.
+// WARNING: this performs no validation of the access allowance on the buffer
+// and the caller is responsible for all range checking. Use with caution and
+// prefer the utility methods instead.
+IREE_API_EXPORT iree_byte_span_t
+iree_vm_buffer_data(const iree_vm_buffer_t* buffer);
+
+// Copies a byte range from |source_buffer| to |target_buffer|.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_copy_bytes(
+ const iree_vm_buffer_t* source_buffer, iree_host_size_t source_offset,
+ const iree_vm_buffer_t* target_buffer, iree_host_size_t target_offset,
+ iree_host_size_t length);
+
+// Compares |lhs_buffer| to |rhs_buffer| for bitwise equality.
+// |out_result| will receive 1 if the byte ranges are equal and 0 otherwise.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_compare_bytes(
+ const iree_vm_buffer_t* lhs_buffer, iree_host_size_t lhs_offset,
+ const iree_vm_buffer_t* rhs_buffer, iree_host_size_t rhs_offset,
+ iree_host_size_t length, bool* out_result);
+
+// Fills a byte range of |target_buffer| with the byte pattern.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_fill_bytes(
+ const iree_vm_buffer_t* target_buffer, iree_host_size_t target_offset,
+ iree_host_size_t length, uint8_t value);
+
+// Fills an element range of |buffer| with the given pattern.
+// Only |element_length| values of 1, 2, 4, or 8 bytes are supported.
+// The |target_offset|, in bytes, must match the alignment of the pattern.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_fill_elements(
+ const iree_vm_buffer_t* target_buffer, iree_host_size_t target_offset,
+ iree_host_size_t element_count, iree_host_size_t element_length,
+ const void* value);
+
+// Reads |element_count| elements each of |element_length| bytes from the
+// |source_buffer| into |out_target_ptr|. The |source_offset|, in bytes, must be
+// aligned to at least the |element_length|.
+// This routine performs checks on bounds, alignment, and access rights.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_read_elements(
+ const iree_vm_buffer_t* source_buffer, iree_host_size_t source_offset,
+ void* target_ptr, iree_host_size_t element_count,
+ iree_host_size_t element_length);
+
+// Writes |element_count| elements each of |element_length| bytes to the
+// |target_buffer| from |source_ptr|. The |target_offset|, in bytes, must be
+// aligned to at least the |element_length|.
+// This routine performs checks on bounds, alignment, and access rights.
+IREE_API_EXPORT iree_status_t iree_vm_buffer_write_elements(
+ const void* source_ptr, const iree_vm_buffer_t* target_buffer,
+ iree_host_size_t target_offset, iree_host_size_t element_count,
+ iree_host_size_t element_length);
+
+// Returns a string view referencing the given |value| buffer.
+// The returned view will only be valid for as long as the buffer is live.
+static inline iree_string_view_t iree_vm_buffer_as_string(
+    const iree_vm_buffer_t* value) {
+  // NULL buffers map to the canonical empty string view rather than a
+  // zero-length view of a NULL pointer.
+  return value ? iree_make_string_view((const char*)value->data.data,
+                                       value->data.data_length)
+               : iree_string_view_empty();
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_vm_buffer, iree_vm_buffer_t);
+
+#endif // IREE_VM_BUFFER_H_
diff --git a/runtime/src/iree/vm/buffer_test.cc b/runtime/src/iree/vm/buffer_test.cc
new file mode 100644
index 0000000..f7b3029
--- /dev/null
+++ b/runtime/src/iree/vm/buffer_test.cc
@@ -0,0 +1,51 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/buffer.h"
+
+#include <cstddef>
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/vm/builtin_types.h"
+
+namespace {
+
+// Fixture that registers the builtin VM types once per test suite so the
+// buffer tests below can operate on vm.buffer refs.
+class VMBufferTest : public ::testing::Test {
+ protected:
+  static void SetUpTestSuite() {
+    IREE_CHECK_OK(iree_vm_register_builtin_types());
+  }
+};
+
+// Tests that the data allocator is correctly called when using stack
+// initialization of a buffer.
+TEST_F(VMBufferTest, Initialize) {
+  bool did_free = false;
+  // Sentinel allocator: its control function flips |did_free| when asked to
+  // free, letting the test observe the deinitialize path without the heap.
+  iree_allocator_t test_allocator = {
+      /*.self=*/&did_free,
+      /*.ctl=*/
+      +[](void* self, iree_allocator_command_t command, const void* params,
+          void** inout_ptr) {
+        if (command == IREE_ALLOCATOR_COMMAND_FREE) {
+          *(bool*)self = true;
+        }
+        return iree_ok_status();
+      },
+  };
+
+  // Wrap stack memory; the buffer does not own it beyond the allocator hook.
+  uint32_t data[] = {0, 1, 2, 3};
+  iree_vm_buffer_t buffer;
+  iree_vm_buffer_initialize(
+      IREE_VM_BUFFER_ACCESS_MUTABLE | IREE_VM_BUFFER_ACCESS_ORIGIN_HOST,
+      iree_make_byte_span(data, sizeof(data)), test_allocator, &buffer);
+
+  // The data must only be "freed" by deinitialize, never before.
+  ASSERT_FALSE(did_free);
+  iree_vm_buffer_deinitialize(&buffer);
+  ASSERT_TRUE(did_free);
+}
+
+} // namespace
diff --git a/runtime/src/iree/vm/builtin_types.c b/runtime/src/iree/vm/builtin_types.c
new file mode 100644
index 0000000..6e133ae
--- /dev/null
+++ b/runtime/src/iree/vm/builtin_types.c
@@ -0,0 +1,16 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/builtin_types.h"
+
+// Local forward declarations of the per-type registrars so they need not be
+// exposed in public headers.
+iree_status_t iree_vm_buffer_register_types(void);
+iree_status_t iree_vm_list_register_types(void);
+
+// Registers all builtin VM ref types. Safe to call multiple times (see
+// builtin_types.h); stops at the first failing registrar.
+IREE_API_EXPORT iree_status_t iree_vm_register_builtin_types(void) {
+  IREE_RETURN_IF_ERROR(iree_vm_buffer_register_types());
+  IREE_RETURN_IF_ERROR(iree_vm_list_register_types());
+  return iree_ok_status();
+}
diff --git a/runtime/src/iree/vm/builtin_types.h b/runtime/src/iree/vm/builtin_types.h
new file mode 100644
index 0000000..b3e6890
--- /dev/null
+++ b/runtime/src/iree/vm/builtin_types.h
@@ -0,0 +1,24 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BUILTIN_TYPES_H_
+#define IREE_VM_BUILTIN_TYPES_H_
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Registers the builtin VM types. This must be called on startup. Safe to call
+// multiple times.
+IREE_API_EXPORT iree_status_t iree_vm_register_builtin_types(void);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_BUILTIN_TYPES_H_
diff --git a/runtime/src/iree/vm/bytecode_disasm.c b/runtime/src/iree/vm/bytecode_disasm.c
new file mode 100644
index 0000000..f44c230
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_disasm.c
@@ -0,0 +1,2249 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/bytecode_disasm.h"
+
+#include <inttypes.h>
+
+#include "iree/base/config.h"
+#include "iree/vm/ops.h"
+
+#define BEGIN_DISASM_PREFIX(op_name, ext) \
+ case IREE_VM_OP_CORE_##op_name: { \
+ switch (bytecode_data[pc++]) {
+#define END_DISASM_PREFIX() \
+ default: \
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED, \
+ "unhandled ext opcode"); \
+ } \
+ break; \
+ }
+#define UNHANDLED_DISASM_PREFIX(op_name, ext) \
+ case IREE_VM_OP_CORE_##op_name: { \
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED, \
+ "unhandled dispatch extension " #ext); \
+ }
+
+#define DISASM_OP(ext, op_name) case IREE_VM_OP_##ext##_##op_name:
+
+#define VM_ParseConstI8(name) \
+ OP_I8(0); \
+ ++pc;
+#define VM_ParseConstI32(name) \
+ OP_I32(0); \
+ pc += 4;
+#define VM_ParseConstI64(name) \
+ OP_I64(0); \
+ pc += 8;
+#define VM_ParseConstF32(name) \
+ OP_F32(0); \
+ pc += 4;
+#define VM_ParseConstF64(name) \
+ OP_F64(0); \
+ pc += 8;
+#define VM_ParseOpcode(opcode) VM_ParseConstI8(#opcode)
+#define VM_ParseFuncAttr(name) VM_ParseConstI32(name)
+#define VM_ParseGlobalAttr(name) VM_ParseConstI32(name)
+#define VM_ParseRodataAttr(name) VM_ParseConstI32(name)
+#define VM_ParseType(name) \
+ iree_vm_map_type(module, OP_I32(0)); \
+ pc += 4;
+#define VM_ParseTypeOf(name) VM_ParseType(name)
+#define VM_ParseIntAttr32(name) VM_ParseConstI32(name)
+#define VM_ParseIntAttr64(name) VM_ParseConstI64(name)
+#define VM_ParseFloatAttr32(name) VM_ParseConstF32(name)
+#define VM_ParseFloatAttr64(name) VM_ParseConstF64(name)
+#define VM_ParseStrAttr(name, out_str) \
+ (out_str)->size = (iree_host_size_t)OP_I16(0); \
+ (out_str)->data = (const char*)&bytecode_data[pc + 2]; \
+ pc += 2 + (out_str)->size;
+// NOTE(review): the parameter is |block_name| but the expansion passes the
+// undefined token |name|; harmless only because VM_ParseConstI32 never
+// expands its argument — consider passing |block_name| for consistency.
+#define VM_ParseBranchTarget(block_name) VM_ParseConstI32(name)
+#define VM_ParseBranchOperands(operands_name) \
+ VM_DecBranchOperandsImpl(bytecode_data, &pc)
+#define VM_ParseOperandRegI32(name) \
+ OP_I16(0) & regs->i32_mask; \
+ pc += kRegSize;
+#define VM_ParseOperandRegI64(name) \
+ OP_I16(0) & (regs->i32_mask & ~1); \
+ pc += kRegSize;
+#define VM_ParseOperandRegF32(name) \
+ OP_I16(0) & regs->i32_mask; \
+ pc += kRegSize;
+#define VM_ParseOperandRegF64(name) \
+ OP_I16(0) & (regs->i32_mask & ~1); \
+ pc += kRegSize;
+#define VM_ParseOperandRegRef(name, out_is_move) \
+ OP_I16(0) & regs->ref_mask; \
+ *(out_is_move) = 0; /*= OP_I16(0) & IREE_REF_REGISTER_MOVE_BIT;*/ \
+ pc += kRegSize;
+#define VM_ParseVariadicOperands(name) \
+ VM_DecVariadicOperandsImpl(bytecode_data, &pc)
+#define VM_ParseResultRegI32(name) \
+ OP_I16(0) & regs->i32_mask; \
+ pc += kRegSize;
+#define VM_ParseResultRegI64(name) \
+ OP_I16(0) & (regs->i32_mask & ~1); \
+ pc += kRegSize;
+#define VM_ParseResultRegF32(name) \
+ OP_I16(0) & regs->i32_mask; \
+ pc += kRegSize;
+#define VM_ParseResultRegF64(name) \
+ OP_I16(0) & (regs->i32_mask & ~1); \
+ pc += kRegSize;
+#define VM_ParseResultRegRef(name, out_is_move) \
+ OP_I16(0) & regs->ref_mask; \
+ *(out_is_move) = 0; /*= OP_I16(0) & IREE_REF_REGISTER_MOVE_BIT;*/ \
+ pc += kRegSize;
+#define VM_ParseVariadicResults(name) VM_ParseVariadicOperands(name)
+
+#define EMIT_REG_NAME(reg) \
+ if ((reg)&IREE_REF_REGISTER_TYPE_BIT) { \
+ EMIT_REF_REG_NAME(reg); \
+ } else { \
+ EMIT_I32_REG_NAME(reg); \
+ }
+#define EMIT_I32_REG_NAME(reg) \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format( \
+ b, "%%i%u", ((reg)&IREE_I32_REGISTER_MASK)));
+#define EMIT_I64_REG_NAME(reg) \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format( \
+ b, "%%i%u:%u", ((reg)&IREE_I32_REGISTER_MASK), \
+ ((reg)&IREE_I32_REGISTER_MASK) + 1));
+#define EMIT_F32_REG_NAME(reg) EMIT_I32_REG_NAME(reg)
+#define EMIT_REF_REG_NAME(reg) \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format( \
+ b, "%%r%u", ((reg)&IREE_REF_REGISTER_MASK)));
+
+#define EMIT_REG_VALUE(regs, reg) \
+ if ((reg)&IREE_REF_REGISTER_TYPE_BIT) { \
+ iree_vm_ref_t* ref = &(regs)->ref[(reg)&IREE_REF_REGISTER_MASK]; \
+ if (iree_vm_ref_is_null(ref)) { \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "null")); \
+ } else { \
+ iree_string_view_t type_name = iree_vm_ref_type_name(ref->type); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format( \
+ b, "!%.*s/0x%p", (int)type_name.size, type_name.data, ref->ptr)); \
+ } \
+ } else { \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format( \
+ b, "%u", ((regs)->i32[(reg)&IREE_I32_REGISTER_MASK]))); \
+ }
+
+// Appends a human-readable name for |type_def| to |b|: a primitive value type
+// name ("i8".."f64", "unknown" for unrecognized values), the registered ref
+// type name for ref types, or "*" when the type is neither value nor ref.
+static iree_status_t iree_vm_bytecode_disasm_emit_type_name(
+    const iree_vm_type_def_t* type_def, iree_string_builder_t* b) {
+  if (iree_vm_type_def_is_value(type_def)) {
+    const char* type_name;
+    switch (type_def->value_type) {
+      case IREE_VM_VALUE_TYPE_I8:
+        type_name = "i8";
+        break;
+      case IREE_VM_VALUE_TYPE_I16:
+        type_name = "i16";
+        break;
+      case IREE_VM_VALUE_TYPE_I32:
+        type_name = "i32";
+        break;
+      case IREE_VM_VALUE_TYPE_I64:
+        type_name = "i64";
+        break;
+      case IREE_VM_VALUE_TYPE_F32:
+        type_name = "f32";
+        break;
+      case IREE_VM_VALUE_TYPE_F64:
+        type_name = "f64";
+        break;
+      default:
+        type_name = "unknown";
+        break;
+    }
+    return iree_string_builder_append_cstring(b, type_name);
+  } else if (iree_vm_type_def_is_ref(type_def)) {
+    iree_string_view_t type_name = iree_vm_ref_type_name(type_def->ref_type);
+    return iree_string_builder_append_format(b, "%.*s", (int)type_name.size,
+                                             type_name.data);
+  } else {
+    return iree_string_builder_append_cstring(b, "*");
+  }
+}
+#define EMIT_TYPE_NAME(type_def) \
+  iree_vm_bytecode_disasm_emit_type_name(type_def, b);
+
+// Appends the comma-separated register names in |list| to |b|. When live
+// registers are available and |format| requests inline values, each name is
+// followed by its current value in parentheses.
+static iree_status_t iree_vm_bytecode_disasm_emit_operand_list(
+    const iree_vm_registers_t* regs, const iree_vm_register_list_t* list,
+    iree_vm_bytecode_disasm_format_t format, iree_string_builder_t* b) {
+  bool include_values =
+      regs && (format & IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES);
+  for (uint16_t i = 0; i < list->size; ++i) {
+    if (i > 0) {
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+    }
+    uint16_t reg = list->registers[i];
+    EMIT_REG_NAME(reg);
+    if (include_values) {
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "("));
+      EMIT_REG_VALUE(regs, reg);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+    }
+  }
+  return iree_ok_status();
+}
+#define EMIT_OPERAND_REG_LIST(reg_list) \
+  iree_vm_bytecode_disasm_emit_operand_list(regs, reg_list, format, b)
+// Appends the comma-separated register names in |list| to |b|. Results carry
+// no current value so, unlike the operand variant, no inline values are
+// printed (|format| is unused here but kept for call-site symmetry).
+static iree_status_t iree_vm_bytecode_disasm_emit_result_list(
+    const iree_vm_register_list_t* list,
+    iree_vm_bytecode_disasm_format_t format, iree_string_builder_t* b) {
+  for (uint16_t i = 0; i < list->size; ++i) {
+    if (i > 0) {
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+    }
+    uint16_t reg = list->registers[i];
+    EMIT_REG_NAME(reg);
+  }
+  return iree_ok_status();
+}
+#define EMIT_RESULT_REG_LIST(reg_list) \
+  iree_vm_bytecode_disasm_emit_result_list(reg_list, format, b)
+// Appends "src->dst" register pairs from |remap_list| to |b|, comma
+// separated. When live registers are available and |format| requests inline
+// values, each source register is followed by its current value.
+static iree_status_t iree_vm_bytecode_disasm_emit_remap_list(
+    const iree_vm_registers_t* regs,
+    const iree_vm_register_remap_list_t* remap_list,
+    iree_vm_bytecode_disasm_format_t format, iree_string_builder_t* b) {
+  bool include_values =
+      regs && (format & IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES);
+  for (uint16_t i = 0; i < remap_list->size; ++i) {
+    if (i > 0) {
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+    }
+    EMIT_REG_NAME(remap_list->pairs[i].src_reg);
+    if (include_values) {
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "("));
+      EMIT_REG_VALUE(regs, remap_list->pairs[i].src_reg);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+    }
+    IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "->"));
+    EMIT_REG_NAME(remap_list->pairs[i].dst_reg);
+  }
+  return iree_ok_status();
+}
+#define EMIT_REMAP_LIST(remap_list) \
+  iree_vm_bytecode_disasm_emit_remap_list(regs, remap_list, format, b)
+
+#define EMIT_OPTIONAL_VALUE_I32(expr) \
+ if (regs && (format & IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES)) { \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(b, "(%" PRId32 ")", \
+ (int32_t)(expr))); \
+ }
+#define EMIT_OPTIONAL_VALUE_I64(expr) \
+ if (regs && (format & IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES)) { \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format( \
+ b, "(%" PRId64 ")", *(int64_t*)&(expr))); \
+ }
+#define EMIT_OPTIONAL_VALUE_F32(expr) \
+ if (regs && (format & IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES)) { \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, "(%f)", *(float*)&(expr))); \
+ }
+#define EMIT_OPTIONAL_VALUE_F64(expr) \
+ if (regs && (format & IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES)) { \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, "(%f)", *(double*)&(expr))); \
+ }
+#define EMIT_OPTIONAL_VALUE_REF(expr) \
+ if (regs && (format & IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES)) { \
+ iree_vm_ref_t* ref = (expr); \
+ if (iree_vm_ref_is_null(ref)) { \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "(null)")); \
+ } else { \
+ iree_string_view_t type_name = iree_vm_ref_type_name(ref->type); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format( \
+ b, "(!%.*s/0x%p)", (int)type_name.size, type_name.data, ref->ptr)); \
+ } \
+ }
+
+#define DISASM_OP_CORE_UNARY_I32(op_name, op_mnemonic) \
+ DISASM_OP(CORE, op_name) { \
+ uint16_t operand_reg = VM_ParseOperandRegI32("operand"); \
+ uint16_t result_reg = VM_ParseResultRegI32("result"); \
+ EMIT_I32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I32_REG_NAME(operand_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[operand_reg]); \
+ break; \
+ }
+
+#define DISASM_OP_CORE_BINARY_I32(op_name, op_mnemonic) \
+ DISASM_OP(CORE, op_name) { \
+ uint16_t lhs_reg = VM_ParseOperandRegI32("lhs"); \
+ uint16_t rhs_reg = VM_ParseOperandRegI32("rhs"); \
+ uint16_t result_reg = VM_ParseResultRegI32("result"); \
+ EMIT_I32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I32_REG_NAME(lhs_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[lhs_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I32_REG_NAME(rhs_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[rhs_reg]); \
+ break; \
+ }
+
+#define DISASM_OP_CORE_TERNARY_I32(op_name, op_mnemonic) \
+ DISASM_OP(CORE, op_name) { \
+ uint16_t a_reg = VM_ParseOperandRegI32("a"); \
+ uint16_t b_reg = VM_ParseOperandRegI32("b"); \
+ uint16_t c_reg = VM_ParseOperandRegI32("c"); \
+ uint16_t result_reg = VM_ParseResultRegI32("result"); \
+ EMIT_I32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I32_REG_NAME(a_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[a_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I32_REG_NAME(b_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[b_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I32_REG_NAME(c_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[c_reg]); \
+ break; \
+ }
+
+#define DISASM_OP_EXT_I64_UNARY_I64(op_name, op_mnemonic) \
+ DISASM_OP(EXT_I64, op_name) { \
+ uint16_t operand_reg = VM_ParseOperandRegI64("operand"); \
+ uint16_t result_reg = VM_ParseResultRegI64("result"); \
+ EMIT_I64_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I64_REG_NAME(operand_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[operand_reg]); \
+ break; \
+ }
+
+#define DISASM_OP_EXT_I64_BINARY_I64(op_name, op_mnemonic) \
+ DISASM_OP(EXT_I64, op_name) { \
+ uint16_t lhs_reg = VM_ParseOperandRegI64("lhs"); \
+ uint16_t rhs_reg = VM_ParseOperandRegI64("rhs"); \
+ uint16_t result_reg = VM_ParseResultRegI64("result"); \
+ EMIT_I64_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I64_REG_NAME(lhs_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[lhs_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I64_REG_NAME(rhs_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[rhs_reg]); \
+ break; \
+ }
+
+#define DISASM_OP_EXT_I64_TERNARY_I64(op_name, op_mnemonic) \
+ DISASM_OP(EXT_I64, op_name) { \
+ uint16_t a_reg = VM_ParseOperandRegI64("a"); \
+ uint16_t b_reg = VM_ParseOperandRegI64("b"); \
+ uint16_t c_reg = VM_ParseOperandRegI64("c"); \
+ uint16_t result_reg = VM_ParseResultRegI64("result"); \
+ EMIT_I64_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I64_REG_NAME(a_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[a_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I64_REG_NAME(b_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[b_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I64_REG_NAME(c_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[c_reg]); \
+ break; \
+ }
+
+#define DISASM_OP_EXT_F32_UNARY_F32(op_name, op_mnemonic) \
+ DISASM_OP(EXT_F32, op_name) { \
+ uint16_t operand_reg = VM_ParseOperandRegF32("operand"); \
+ uint16_t result_reg = VM_ParseResultRegF32("result"); \
+ EMIT_F32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_F32_REG_NAME(operand_reg); \
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[operand_reg]); \
+ break; \
+ }
+
+#define DISASM_OP_EXT_F32_BINARY_F32(op_name, op_mnemonic) \
+ DISASM_OP(EXT_F32, op_name) { \
+ uint16_t lhs_reg = VM_ParseOperandRegF32("lhs"); \
+ uint16_t rhs_reg = VM_ParseOperandRegF32("rhs"); \
+ uint16_t result_reg = VM_ParseResultRegF32("result"); \
+ EMIT_F32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_F32_REG_NAME(lhs_reg); \
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[lhs_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_F32_REG_NAME(rhs_reg); \
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[rhs_reg]); \
+ break; \
+ }
+
+#define DISASM_OP_EXT_F32_TERNARY_F32(op_name, op_mnemonic) \
+ DISASM_OP(EXT_F32, op_name) { \
+ uint16_t a_reg = VM_ParseOperandRegF32("a"); \
+ uint16_t b_reg = VM_ParseOperandRegF32("b"); \
+ uint16_t c_reg = VM_ParseOperandRegF32("c"); \
+ uint16_t result_reg = VM_ParseResultRegF32("result"); \
+ EMIT_F32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_F32_REG_NAME(a_reg); \
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[a_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_F32_REG_NAME(b_reg); \
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[b_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_F32_REG_NAME(c_reg); \
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[c_reg]); \
+ break; \
+ }
+
+iree_status_t iree_vm_bytecode_disasm_op(
+ iree_vm_bytecode_module_t* module,
+ iree_vm_bytecode_module_state_t* module_state, uint16_t function_ordinal,
+ iree_vm_source_offset_t pc, const iree_vm_registers_t* regs,
+ iree_vm_bytecode_disasm_format_t format, iree_string_builder_t* b) {
+ const uint8_t* IREE_RESTRICT bytecode_data =
+ module->bytecode_data.data +
+ module->function_descriptor_table[function_ordinal].bytecode_offset;
+
+ switch (bytecode_data[pc++]) {
+ //===------------------------------------------------------------------===//
+ // Globals
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP(CORE, GlobalLoadI32) {
+ uint32_t byte_offset = VM_ParseGlobalAttr("global");
+ uint16_t value_reg = VM_ParseResultRegI32("value");
+ EMIT_I32_REG_NAME(value_reg);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, " = vm.global.load.i32 .rwdata[%u]", byte_offset));
+ EMIT_OPTIONAL_VALUE_I32(
+ vm_global_load_i32(module_state->rwdata_storage.data, byte_offset));
+ break;
+ }
+
+ DISASM_OP(CORE, GlobalStoreI32) {
+ uint32_t byte_offset = VM_ParseGlobalAttr("global");
+ uint16_t value_reg = VM_ParseOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, "vm.global.store.i32 "));
+ EMIT_I32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[value_reg]);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, ", .rwdata[%u]", byte_offset));
+ break;
+ }
+
+ DISASM_OP(CORE, GlobalLoadIndirectI32) {
+ uint16_t byte_offset_reg = VM_ParseOperandRegI32("global");
+ uint16_t value_reg = VM_ParseResultRegI32("value");
+ EMIT_I32_REG_NAME(value_reg);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+ b, " = vm.global.load.indirect.i32 .rwdata["));
+ EMIT_I32_REG_NAME(byte_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[byte_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "]"));
+ EMIT_OPTIONAL_VALUE_I32(vm_global_load_i32(
+ module_state->rwdata_storage.data, regs->i32[byte_offset_reg]));
+ break;
+ }
+
+ DISASM_OP(CORE, GlobalStoreIndirectI32) {
+ uint16_t byte_offset_reg = VM_ParseOperandRegI32("global");
+ uint16_t value_reg = VM_ParseOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+ b, "vm.global.store.indirect.i32 "));
+ EMIT_I32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", .rwdata["));
+ EMIT_I32_REG_NAME(byte_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[byte_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "]"));
+ break;
+ }
+
+ DISASM_OP(CORE, GlobalLoadRef) {
+ uint32_t global = VM_ParseGlobalAttr("global");
+ const iree_vm_type_def_t* type_def = VM_ParseTypeOf("value");
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("value", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, " = vm.global.load.ref .refs[%u]", global));
+ EMIT_OPTIONAL_VALUE_REF(&module_state->global_ref_table[global]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : !"));
+ EMIT_TYPE_NAME(type_def);
+ break;
+ }
+
+ DISASM_OP(CORE, GlobalStoreRef) {
+ uint32_t global = VM_ParseGlobalAttr("global");
+ const iree_vm_type_def_t* type_def = VM_ParseTypeOf("value");
+ bool value_is_move;
+ uint16_t value_reg = VM_ParseOperandRegRef("value", &value_is_move);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.global.store.ref "));
+ EMIT_REF_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[value_reg]);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, ", .refs[%u] : !", global));
+ EMIT_TYPE_NAME(type_def);
+ break;
+ }
+
+ DISASM_OP(CORE, GlobalLoadIndirectRef) {
+ uint16_t global_reg = VM_ParseOperandRegI32("global");
+ const iree_vm_type_def_t* type_def = VM_ParseTypeOf("value");
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("value", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+ b, " = vm.global.load.indirect.ref .refs["));
+ EMIT_I32_REG_NAME(global_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[global_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "]"));
+ EMIT_OPTIONAL_VALUE_REF(
+ &module_state->global_ref_table[regs->i32[global_reg]]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : !"));
+ EMIT_TYPE_NAME(type_def);
+ break;
+ }
+
+ DISASM_OP(CORE, GlobalStoreIndirectRef) {
+ uint16_t global_reg = VM_ParseOperandRegI32("global");
+ const iree_vm_type_def_t* type_def = VM_ParseTypeOf("value");
+ bool value_is_move;
+ uint16_t value_reg = VM_ParseOperandRegRef("value", &value_is_move);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "vm.global.store.indirect.ref "));
+ EMIT_REF_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(b, ", .refs["));
+ EMIT_I32_REG_NAME(global_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[global_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(b, "] : !"));
+ EMIT_TYPE_NAME(type_def);
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Constants
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP(CORE, ConstI32) {
+ int32_t value = VM_ParseIntAttr32("value");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, " = vm.const.i32 %d // 0x%08X", value, value));
+ break;
+ }
+
+ DISASM_OP(CORE, ConstI32Zero) {
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.const.i32.zero"));
+ break;
+ }
+
+ DISASM_OP(CORE, ConstRefZero) {
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("result", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.const.ref.zero"));
+ break;
+ }
+
+ // Disassembles vm.const.ref.rodata: binds a result ref register to a
+ // read-only data segment. The trailing comment shows the backing buffer's
+ // address and byte length for debugging.
+ DISASM_OP(CORE, ConstRefRodata) {
+ uint32_t rodata_ordinal = VM_ParseRodataAttr("rodata");
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("value", &result_is_move);
+ iree_vm_buffer_t* buffer =
+ &module_state->rodata_ref_table[rodata_ordinal];
+ EMIT_REF_REG_NAME(result_reg);
+ // Fix: "%p" already includes an implementation-defined prefix (0x on
+ // common libcs); the previous "0x%p" printed "0x0x...".
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, " = vm.const.ref.rodata %u // %p %" PRIhsz "b", rodata_ordinal,
+ buffer->data.data, buffer->data.data_length));
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Buffers
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP(CORE, BufferAlloc) {
+ uint16_t length_reg = VM_ParseOperandRegI32("length");
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("result", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.alloc "));
+ EMIT_I32_REG_NAME(length_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, BufferClone) {
+ bool source_is_move;
+ uint16_t source_reg = VM_ParseOperandRegRef("source", &source_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("offset");
+ uint16_t length_reg = VM_ParseOperandRegI32("length");
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("result", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.clone "));
+ EMIT_REF_REG_NAME(source_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[source_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(length_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, BufferLength) {
+ bool buffer_is_move;
+ uint16_t buffer_reg = VM_ParseOperandRegRef("buffer", &buffer_is_move);
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.length "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, BufferCopy) {
+ bool source_buffer_is_move;
+ uint16_t source_buffer_reg =
+ VM_ParseOperandRegRef("source_buffer", &source_buffer_is_move);
+ uint16_t source_offset_reg = VM_ParseOperandRegI32("source_offset");
+ bool target_buffer_is_move;
+ uint16_t target_buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &target_buffer_is_move);
+ uint16_t target_offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t length_reg = VM_ParseOperandRegI32("length");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.copy "));
+ EMIT_REF_REG_NAME(source_buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[source_buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(source_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[source_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(target_buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[target_buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(target_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[target_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(length_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, BufferCompare) {
+ bool lhs_buffer_is_move;
+ uint16_t lhs_buffer_reg =
+ VM_ParseOperandRegRef("lhs_buffer", &lhs_buffer_is_move);
+ uint16_t lhs_offset_reg = VM_ParseOperandRegI32("lhs_offset");
+ bool rhs_buffer_is_move;
+ uint16_t rhs_buffer_reg =
+ VM_ParseOperandRegRef("rhs_buffer", &rhs_buffer_is_move);
+ uint16_t rhs_offset_reg = VM_ParseOperandRegI32("rhs_offset");
+ uint16_t length_reg = VM_ParseOperandRegI32("length");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.compare "));
+ EMIT_REF_REG_NAME(lhs_buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[lhs_buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(lhs_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[lhs_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(rhs_buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[rhs_buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(rhs_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[rhs_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(length_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, BufferFillI8) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t length_reg = VM_ParseOperandRegI32("length");
+ uint16_t value_reg = VM_ParseOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.fill.i8 "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(length_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I32((uint8_t)regs->i32[value_reg]);
+ break;
+ }
+ // Disassembles vm.buffer.fill.i16: fills a range of target_buffer with a
+ // 16-bit value. Displayed offset/length are scaled by the element size,
+ // matching the i16 load/store cases.
+ DISASM_OP(CORE, BufferFillI16) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t length_reg = VM_ParseOperandRegI32("length");
+ uint16_t value_reg = VM_ParseOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.fill.i16 "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(&regs->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint16_t));
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ // Fix: length_reg is an i32 register (parsed via VM_ParseOperandRegI32
+ // above); it was mislabelled with EMIT_REF_REG_NAME. The i8 fill case
+ // prints the same operand with EMIT_I32_REG_NAME.
+ EMIT_I32_REG_NAME(length_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg] / sizeof(uint16_t));
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I32((uint16_t)regs->i32[value_reg]);
+ break;
+ }
+ // Disassembles vm.buffer.fill.i32: fills a range of target_buffer with a
+ // 32-bit value. Displayed offset/length are scaled by the element size,
+ // matching the i32 load/store cases.
+ DISASM_OP(CORE, BufferFillI32) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t length_reg = VM_ParseOperandRegI32("length");
+ uint16_t value_reg = VM_ParseOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.fill.i32 "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(&regs->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint32_t));
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ // Fix: length_reg is an i32 register (parsed via VM_ParseOperandRegI32
+ // above); it was mislabelled with EMIT_REF_REG_NAME. The i8 fill case
+ // prints the same operand with EMIT_I32_REG_NAME.
+ EMIT_I32_REG_NAME(length_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg] / sizeof(uint32_t));
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[value_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, BufferLoadI8U) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("source_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("source_offset");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.load.i8.u "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg]);
+ break;
+ }
+ DISASM_OP(CORE, BufferLoadI8S) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("source_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("source_offset");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.load.i8.s "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg]);
+ break;
+ }
+ DISASM_OP(CORE, BufferLoadI16U) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("source_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("source_offset");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.load.i16.u "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint16_t));
+ break;
+ }
+ DISASM_OP(CORE, BufferLoadI16S) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("source_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("source_offset");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.load.i16.s "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint16_t));
+ break;
+ }
+ DISASM_OP(CORE, BufferLoadI32) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("source_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("source_offset");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.load.i32 "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint32_t));
+ break;
+ }
+
+ DISASM_OP(CORE, BufferStoreI8) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t value_reg = VM_ParseOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.store.i8 "));
+ EMIT_I32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I32((uint8_t)regs->i32[value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg]);
+ break;
+ }
+ DISASM_OP(CORE, BufferStoreI16) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t value_reg = VM_ParseOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.store.i16 "));
+ EMIT_I32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I32((uint16_t)regs->i32[value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint16_t));
+ break;
+ }
+ DISASM_OP(CORE, BufferStoreI32) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t value_reg = VM_ParseOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.store.i32 "));
+ EMIT_I32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint32_t));
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Lists
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP(CORE, ListAlloc) {
+ const iree_vm_type_def_t* element_type_def =
+ VM_ParseTypeOf("element_type");
+ uint16_t initial_capacity_reg = VM_ParseOperandRegI32("initial_capacity");
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("result", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.list.alloc "));
+ EMIT_I32_REG_NAME(initial_capacity_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[initial_capacity_reg]);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " : !vm.list<"));
+ EMIT_TYPE_NAME(element_type_def);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ">"));
+ break;
+ }
+
+ DISASM_OP(CORE, ListReserve) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t minimum_capacity_reg = VM_ParseOperandRegI32("minimum_capacity");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.list.reserve "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(minimum_capacity_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[minimum_capacity_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, ListSize) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.list.size "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, ListResize) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t new_size_reg = VM_ParseOperandRegI32("new_size");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.list.resize "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(new_size_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[new_size_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, ListGetI32) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.list.get.i32 "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, ListSetI32) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ uint16_t raw_value_reg = VM_ParseOperandRegI32("raw_value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.list.set.i32 "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(raw_value_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[raw_value_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, ListGetRef) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ const iree_vm_type_def_t* type_def = VM_ParseTypeOf("result");
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("result", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.list.get.ref "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ EMIT_TYPE_NAME(type_def);
+ break;
+ }
+
+ DISASM_OP(CORE, ListSetRef) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ bool operand_is_move;
+ uint16_t operand_reg = VM_ParseOperandRegRef("value", &operand_is_move);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.list.set.ref "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[operand_reg]);
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Conditional assignment
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP(CORE, SelectI32) {
+ uint16_t condition_reg = VM_ParseOperandRegI32("condition");
+ uint16_t true_value_reg = VM_ParseOperandRegI32("true_value");
+ uint16_t false_value_reg = VM_ParseOperandRegI32("false_value");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.select.i32 "));
+ EMIT_I32_REG_NAME(condition_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[condition_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " ? "));
+ EMIT_I32_REG_NAME(true_value_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[true_value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : "));
+ EMIT_I32_REG_NAME(false_value_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[false_value_reg]);
+ break;
+ }
+
+ DISASM_OP(CORE, SelectRef) {
+ uint16_t condition_reg = VM_ParseOperandRegI32("condition");
+ const iree_vm_type_def_t* type_def = VM_ParseTypeOf("true_value");
+ bool true_value_is_move;
+ uint16_t true_value_reg =
+ VM_ParseOperandRegRef("true_value", &true_value_is_move);
+ bool false_value_is_move;
+ uint16_t false_value_reg =
+ VM_ParseOperandRegRef("false_value", &false_value_is_move);
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("result", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.select.ref "));
+ EMIT_I32_REG_NAME(condition_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[condition_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " ? "));
+ EMIT_REF_REG_NAME(true_value_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[true_value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : "));
+ EMIT_REF_REG_NAME(false_value_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[false_value_reg]);
+ EMIT_TYPE_NAME(type_def);
+ break;
+ }
+
+ // Disassembles vm.switch.i32: selects one of a variadic list of i32 values
+ // by index, falling back to a default constant when the index is out of
+ // range.
+ DISASM_OP(CORE, SwitchI32) {
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ int32_t default_value = VM_ParseIntAttr32("default_value");
+ const iree_vm_register_list_t* value_reg_list =
+ VM_ParseVariadicOperands("values");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.switch.i32 "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "["));
+ EMIT_OPERAND_REG_LIST(value_reg_list);
+ // Fix: default_value is a signed int32_t; the previous "%u" specifier
+ // misprinted negative defaults as large unsigned values.
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, "] else %d", default_value));
+ break;
+ }
+
+ DISASM_OP(CORE, SwitchRef) {
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ bool default_is_move;
+ uint16_t default_value_reg =
+ VM_ParseOperandRegRef("default_value", &default_is_move);
+ const iree_vm_register_list_t* value_reg_list =
+ VM_ParseVariadicOperands("values");
+ bool result_is_move;
+ uint16_t result_reg = VM_ParseResultRegRef("result", &result_is_move);
+ EMIT_REF_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.switch.ref "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "["));
+ EMIT_OPERAND_REG_LIST(value_reg_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "] else "));
+ EMIT_REF_REG_NAME(default_value_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[default_value_reg]);
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Native integer arithmetic
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP_CORE_BINARY_I32(AddI32, "vm.add.i32");
+ DISASM_OP_CORE_BINARY_I32(SubI32, "vm.sub.i32");
+ DISASM_OP_CORE_BINARY_I32(MulI32, "vm.mul.i32");
+ DISASM_OP_CORE_BINARY_I32(DivI32S, "vm.div.i32.s");
+ DISASM_OP_CORE_BINARY_I32(DivI32U, "vm.div.i32.u");
+ DISASM_OP_CORE_BINARY_I32(RemI32S, "vm.rem.i32.s");
+ DISASM_OP_CORE_BINARY_I32(RemI32U, "vm.rem.i32.u");
+ DISASM_OP_CORE_TERNARY_I32(FMAI32, "vm.fma.i32");
+ DISASM_OP_CORE_UNARY_I32(NotI32, "vm.not.i32");
+ DISASM_OP_CORE_BINARY_I32(AndI32, "vm.and.i32");
+ DISASM_OP_CORE_BINARY_I32(OrI32, "vm.or.i32");
+ DISASM_OP_CORE_BINARY_I32(XorI32, "vm.xor.i32");
+
+ //===------------------------------------------------------------------===//
+ // Casting and type conversion/emulation
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP_CORE_UNARY_I32(TruncI32I8, "vm.trunc.i32.i8");
+ DISASM_OP_CORE_UNARY_I32(TruncI32I16, "vm.trunc.i32.i16");
+ DISASM_OP_CORE_UNARY_I32(ExtI8I32S, "vm.ext.i8.i32.s");
+ DISASM_OP_CORE_UNARY_I32(ExtI8I32U, "vm.ext.i8.i32.u");
+ DISASM_OP_CORE_UNARY_I32(ExtI16I32S, "vm.ext.i16.i32.s");
+ DISASM_OP_CORE_UNARY_I32(ExtI16I32U, "vm.ext.i16.i32.u");
+
+ //===------------------------------------------------------------------===//
+ // Native bitwise shifts and rotates
+ //===------------------------------------------------------------------===//
+
+// Emits a disassembly case for a binary i32 shift op of the form:
+//   <result> = <op_mnemonic> <operand>, <amount>
+// where operand, amount, and result all live in the i32 register bank and
+// the live register values are appended when available.
+#define DISASM_OP_CORE_SHIFT_I32(op_name, op_mnemonic) \
+ DISASM_OP(CORE, op_name) { \
+ uint16_t operand_reg = VM_ParseOperandRegI32("operand"); \
+ uint16_t amount_reg = VM_ParseOperandRegI32("amount"); \
+ uint16_t result_reg = VM_ParseResultRegI32("result"); \
+ EMIT_I32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I32_REG_NAME(operand_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[operand_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I32_REG_NAME(amount_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[amount_reg]); \
+ break; \
+ }
+
+ DISASM_OP_CORE_SHIFT_I32(ShlI32, "vm.shl.i32");
+ DISASM_OP_CORE_SHIFT_I32(ShrI32S, "vm.shr.i32.s");
+ DISASM_OP_CORE_SHIFT_I32(ShrI32U, "vm.shr.i32.u");
+
+ //===------------------------------------------------------------------===//
+ // Comparison ops
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP_CORE_BINARY_I32(CmpEQI32, "vm.cmp.eq.i32");
+ DISASM_OP_CORE_BINARY_I32(CmpNEI32, "vm.cmp.ne.i32");
+ DISASM_OP_CORE_BINARY_I32(CmpLTI32S, "vm.cmp.lt.i32.s");
+ DISASM_OP_CORE_BINARY_I32(CmpLTI32U, "vm.cmp.lt.i32.u");
+ DISASM_OP_CORE_UNARY_I32(CmpNZI32, "vm.cmp.nz.i32");
+
+ DISASM_OP(CORE, CmpEQRef) {
+ bool lhs_is_move;
+ uint16_t lhs_reg = VM_ParseOperandRegRef("lhs", &lhs_is_move);
+ bool rhs_is_move;
+ uint16_t rhs_reg = VM_ParseOperandRegRef("rhs", &rhs_is_move);
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.cmp.eq.ref "));
+ EMIT_REF_REG_NAME(lhs_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[lhs_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(rhs_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[rhs_reg]);
+ break;
+ }
+ DISASM_OP(CORE, CmpNERef) {
+ bool lhs_is_move;
+ uint16_t lhs_reg = VM_ParseOperandRegRef("lhs", &lhs_is_move);
+ bool rhs_is_move;
+ uint16_t rhs_reg = VM_ParseOperandRegRef("rhs", &rhs_is_move);
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.cmp.ne.ref "));
+ EMIT_REF_REG_NAME(lhs_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[lhs_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(rhs_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[rhs_reg]);
+ break;
+ }
+ DISASM_OP(CORE, CmpNZRef) {
+ bool operand_is_move;
+ uint16_t operand_reg = VM_ParseOperandRegRef("operand", &operand_is_move);
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.cmp.nz.ref "));
+ EMIT_REF_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[operand_reg]);
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Control flow
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP(CORE, Branch) {
+ int32_t block_pc = VM_ParseBranchTarget("dest");
+ const iree_vm_register_remap_list_t* remap_list =
+ VM_ParseBranchOperands("operands");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, "vm.br ^%08X(", block_pc));
+ EMIT_REMAP_LIST(remap_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ DISASM_OP(CORE, CondBranch) {
+ uint16_t condition_reg = VM_ParseOperandRegI32("condition");
+ int32_t true_block_pc = VM_ParseBranchTarget("true_dest");
+ const iree_vm_register_remap_list_t* true_remap_list =
+ VM_ParseBranchOperands("true_operands");
+ int32_t false_block_pc = VM_ParseBranchTarget("false_dest");
+ const iree_vm_register_remap_list_t* false_remap_list =
+ VM_ParseBranchOperands("false_operands");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.cond_br "));
+ EMIT_I32_REG_NAME(condition_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[condition_reg]);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, ", ^%08X(", true_block_pc));
+ EMIT_REMAP_LIST(true_remap_list);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, "), ^%08X(", false_block_pc));
+ EMIT_REMAP_LIST(false_remap_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ DISASM_OP(CORE, Call) {
+ int32_t function_ordinal = VM_ParseFuncAttr("callee");
+ const iree_vm_register_list_t* src_reg_list =
+ VM_ParseVariadicOperands("operands");
+ const iree_vm_register_list_t* dst_reg_list =
+ VM_ParseVariadicResults("results");
+ if (dst_reg_list->size > 0) {
+ EMIT_RESULT_REG_LIST(dst_reg_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " = "));
+ }
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "vm.call @"));
+ int is_import = (function_ordinal & 0x80000000u) != 0;
+ iree_vm_function_t function;
+ if (is_import) {
+ const iree_vm_bytecode_import_t* import =
+ &module_state->import_table[function_ordinal & 0x7FFFFFFFu];
+ function = import->function;
+ } else {
+ function.module = &module->interface;
+ function.linkage = IREE_VM_FUNCTION_LINKAGE_INTERNAL;
+ function.ordinal = function_ordinal;
+ }
+ if (function.module) {
+ iree_string_view_t module_name = iree_vm_module_name(function.module);
+ iree_string_view_t func_name = iree_vm_function_name(&function);
+ if (iree_string_view_is_empty(func_name)) {
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "%.*s:%u", (int)module_name.size, module_name.data,
+ function.ordinal));
+ } else {
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "%.*s.%.*s", (int)module_name.size, module_name.data,
+ (int)func_name.size, func_name.data));
+ }
+ } else {
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "{{UNRESOLVED}}"));
+ }
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "("));
+ EMIT_OPERAND_REG_LIST(src_reg_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ // Disassembles vm.call.variadic: like Call but with segment sizes
+ // describing the variadic operand groupings.
+ DISASM_OP(CORE, CallVariadic) {
+ int32_t function_ordinal = VM_ParseFuncAttr("callee");
+ // TODO(benvanik): print segment sizes.
+ // const iree_vm_register_list_t* segment_size_list =
+ // NOTE: result intentionally discarded; the parse call still runs to
+ // advance the decoder past the segment size list.
+ VM_ParseVariadicOperands("segment_sizes");
+ const iree_vm_register_list_t* src_reg_list =
+ VM_ParseVariadicOperands("operands");
+ const iree_vm_register_list_t* dst_reg_list =
+ VM_ParseVariadicResults("results");
+ if (dst_reg_list->size > 0) {
+ EMIT_RESULT_REG_LIST(dst_reg_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " = "));
+ }
+ // Fix: mnemonic was misspelled "vm.call.varadic".
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.call.variadic @"));
+ int is_import = (function_ordinal & 0x80000000u) != 0;
+ iree_vm_function_t function;
+ if (is_import) {
+ const iree_vm_bytecode_import_t* import =
+ &module_state->import_table[function_ordinal & 0x7FFFFFFFu];
+ function = import->function;
+ } else {
+ function.module = &module->interface;
+ function.linkage = IREE_VM_FUNCTION_LINKAGE_INTERNAL;
+ function.ordinal = function_ordinal;
+ }
+ // Fix: guard against unresolved imports, matching the plain Call case
+ // above; an unresolved import leaves function.module NULL and
+ // iree_vm_module_name would otherwise dereference it.
+ if (function.module) {
+ iree_string_view_t module_name = iree_vm_module_name(function.module);
+ iree_string_view_t func_name = iree_vm_function_name(&function);
+ if (iree_string_view_is_empty(func_name)) {
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "%.*s:%u", (int)module_name.size, module_name.data,
+ function.ordinal));
+ } else {
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "%.*s.%.*s", (int)module_name.size, module_name.data,
+ (int)func_name.size, func_name.data));
+ }
+ } else {
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "{{UNRESOLVED}}"));
+ }
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "("));
+ EMIT_OPERAND_REG_LIST(src_reg_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ DISASM_OP(CORE, Return) {
+ const iree_vm_register_list_t* src_reg_list =
+ VM_ParseVariadicOperands("operands");
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "vm.return "));
+ EMIT_OPERAND_REG_LIST(src_reg_list);
+ break;
+ }
+
+ DISASM_OP(CORE, Fail) {
+ uint16_t status_code_reg = VM_ParseOperandRegI32("status");
+ iree_string_view_t message;
+ VM_ParseStrAttr("message", &message);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "vm.fail "));
+ EMIT_I32_REG_NAME(status_code_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[status_code_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, ", \"%.*s\"", (int)message.size, message.data));
+ break;
+ }
+
+ DISASM_OP(CORE, ImportResolved) {
+ int32_t function_ordinal = VM_ParseFuncAttr("import");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "= vm.import.exists @"));
+ int is_import = (function_ordinal & 0x80000000u) != 0;
+ if (IREE_UNLIKELY(!is_import)) {
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "{{INVALID ORDINAL %d}}", function_ordinal));
+ break;
+ }
+ uint32_t import_ordinal = function_ordinal & 0x7FFFFFFFu;
+ if (IREE_UNLIKELY(import_ordinal >= module_state->import_count)) {
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "{{OUT OF RANGE ORDINAL %u}}", import_ordinal));
+ break;
+ }
+ iree_vm_function_t decl_function;
+ IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_ordinal(
+ &module->interface, IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL,
+ import_ordinal, &decl_function));
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_string(
+ b, iree_vm_function_name(&decl_function)));
+ const iree_vm_bytecode_import_t* import =
+ &module_state->import_table[import_ordinal];
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+ b, import->function.module != NULL ? " // (resolved)"
+ : " // (unresolved)"));
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Async/fiber ops
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP(CORE, Yield) {
+ int32_t block_pc = VM_DecBranchTarget("dest");
+ const iree_vm_register_remap_list_t* remap_list =
+ VM_ParseBranchOperands("operands");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, "vm.yield ^%08X(", block_pc));
+ EMIT_REMAP_LIST(remap_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Debugging
+ //===------------------------------------------------------------------===//
+
+ DISASM_OP(CORE, Trace) {
+ iree_string_view_t event_name;
+ VM_ParseStrAttr("event_name", &event_name);
+ const iree_vm_register_list_t* src_reg_list =
+ VM_ParseVariadicOperands("operands");
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "vm.trace \"%.*s\"(", (int)event_name.size, event_name.data));
+ EMIT_OPERAND_REG_LIST(src_reg_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ DISASM_OP(CORE, Print) {
+ iree_string_view_t event_name;
+ VM_ParseStrAttr("event_name", &event_name);
+ const iree_vm_register_list_t* src_reg_list =
+ VM_ParseVariadicOperands("operands");
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "vm.print \"%.*s\"(", (int)event_name.size, event_name.data));
+ EMIT_OPERAND_REG_LIST(src_reg_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ DISASM_OP(CORE, Break) {
+ int32_t block_pc = VM_DecBranchTarget("dest");
+ const iree_vm_register_remap_list_t* remap_list =
+ VM_ParseBranchOperands("operands");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, "vm.break ^%08X(", block_pc));
+ EMIT_REMAP_LIST(remap_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ DISASM_OP(CORE, CondBreak) {
+ uint16_t condition_reg = VM_ParseOperandRegI32("condition");
+ int32_t block_pc = VM_ParseBranchTarget("dest");
+ const iree_vm_register_remap_list_t* remap_list =
+ VM_ParseBranchOperands("operands");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.cond_break "));
+ EMIT_I32_REG_NAME(condition_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[condition_reg]);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, ", ^%08X(", block_pc));
+ EMIT_REMAP_LIST(remap_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ")"));
+ break;
+ }
+
+ //===------------------------------------------------------------------===//
+ // Extension trampolines
+ //===------------------------------------------------------------------===//
+
+#if IREE_VM_EXT_I64_ENABLE
+ BEGIN_DISASM_PREFIX(PrefixExtI64, EXT_I64)
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Globals
+ //===----------------------------------------------------------------===//
+
+    DISASM_OP(EXT_I64, GlobalLoadI64) {
+      // vr = vm.global.load.i64 .rwdata[offset]
+      uint32_t byte_offset = VM_ParseGlobalAttr("global");
+      uint16_t value_reg = VM_ParseResultRegI64("value");
+      // Fixed: the result is an i64 register; it was emitted with the i32
+      // register name macro (cf. GlobalLoadIndirectI64 below).
+      EMIT_I64_REG_NAME(value_reg);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+          b, " = vm.global.load.i64 .rwdata[%u]", byte_offset));
+      EMIT_OPTIONAL_VALUE_I64(module_state->rwdata_storage.data[byte_offset]);
+      break;
+    }
+
+    DISASM_OP(EXT_I64, GlobalStoreI64) {
+      // vm.global.store.i64 vr, .rwdata[offset]
+      uint32_t byte_offset = VM_ParseGlobalAttr("global");
+      uint16_t value_reg = VM_ParseOperandRegI64("value");
+      // Constant string: use append_cstring instead of append_format with no
+      // variadic args (avoids treating the literal as a format string and
+      // matches the surrounding code style).
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(b, "vm.global.store.i64 "));
+      EMIT_I64_REG_NAME(value_reg);
+      EMIT_OPTIONAL_VALUE_I64(regs->i32[value_reg]);
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_format(b, ", .rwdata[%u]", byte_offset));
+      break;
+    }
+
+ DISASM_OP(EXT_I64, GlobalLoadIndirectI64) {
+ uint16_t byte_offset_reg = VM_ParseOperandRegI32("global");
+ uint16_t value_reg = VM_ParseResultRegI64("value");
+ EMIT_I64_REG_NAME(value_reg);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+ b, " = vm.global.load.indirect.i64 .rwdata["));
+ EMIT_I32_REG_NAME(byte_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[byte_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "]"));
+ EMIT_OPTIONAL_VALUE_I64(
+ module_state->rwdata_storage.data[regs->i32[byte_offset_reg]]);
+ break;
+ }
+
+ DISASM_OP(EXT_I64, GlobalStoreIndirectI64) {
+ uint16_t byte_offset_reg = VM_ParseOperandRegI32("global");
+ uint16_t value_reg = VM_ParseOperandRegI64("value");
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+ b, "vm.global.store.indirect.i64 "));
+ EMIT_I64_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", .rwdata["));
+ EMIT_I32_REG_NAME(byte_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[byte_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "]"));
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Constants
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_I64, ConstI64) {
+ int64_t value = VM_ParseIntAttr64("value");
+ uint16_t result_reg = VM_ParseResultRegI64("result");
+ EMIT_I64_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, " = vm.const.i64 %" PRId64 " // 0x%08" PRIX64 "", value, value));
+ break;
+ }
+
+ DISASM_OP(EXT_I64, ConstI64Zero) {
+ uint16_t result_reg = VM_ParseResultRegI64("result");
+ EMIT_I64_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.const.i64.zero"));
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Lists
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_I64, ListGetI64) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ uint16_t result_reg = VM_ParseResultRegI64("result");
+ EMIT_I64_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.list.get.i64 "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ break;
+ }
+
+ DISASM_OP(EXT_I64, ListSetI64) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ uint16_t value_reg = VM_ParseOperandRegI64("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.list.set.i64 "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I64_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[value_reg]);
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Conditional assignment
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_I64, SelectI64) {
+ uint16_t condition_reg = VM_ParseOperandRegI32("condition");
+ uint16_t true_value_reg = VM_ParseOperandRegI64("true_value");
+ uint16_t false_value_reg = VM_ParseOperandRegI64("false_value");
+ uint16_t result_reg = VM_ParseResultRegI64("result");
+ EMIT_I64_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.select.i64 "));
+ EMIT_I32_REG_NAME(condition_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[condition_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " ? "));
+ EMIT_I64_REG_NAME(true_value_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[true_value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : "));
+ EMIT_I64_REG_NAME(false_value_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[false_value_reg]);
+ break;
+ }
+
+ DISASM_OP(EXT_I64, SwitchI64) {
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ int64_t default_value = VM_ParseIntAttr64("default_value");
+ const iree_vm_register_list_t* value_reg_list =
+ VM_ParseVariadicOperands("values");
+ uint16_t result_reg = VM_ParseResultRegI64("result");
+ EMIT_I64_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.switch.i64 "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "["));
+ EMIT_OPERAND_REG_LIST(value_reg_list);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, "] else %" PRId64, default_value));
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Native integer arithmetic
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP_EXT_I64_BINARY_I64(AddI64, "vm.add.i64");
+ DISASM_OP_EXT_I64_BINARY_I64(SubI64, "vm.sub.i64");
+ DISASM_OP_EXT_I64_BINARY_I64(MulI64, "vm.mul.i64");
+ DISASM_OP_EXT_I64_BINARY_I64(DivI64S, "vm.div.i64.s");
+ DISASM_OP_EXT_I64_BINARY_I64(DivI64U, "vm.div.i64.u");
+ DISASM_OP_EXT_I64_BINARY_I64(RemI64S, "vm.rem.i64.s");
+ DISASM_OP_EXT_I64_BINARY_I64(RemI64U, "vm.rem.i64.u");
+ DISASM_OP_EXT_I64_TERNARY_I64(FMAI64, "vm.fma.i64");
+ DISASM_OP_EXT_I64_UNARY_I64(NotI64, "vm.not.i64");
+ DISASM_OP_EXT_I64_BINARY_I64(AndI64, "vm.and.i64");
+ DISASM_OP_EXT_I64_BINARY_I64(OrI64, "vm.or.i64");
+ DISASM_OP_EXT_I64_BINARY_I64(XorI64, "vm.xor.i64");
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Casting and type conversion/emulation
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_I64, TruncI64I32) {
+ uint16_t operand_reg = VM_ParseOperandRegI64("operand");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.trunc.i64.i32 "));
+ EMIT_I64_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[operand_reg]);
+ break;
+ }
+ DISASM_OP(EXT_I64, ExtI32I64S) {
+ uint16_t operand_reg = VM_ParseOperandRegI32("operand");
+ uint16_t result_reg = VM_ParseResultRegI64("result");
+ EMIT_I64_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.ext.i32.i64.s "));
+ EMIT_I64_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[operand_reg]);
+ break;
+ }
+ DISASM_OP(EXT_I64, ExtI32I64U) {
+ uint16_t operand_reg = VM_ParseOperandRegI32("operand");
+ uint16_t result_reg = VM_ParseResultRegI64("result");
+ EMIT_I64_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.ext.i32.i64.u "));
+ EMIT_I64_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[operand_reg]);
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Native bitwise shifts and rotates
+ //===----------------------------------------------------------------===//
+
+// Emits a disassembly case for an i64 shift/rotate op of the form:
+//   i64_result = <op_mnemonic> i64_operand, i32_amount
+// NOTE: i64 register values are read through the aliased i32 register bank
+// (regs->i32), matching the other EXT_I64 cases in this file.
+#define DISASM_OP_EXT_I64_SHIFT_I64(op_name, op_mnemonic) \
+ DISASM_OP(EXT_I64, op_name) { \
+ uint16_t operand_reg = VM_ParseOperandRegI64("operand"); \
+ uint16_t amount_reg = VM_ParseOperandRegI32("amount"); \
+ uint16_t result_reg = VM_ParseResultRegI64("result"); \
+ EMIT_I64_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I64_REG_NAME(operand_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[operand_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I32_REG_NAME(amount_reg); \
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[amount_reg]); \
+ break; \
+ }
+
+ DISASM_OP_EXT_I64_SHIFT_I64(ShlI64, "vm.shl.i64");
+ DISASM_OP_EXT_I64_SHIFT_I64(ShrI64S, "vm.shr.i64.s");
+ DISASM_OP_EXT_I64_SHIFT_I64(ShrI64U, "vm.shr.i64.u");
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Comparison ops
+ //===----------------------------------------------------------------===//
+
+// Emits a disassembly case for a binary i64 comparison op of the form:
+//   i32_result = <op_mnemonic> i64_lhs, i64_rhs
+#define DISASM_OP_EXT_I64_CMP_I64(op_name, op_mnemonic) \
+ DISASM_OP(EXT_I64, op_name) { \
+ uint16_t lhs_reg = VM_ParseOperandRegI64("lhs"); \
+ uint16_t rhs_reg = VM_ParseOperandRegI64("rhs"); \
+ uint16_t result_reg = VM_ParseResultRegI32("result"); \
+ EMIT_I32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_I64_REG_NAME(lhs_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[lhs_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_I64_REG_NAME(rhs_reg); \
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[rhs_reg]); \
+ break; \
+ }
+
+ DISASM_OP_EXT_I64_CMP_I64(CmpEQI64, "vm.cmp.eq.i64");
+ DISASM_OP_EXT_I64_CMP_I64(CmpNEI64, "vm.cmp.ne.i64");
+ DISASM_OP_EXT_I64_CMP_I64(CmpLTI64S, "vm.cmp.lt.i64.s");
+ DISASM_OP_EXT_I64_CMP_I64(CmpLTI64U, "vm.cmp.lt.i64.u");
+ DISASM_OP(EXT_I64, CmpNZI64) {
+ uint16_t operand_reg = VM_ParseOperandRegI64("operand");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.cmp.nz.i64 "));
+ EMIT_I64_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_I64(regs->i32[operand_reg]);
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Buffers
+ //===----------------------------------------------------------------===//
+
+    DISASM_OP(EXT_I64, BufferFillI64) {
+      // vm.buffer.fill.i64 buffer, offset, length, value
+      bool buffer_is_move;
+      uint16_t buffer_reg =
+          VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+      uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+      uint16_t length_reg = VM_ParseOperandRegI32("length");
+      uint16_t value_reg = VM_ParseOperandRegI64("value");
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(b, "vm.buffer.fill.i64 "));
+      EMIT_REF_REG_NAME(buffer_reg);
+      EMIT_OPTIONAL_VALUE_REF(&regs->ref[buffer_reg]);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+      EMIT_I32_REG_NAME(offset_reg);
+      EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint64_t));
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+      // Fixed: length is an i32 register; it was emitted with the ref
+      // register name macro (copy-paste from the buffer operand above).
+      EMIT_I32_REG_NAME(length_reg);
+      EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg] / sizeof(uint64_t));
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+      EMIT_I64_REG_NAME(value_reg);
+      EMIT_OPTIONAL_VALUE_I64(regs->i32[value_reg]);
+      break;
+    }
+
+ DISASM_OP(EXT_I64, BufferLoadI64) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("source_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("source_offset");
+ uint16_t result_reg = VM_ParseResultRegI64("result");
+ EMIT_I64_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.load.i64 "));
+ EMIT_REF_REG_NAME(buffer_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint64_t));
+ break;
+ }
+
+    DISASM_OP(EXT_I64, BufferStoreI64) {
+      // vm.buffer.store.i64 value, buffer, offset
+      bool buffer_is_move;
+      uint16_t buffer_reg =
+          VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+      uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+      uint16_t value_reg = VM_ParseOperandRegI64("value");
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(b, "vm.buffer.store.i64 "));
+      EMIT_I64_REG_NAME(value_reg);
+      EMIT_OPTIONAL_VALUE_I64(regs->i32[value_reg]);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+      EMIT_REF_REG_NAME(buffer_reg);
+      EMIT_OPTIONAL_VALUE_REF(&regs->ref[buffer_reg]);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+      EMIT_I32_REG_NAME(offset_reg);
+      // Fixed: scale the displayed byte offset by the i64 element size as
+      // BufferFillI64/BufferLoadI64 do (was sizeof(uint32_t)).
+      EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint64_t));
+      break;
+    }
+
+ END_DISASM_PREFIX()
+#else
+ UNHANDLED_DISASM_PREFIX(PrefixExtI64, EXT_I64);
+#endif // IREE_VM_EXT_I64_ENABLE
+
+#if IREE_VM_EXT_F32_ENABLE
+ BEGIN_DISASM_PREFIX(PrefixExtF32, EXT_F32)
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Globals
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_F32, GlobalLoadF32) {
+ uint32_t byte_offset = VM_ParseGlobalAttr("global");
+ uint16_t value_reg = VM_ParseResultRegF32("value");
+ EMIT_F32_REG_NAME(value_reg);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+ b, " = vm.global.load.f32 .rwdata[%u]", byte_offset));
+ EMIT_OPTIONAL_VALUE_F32(module_state->rwdata_storage.data[byte_offset]);
+ break;
+ }
+
+    DISASM_OP(EXT_F32, GlobalStoreF32) {
+      // vm.global.store.f32 vr, .rwdata[offset]
+      uint32_t byte_offset = VM_ParseGlobalAttr("global");
+      uint16_t value_reg = VM_ParseOperandRegF32("value");
+      // Constant string: use append_cstring instead of append_format with no
+      // variadic args (avoids treating the literal as a format string and
+      // matches the surrounding code style).
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(b, "vm.global.store.f32 "));
+      EMIT_F32_REG_NAME(value_reg);
+      EMIT_OPTIONAL_VALUE_F32(regs->i32[value_reg]);
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_format(b, ", .rwdata[%u]", byte_offset));
+      break;
+    }
+
+    DISASM_OP(EXT_F32, GlobalLoadIndirectF32) {
+      // vr = vm.global.load.indirect.f32 .rwdata[offset_reg]
+      uint16_t byte_offset_reg = VM_ParseOperandRegI32("global");
+      // Fixed: the result is an f32 register; it was parsed with
+      // VM_ParseResultRegI32 (cf. GlobalLoadF32 above which uses the f32
+      // variant).
+      uint16_t value_reg = VM_ParseResultRegF32("value");
+      EMIT_F32_REG_NAME(value_reg);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+          b, " = vm.global.load.indirect.f32 .rwdata["));
+      EMIT_I32_REG_NAME(byte_offset_reg);
+      EMIT_OPTIONAL_VALUE_I32(regs->i32[byte_offset_reg]);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "]"));
+      EMIT_OPTIONAL_VALUE_F32(
+          module_state->rwdata_storage.data[regs->i32[byte_offset_reg]]);
+      break;
+    }
+
+ DISASM_OP(EXT_F32, GlobalStoreIndirectF32) {
+ uint16_t byte_offset_reg = VM_ParseOperandRegI32("global");
+ uint16_t value_reg = VM_ParseOperandRegF32("value");
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+ b, "vm.global.store.indirect.f32 "));
+ EMIT_F32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", .rwdata["));
+ EMIT_I32_REG_NAME(byte_offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[byte_offset_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "]"));
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Constants
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_F32, ConstF32) {
+ float value = VM_ParseFloatAttr32("value");
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, " = vm.const.f32 %f", value));
+ break;
+ }
+
+ DISASM_OP(EXT_F32, ConstF32Zero) {
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.const.f32.zero"));
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Lists
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_F32, ListGetF32) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.list.get.f32 "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ break;
+ }
+
+ DISASM_OP(EXT_F32, ListSetF32) {
+ bool list_is_move;
+ uint16_t list_reg = VM_ParseOperandRegRef("list", &list_is_move);
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ uint16_t raw_value_reg = VM_ParseOperandRegF32("raw_value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.list.set.f32 "));
+ EMIT_REF_REG_NAME(list_reg);
+ EMIT_OPTIONAL_VALUE_REF(®s->ref[list_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_F32_REG_NAME(raw_value_reg);
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[raw_value_reg]);
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Conditional assignment
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_F32, SelectF32) {
+ uint16_t condition_reg = VM_ParseOperandRegI32("condition");
+ uint16_t true_value_reg = VM_ParseOperandRegF32("true_value");
+ uint16_t false_value_reg = VM_ParseOperandRegF32("false_value");
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.select.f32 "));
+ EMIT_I32_REG_NAME(condition_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[condition_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " ? "));
+ EMIT_F32_REG_NAME(true_value_reg);
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[true_value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : "));
+ EMIT_F32_REG_NAME(false_value_reg);
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[false_value_reg]);
+ break;
+ }
+
+ DISASM_OP(EXT_F32, SwitchF32) {
+ uint16_t index_reg = VM_ParseOperandRegI32("index");
+ float default_value = VM_ParseFloatAttr32("default_value");
+ const iree_vm_register_list_t* value_reg_list =
+ VM_ParseVariadicOperands("values");
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.switch.f32 "));
+ EMIT_I32_REG_NAME(index_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, "["));
+ EMIT_OPERAND_REG_LIST(value_reg_list);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_format(b, "] else %f", default_value));
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Native floating-point arithmetic
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP_EXT_F32_BINARY_F32(AddF32, "vm.add.f32");
+ DISASM_OP_EXT_F32_BINARY_F32(SubF32, "vm.sub.f32");
+ DISASM_OP_EXT_F32_BINARY_F32(MulF32, "vm.mul.f32");
+ DISASM_OP_EXT_F32_BINARY_F32(DivF32, "vm.div.f32");
+ DISASM_OP_EXT_F32_BINARY_F32(RemF32, "vm.rem.f32");
+ DISASM_OP_EXT_F32_TERNARY_F32(FMAF32, "vm.fma.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(AbsF32, "vm.abs.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(NegF32, "vm.neg.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(CeilF32, "vm.ceil.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(FloorF32, "vm.floor.f32");
+
+ DISASM_OP_EXT_F32_UNARY_F32(AtanF32, "vm.atan.f32");
+ DISASM_OP_EXT_F32_BINARY_F32(Atan2F32, "vm.atan2.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(CosF32, "vm.cos.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(SinF32, "vm.sin.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(ExpF32, "vm.exp.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(Exp2F32, "vm.exp2.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(ExpM1F32, "vm.expm1.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(LogF32, "vm.log.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(Log10F32, "vm.log10.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(Log1pF32, "vm.log1p.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(Log2F32, "vm.log2.f32");
+ DISASM_OP_EXT_F32_BINARY_F32(PowF32, "vm.pow.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(RsqrtF32, "vm.rsqrt.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(SqrtF32, "vm.sqrt.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(TanhF32, "vm.tanh.f32");
+ DISASM_OP_EXT_F32_UNARY_F32(ErfF32, "vm.erf.f32");
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Casting and type conversion/emulation
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_F32, CastSI32F32) {
+ uint16_t operand_reg = VM_ParseOperandRegI32("operand");
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.cast.si32.f32 "));
+ EMIT_I32_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[operand_reg]);
+ break;
+ }
+ DISASM_OP(EXT_F32, CastUI32F32) {
+ uint16_t operand_reg = VM_ParseOperandRegI32("operand");
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.cast.ui32.f32 "));
+ EMIT_I32_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[operand_reg]);
+ break;
+ }
+    DISASM_OP(EXT_F32, CastF32SI32) {
+      // vr = vm.cast.f32.si32 operand
+      uint16_t operand_reg = VM_ParseOperandRegF32("operand");
+      uint16_t result_reg = VM_ParseResultRegI32("result");
+      EMIT_I32_REG_NAME(result_reg);
+      // Fixed mnemonic typo: was "vm.cast.f32.sif32".
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(b, " = vm.cast.f32.si32 "));
+      EMIT_F32_REG_NAME(operand_reg);
+      EMIT_OPTIONAL_VALUE_F32(regs->i32[operand_reg]);
+      break;
+    }
+    DISASM_OP(EXT_F32, CastF32UI32) {
+      // vr = vm.cast.f32.ui32 operand
+      uint16_t operand_reg = VM_ParseOperandRegF32("operand");
+      uint16_t result_reg = VM_ParseResultRegI32("result");
+      EMIT_I32_REG_NAME(result_reg);
+      // Fixed mnemonic typo: was "vm.cast.f32.uif32".
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(b, " = vm.cast.f32.ui32 "));
+      EMIT_F32_REG_NAME(operand_reg);
+      EMIT_OPTIONAL_VALUE_F32(regs->i32[operand_reg]);
+      break;
+    }
+ DISASM_OP(EXT_F32, BitcastI32F32) {
+ uint16_t operand_reg = VM_ParseOperandRegI32("operand");
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.bitcast.i32.f32 "));
+ EMIT_I32_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[operand_reg]);
+ break;
+ }
+    DISASM_OP(EXT_F32, BitcastF32I32) {
+      // vr = vm.bitcast.f32.i32 operand
+      uint16_t operand_reg = VM_ParseOperandRegF32("operand");
+      uint16_t result_reg = VM_ParseResultRegI32("result");
+      EMIT_I32_REG_NAME(result_reg);
+      // Fixed mnemonic typo: was "vm.bitcast.f32.if32" (cf. the inverse
+      // BitcastI32F32 case which prints "vm.bitcast.i32.f32").
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(b, " = vm.bitcast.f32.i32 "));
+      EMIT_F32_REG_NAME(operand_reg);
+      EMIT_OPTIONAL_VALUE_F32(regs->i32[operand_reg]);
+      break;
+    }
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Comparison ops
+ //===----------------------------------------------------------------===//
+
+// Emits a disassembly case for a binary f32 comparison op of the form:
+//   i32_result = <op_mnemonic> f32_lhs, f32_rhs
+// NOTE: f32 register values are read through the aliased i32 register bank
+// (regs->i32), matching the other EXT_F32 cases in this file.
+#define DISASM_OP_EXT_F32_CMP_F32(op_name, op_mnemonic) \
+ DISASM_OP(EXT_F32, op_name) { \
+ uint16_t lhs_reg = VM_ParseOperandRegF32("lhs"); \
+ uint16_t rhs_reg = VM_ParseOperandRegF32("rhs"); \
+ uint16_t result_reg = VM_ParseResultRegI32("result"); \
+ EMIT_I32_REG_NAME(result_reg); \
+ IREE_RETURN_IF_ERROR( \
+ iree_string_builder_append_format(b, " = %s ", op_mnemonic)); \
+ EMIT_F32_REG_NAME(lhs_reg); \
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[lhs_reg]); \
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); \
+ EMIT_F32_REG_NAME(rhs_reg); \
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[rhs_reg]); \
+ break; \
+ }
+
+ DISASM_OP_EXT_F32_CMP_F32(CmpEQF32O, "vm.cmp.eq.f32.o");
+ DISASM_OP_EXT_F32_CMP_F32(CmpEQF32U, "vm.cmp.eq.f32.u");
+ DISASM_OP_EXT_F32_CMP_F32(CmpNEF32O, "vm.cmp.ne.f32.o");
+ DISASM_OP_EXT_F32_CMP_F32(CmpNEF32U, "vm.cmp.ne.f32.u");
+ DISASM_OP_EXT_F32_CMP_F32(CmpLTF32O, "vm.cmp.lt.f32.o");
+ DISASM_OP_EXT_F32_CMP_F32(CmpLTF32U, "vm.cmp.lt.f32.u");
+ DISASM_OP_EXT_F32_CMP_F32(CmpLTEF32O, "vm.cmp.lte.f32.o");
+ DISASM_OP_EXT_F32_CMP_F32(CmpLTEF32U, "vm.cmp.lte.f32.u");
+ DISASM_OP(EXT_F32, CmpNaNF32) {
+ uint16_t operand_reg = VM_ParseOperandRegF32("operand");
+ uint16_t result_reg = VM_ParseResultRegI32("result");
+ EMIT_I32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.cmp.nan.f32 "));
+ EMIT_F32_REG_NAME(operand_reg);
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[operand_reg]);
+ break;
+ }
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Buffers
+ //===----------------------------------------------------------------===//
+
+ DISASM_OP(EXT_F32, BufferFillF32) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t length_reg = VM_ParseOperandRegI32("length");
+ uint16_t value_reg = VM_ParseOperandRegF32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.fill.f32 "));
+ EMIT_REF_REG_NAME(buffer_reg);
+    EMIT_OPTIONAL_VALUE_REF(&regs->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(float));
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(length_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[length_reg] / sizeof(float));
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_F32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[value_reg]);
+ break;
+ }
+
+ DISASM_OP(EXT_F32, BufferLoadF32) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("source_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("source_offset");
+ uint16_t result_reg = VM_ParseResultRegF32("result");
+ EMIT_F32_REG_NAME(result_reg);
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, " = vm.buffer.load.f32 "));
+ EMIT_REF_REG_NAME(buffer_reg);
+    EMIT_OPTIONAL_VALUE_REF(&regs->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(float));
+ break;
+ }
+
+ DISASM_OP(EXT_F32, BufferStoreF32) {
+ bool buffer_is_move;
+ uint16_t buffer_reg =
+ VM_ParseOperandRegRef("target_buffer", &buffer_is_move);
+ uint16_t offset_reg = VM_ParseOperandRegI32("target_offset");
+ uint16_t value_reg = VM_ParseOperandRegF32("value");
+ IREE_RETURN_IF_ERROR(
+ iree_string_builder_append_cstring(b, "vm.buffer.store.f32 "));
+ EMIT_F32_REG_NAME(value_reg);
+ EMIT_OPTIONAL_VALUE_F32(regs->i32[value_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_REF_REG_NAME(buffer_reg);
+    EMIT_OPTIONAL_VALUE_REF(&regs->ref[buffer_reg]);
+ IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
+ EMIT_I32_REG_NAME(offset_reg);
+ EMIT_OPTIONAL_VALUE_I32(regs->i32[offset_reg] / sizeof(uint32_t));
+ break;
+ }
+
+ END_DISASM_PREFIX()
+#else
+ UNHANDLED_DISASM_PREFIX(PrefixExtF32, EXT_F32)
+#endif // IREE_VM_EXT_F32_ENABLE
+ UNHANDLED_DISASM_PREFIX(PrefixExtF64, EXT_F64)
+
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unhandled core opcode");
+ }
+ return iree_ok_status();
+}
+
+// Disassembles the op at |pc| within |frame| and writes one line of the form
+// `[module.function+offset] <op>` (with inline register values) to |file|.
+// The string builder is always deinitialized before returning, even on
+// failure, so no allocation is leaked on error paths.
+iree_status_t iree_vm_bytecode_trace_disasm(iree_vm_stack_frame_t* frame,
+                                            iree_vm_source_offset_t pc,
+                                            const iree_vm_registers_t* regs,
+                                            FILE* file) {
+  iree_string_builder_t b;
+  iree_string_builder_initialize(iree_allocator_system(), &b);
+
+  // TODO(benvanik): ensure frame is in-sync before call or restore original.
+  // It's shady to manipulate the frame here but I know we expect the pc to be
+  // valid only on entry/exit from a function.
+  frame->pc = pc;
+
+#if IREE_VM_EXECUTION_TRACING_SRC_LOC_ENABLE
+  // Prefix the line with the source location, when available.
+  iree_vm_source_location_t source_location;
+  iree_status_t status = iree_vm_module_resolve_source_location(
+      frame->function.module, frame, &source_location);
+  if (iree_status_is_ok(status)) {
+    status = iree_vm_source_location_format(
+        &source_location, IREE_VM_SOURCE_LOCATION_FORMAT_FLAG_SINGLE_LINE, &b);
+    if (iree_status_is_ok(status)) {
+      // Pad out to keep alignment. This is just guesswork based on my machine.
+      static const iree_host_size_t pad_to = 80;
+      iree_host_size_t col = iree_string_builder_size(&b);
+      if (col < pad_to) {
+        status = iree_string_builder_append_format(&b, "%*s ",
+                                                   (int)(pad_to - col), "");
+      } else {
+        status = iree_string_builder_append_cstring(&b, " ");
+      }
+    }
+  } else if (iree_status_is_unavailable(status)) {
+    // Ignore failures when no source location is available.
+    status = iree_ok_status();
+  }
+#else
+  iree_status_t status = iree_ok_status();
+#endif  // IREE_VM_EXECUTION_TRACING_SRC_LOC_ENABLE
+
+  // `[module.function+offset] ` prefix identifying the op being traced.
+  if (iree_status_is_ok(status)) {
+    iree_string_view_t module_name =
+        iree_vm_module_name(frame->function.module);
+    status = iree_string_builder_append_format(
+        &b, "[%.*s", (int)module_name.size, module_name.data);
+  }
+  if (iree_status_is_ok(status)) {
+    iree_string_view_t function_name = iree_vm_function_name(&frame->function);
+    if (iree_string_view_is_empty(function_name)) {
+      // Unnamed functions fall back to their ordinal.
+      status = iree_string_builder_append_format(
+          &b, "@%u", (uint32_t)frame->function.ordinal);
+    } else {
+      status = iree_string_builder_append_format(
+          &b, ".%.*s", (int)function_name.size, function_name.data);
+    }
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_string_builder_append_format(&b, "+%08" PRIX64 "] ", pc);
+  }
+
+  // Disassemble the op at the current pc with inline register values.
+  if (iree_status_is_ok(status)) {
+    status = iree_vm_bytecode_disasm_op(
+        (iree_vm_bytecode_module_t*)frame->function.module,
+        (iree_vm_bytecode_module_state_t*)frame->module_state,
+        frame->function.ordinal, pc, regs,
+        IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES, &b);
+  }
+
+  if (iree_status_is_ok(status)) {
+    fprintf(file, "%.*s\n", (int)iree_string_builder_size(&b),
+            iree_string_builder_buffer(&b));
+  }
+
+  // Single exit: always release builder storage (previously leaked on the
+  // early-return error paths above).
+  iree_string_builder_deinitialize(&b);
+  return status;
+}
diff --git a/runtime/src/iree/vm/bytecode_disasm.h b/runtime/src/iree/vm/bytecode_disasm.h
new file mode 100644
index 0000000..2c73025
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_disasm.h
@@ -0,0 +1,46 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_DISASM_H_
+#define IREE_VM_BYTECODE_DISASM_H_
+
+#include <stdio.h>
+
+#include "iree/base/string_builder.h"
+#include "iree/vm/bytecode_dispatch_util.h"
+#include "iree/vm/bytecode_module_impl.h"
+#include "iree/vm/stack.h"
+
+// Controls how bytecode disassembly is formatted.
+typedef enum iree_vm_bytecode_disasm_format_e {
+  IREE_VM_BYTECODE_DISASM_FORMAT_DEFAULT = 0,
+  // Includes the input register values inline in the op text.
+  // Example: `%i0 <= ShrI32U %i2(5), %i3(6)`
+  IREE_VM_BYTECODE_DISASM_FORMAT_INLINE_VALUES = 1u << 0,
+} iree_vm_bytecode_disasm_format_t;
+
+// Disassembles the bytecode operation at |pc| using the provided module state.
+// Appends the disassembled op to |string_builder| in a format based on
+// |format|. If |regs| are available then values can be added using the format
+// mode.
+//
+// Example: `%i0 <= ShrI32U %i2, %i3`
+//
+// WARNING: this does not currently perform any verification on the bytecode;
+// it's assumed all bytecode is valid. This is a debug tool: you shouldn't be
+// running this in production on untrusted inputs anyway.
+iree_status_t iree_vm_bytecode_disasm_op(
+    iree_vm_bytecode_module_t* module,
+    iree_vm_bytecode_module_state_t* module_state, uint16_t function_ordinal,
+    iree_vm_source_offset_t pc, const iree_vm_registers_t* regs,
+    iree_vm_bytecode_disasm_format_t format,
+    iree_string_builder_t* string_builder);
+
+// Disassembles the op at |pc| within |frame| and writes a single line of the
+// form `[module.function+offset] <op>` (with inline register values) to
+// |file|. Intended for execution tracing; shares the WARNING above.
+iree_status_t iree_vm_bytecode_trace_disasm(iree_vm_stack_frame_t* frame,
+                                            iree_vm_source_offset_t pc,
+                                            const iree_vm_registers_t* regs,
+                                            FILE* file);
+
+#endif  // IREE_VM_BYTECODE_DISASM_H_
diff --git a/runtime/src/iree/vm/bytecode_dispatch.c b/runtime/src/iree/vm/bytecode_dispatch.c
new file mode 100644
index 0000000..2c5eb5f
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_dispatch.c
@@ -0,0 +1,2149 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/math.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode_disasm.h"
+#include "iree/vm/bytecode_dispatch_util.h"
+#include "iree/vm/bytecode_module_impl.h"
+#include "iree/vm/ops.h"
+
+//===----------------------------------------------------------------------===//
+// Register remapping utilities
+//===----------------------------------------------------------------------===//
+
+// Remaps registers from a source set to a destination set within the same
+// stack frame. This is a way to perform a conditional multi-mov sequence
+// instead of requiring the additional bytecode representation of the
+// conditional movs.
+//
+// This assumes that the remapping list is properly ordered such that there are
+// no swapping hazards (such as 0->1,1->0). The register allocator in the
+// compiler should ensure this is the case when it can occur.
+static void iree_vm_bytecode_dispatch_remap_branch_registers(
+    const iree_vm_registers_t regs,
+    const iree_vm_register_remap_list_t* IREE_RESTRICT remap_list) {
+  for (int i = 0; i < remap_list->size; ++i) {
+    // TODO(benvanik): change encoding to avoid this branching.
+    // Could write two arrays: one for prims and one for refs.
+    uint16_t src_reg = remap_list->pairs[i].src_reg;
+    uint16_t dst_reg = remap_list->pairs[i].dst_reg;
+    if (src_reg & IREE_REF_REGISTER_TYPE_BIT) {
+      // Honors the per-register move bit: moved refs transfer ownership
+      // without a retain/release pair.
+      iree_vm_ref_retain_or_move(src_reg & IREE_REF_REGISTER_MOVE_BIT,
+                                 &regs.ref[src_reg & regs.ref_mask],
+                                 &regs.ref[dst_reg & regs.ref_mask]);
+    } else {
+      regs.i32[dst_reg & regs.i32_mask] = regs.i32[src_reg & regs.i32_mask];
+    }
+  }
+}
+
+// Discards ref registers in the list if they are marked move.
+// This can be used to eagerly release resources we don't need and reduces
+// memory consumption if used effectively prior to yields/waits.
+static void iree_vm_bytecode_dispatch_discard_registers(
+    const iree_vm_registers_t regs,
+    const iree_vm_register_list_t* IREE_RESTRICT reg_list) {
+  for (int i = 0; i < reg_list->size; ++i) {
+    // TODO(benvanik): change encoding to avoid this branching.
+    uint16_t reg = reg_list->registers[i];
+    // Only registers that are both refs and flagged MOVE are released here.
+    if ((reg & (IREE_REF_REGISTER_TYPE_BIT | IREE_REF_REGISTER_MOVE_BIT)) ==
+        (IREE_REF_REGISTER_TYPE_BIT | IREE_REF_REGISTER_MOVE_BIT)) {
+      iree_vm_ref_release(&regs.ref[reg & regs.ref_mask]);
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Stack management
+//===----------------------------------------------------------------------===//
+
+// Computes the i32/ref register banks for |frame| from its trailing storage.
+// The returned pointers alias the frame storage and are only valid while the
+// frame is live on the stack.
+static iree_vm_registers_t iree_vm_bytecode_get_register_storage(
+    iree_vm_stack_frame_t* frame) {
+  const iree_vm_bytecode_frame_storage_t* stack_storage =
+      (iree_vm_bytecode_frame_storage_t*)iree_vm_stack_frame_storage(frame);
+
+  // Masks indicate the valid bits of any register value within the range we
+  // have allocated in the storage. So for 4 registers we'd expect a 0b11 mask.
+  iree_vm_registers_t registers;
+  memset(&registers, 0, sizeof(registers));
+  registers.i32_mask = (uint16_t)(stack_storage->i32_register_count
+                                      ? stack_storage->i32_register_count - 1
+                                      : 0);
+  registers.ref_mask = (uint16_t)(stack_storage->ref_register_count
+                                      ? stack_storage->ref_register_count - 1
+                                      : 0);
+
+  // Register storage immediately follows the stack storage header.
+  registers.i32 = (int32_t*)((uintptr_t)stack_storage +
+                             stack_storage->i32_register_offset);
+  registers.ref = (iree_vm_ref_t*)((uintptr_t)stack_storage +
+                                   stack_storage->ref_register_offset);
+
+  return registers;
+}
+
+// Releases any remaining refs held in the frame storage.
+// Installed as the frame cleanup callback so refs are not leaked when a frame
+// is torn down.
+static void iree_vm_bytecode_stack_frame_cleanup(iree_vm_stack_frame_t* frame) {
+  iree_vm_registers_t regs = iree_vm_bytecode_get_register_storage(frame);
+  // TODO(benvanik): allow the VM to elide this when it's known that there are
+  // no more live registers.
+  // ref_mask is (count - 1), so <= walks every allocated ref register.
+  for (uint16_t i = 0; i <= regs.ref_mask; ++i) {
+    iree_vm_ref_t* ref = &regs.ref[i];
+    if (ref->ptr) iree_vm_ref_release(ref);
+  }
+}
+
+// Enters a bytecode function by pushing a new stack frame sized for the
+// register usage of |function| and computing the callee register banks.
+static iree_status_t iree_vm_bytecode_function_enter(
+    iree_vm_stack_t* stack, const iree_vm_function_t function,
+    iree_vm_stack_frame_t** out_callee_frame,
+    iree_vm_registers_t* out_callee_registers) {
+  iree_vm_bytecode_module_t* module =
+      (iree_vm_bytecode_module_t*)function.module->self;
+  if (IREE_UNLIKELY(function.ordinal >= module->function_descriptor_count)) {
+    // This is the ordinal of the function being entered (checked against the
+    // module's own descriptor table), not an import ordinal.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "function ordinal out of range");
+  }
+  const iree_vm_FunctionDescriptor_t* target_descriptor =
+      &module->function_descriptor_table[function.ordinal];
+
+  // We first compute the frame size of the callee and the masks we'll use to
+  // bounds check register access. This lets us allocate the entire frame
+  // (header, frame, and register storage) as a single pointer bump below.
+
+  // Round up register counts to the nearest power of 2 (if not already).
+  // This lets us use bit masks on register accesses to do bounds checking
+  // instead of more complex logic. The cost of these extra registers is only
+  // at worst 2x the required cost: so not large when thinking about the normal
+  // size of data used in an IREE app for tensors.
+  //
+  // Note that to allow the masking to work as a guard we need to ensure we at
+  // least allocate 1 register; this way an i32[reg & mask] will always point
+  // at valid memory even if mask == 0.
+  uint32_t i32_register_count = iree_math_round_up_to_pow2_u32(
+      VMMAX(1, target_descriptor->i32_register_count));
+  uint32_t ref_register_count = iree_math_round_up_to_pow2_u32(
+      VMMAX(1, target_descriptor->ref_register_count));
+  if (IREE_UNLIKELY(i32_register_count > IREE_I32_REGISTER_MASK) ||
+      IREE_UNLIKELY(ref_register_count > IREE_REF_REGISTER_MASK)) {
+    // Register count overflow. A valid compiler should never produce files
+    // that hit this.
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "register count overflow");
+  }
+
+  // We need to align the ref register start to the natural machine
+  // alignment in case the compiler is expecting that (it makes it easier to
+  // debug too).
+  iree_host_size_t header_size =
+      iree_host_align(sizeof(iree_vm_bytecode_frame_storage_t), 16);
+  iree_host_size_t i32_register_size =
+      iree_host_align(i32_register_count * sizeof(int32_t), 16);
+  iree_host_size_t ref_register_size =
+      iree_host_align(ref_register_count * sizeof(iree_vm_ref_t), 16);
+  iree_host_size_t frame_size =
+      header_size + i32_register_size + ref_register_size;
+
+  // Enter function and allocate stack frame storage.
+  IREE_RETURN_IF_ERROR(iree_vm_stack_function_enter(
+      stack, &function, IREE_VM_STACK_FRAME_BYTECODE, frame_size,
+      iree_vm_bytecode_stack_frame_cleanup, out_callee_frame));
+
+  // Stash metadata and compute register pointers.
+  iree_vm_bytecode_frame_storage_t* stack_storage =
+      (iree_vm_bytecode_frame_storage_t*)iree_vm_stack_frame_storage(
+          *out_callee_frame);
+  stack_storage->i32_register_count = i32_register_count;
+  stack_storage->ref_register_count = ref_register_count;
+  stack_storage->i32_register_offset = header_size;
+  stack_storage->ref_register_offset = header_size + i32_register_size;
+  *out_callee_registers =
+      iree_vm_bytecode_get_register_storage(*out_callee_frame);
+
+  return iree_ok_status();
+}
+
+// Enters an internal bytecode stack frame from an external caller.
+// A new |out_callee_frame| will be pushed to the stack with storage space for
+// the registers used by the function and |arguments| will be marshaled into
+// the ABI-defined registers.
+//
+// Note that callers are expected to have matched our expectations for
+// |arguments| and we don't validate that here.
+static iree_status_t iree_vm_bytecode_external_enter(
+    iree_vm_stack_t* stack, const iree_vm_function_t function,
+    iree_string_view_t cconv_arguments, iree_byte_span_t arguments,
+    iree_vm_stack_frame_t** out_callee_frame,
+    iree_vm_registers_t* out_callee_registers) {
+  // Enter the bytecode function and allocate registers.
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_enter(
+      stack, function, out_callee_frame, out_callee_registers));
+
+  // Marshal arguments from the ABI format to the VM registers.
+  // Arguments are assigned to the low registers of each bank in declaration
+  // order: i32/f32 consume one i32 register, i64/f64 consume two consecutive
+  // i32 registers, and refs are moved (ownership transferred) into the ref
+  // bank.
+  iree_vm_registers_t callee_registers = *out_callee_registers;
+  uint16_t i32_reg = 0;
+  uint16_t ref_reg = 0;
+  const uint8_t* p = arguments.data;
+  for (iree_host_size_t i = 0; i < cconv_arguments.size; ++i) {
+    switch (cconv_arguments.data[i]) {
+      case IREE_VM_CCONV_TYPE_VOID:
+        break;
+      case IREE_VM_CCONV_TYPE_I32:
+      case IREE_VM_CCONV_TYPE_F32: {
+        uint16_t dst_reg = i32_reg++;
+        memcpy(&callee_registers.i32[dst_reg & callee_registers.i32_mask], p,
+               sizeof(int32_t));
+        p += sizeof(int32_t);
+      } break;
+      case IREE_VM_CCONV_TYPE_I64:
+      case IREE_VM_CCONV_TYPE_F64: {
+        uint16_t dst_reg = i32_reg;
+        i32_reg += 2;
+        memcpy(&callee_registers.i32[dst_reg & callee_registers.i32_mask], p,
+               sizeof(int64_t));
+        p += sizeof(int64_t);
+      } break;
+      case IREE_VM_CCONV_TYPE_REF: {
+        uint16_t dst_reg = ref_reg++;
+        // Move: the caller's ABI buffer gives up its reference.
+        iree_vm_ref_move(
+            (iree_vm_ref_t*)p,
+            &callee_registers.ref[dst_reg & callee_registers.ref_mask]);
+        p += sizeof(iree_vm_ref_t);
+      } break;
+    }
+  }
+
+  return iree_ok_status();
+}
+
+// Leaves an internal bytecode stack frame and returns to an external caller.
+// Registers will be marshaled from the |src_reg_list| to the |results| buffer.
+//
+// Note that callers are expected to have matched our expectations for
+// |results| and we don't validate that here.
+static iree_status_t iree_vm_bytecode_external_leave(
+    iree_vm_stack_t* stack, iree_vm_stack_frame_t* callee_frame,
+    const iree_vm_registers_t* IREE_RESTRICT callee_registers,
+    const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
+    iree_string_view_t cconv_results, iree_byte_span_t results) {
+  // Marshal results from registers to the ABI results buffer.
+  uint8_t* p = results.data;
+  for (iree_host_size_t i = 0; i < cconv_results.size; ++i) {
+    uint16_t src_reg = src_reg_list->registers[i];
+    switch (cconv_results.data[i]) {
+      case IREE_VM_CCONV_TYPE_VOID:
+        break;
+      case IREE_VM_CCONV_TYPE_I32:
+      case IREE_VM_CCONV_TYPE_F32: {
+        memcpy(p, &callee_registers->i32[src_reg & callee_registers->i32_mask],
+               sizeof(int32_t));
+        p += sizeof(int32_t);
+      } break;
+      case IREE_VM_CCONV_TYPE_I64:
+      case IREE_VM_CCONV_TYPE_F64: {
+        // Clearing the low mask bit keeps the masked index pair-aligned for
+        // the two i32 registers holding the 64-bit value.
+        memcpy(
+            p,
+            &callee_registers->i32[src_reg & (callee_registers->i32_mask & ~1)],
+            sizeof(int64_t));
+        p += sizeof(int64_t);
+      } break;
+      case IREE_VM_CCONV_TYPE_REF: {
+        // Honors the move bit: moved refs transfer ownership to the caller.
+        iree_vm_ref_retain_or_move(
+            src_reg & IREE_REF_REGISTER_MOVE_BIT,
+            &callee_registers->ref[src_reg & callee_registers->ref_mask],
+            (iree_vm_ref_t*)p);
+        p += sizeof(iree_vm_ref_t);
+      } break;
+    }
+  }
+
+  // Leave and deallocate bytecode stack frame.
+  return iree_vm_stack_function_leave(stack);
+}
+
+// Enters an internal bytecode stack frame from a parent bytecode frame.
+// Registers in |src_reg_list| will be marshaled into the callee frame and the
+// |dst_reg_list| will be stashed for use when leaving the frame.
+static iree_status_t iree_vm_bytecode_internal_enter(
+    iree_vm_stack_t* stack, iree_vm_module_t* module, int32_t function_ordinal,
+    const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
+    const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list,
+    iree_vm_stack_frame_t** out_callee_frame,
+    iree_vm_registers_t* out_callee_registers) {
+  // Stash the destination register list for result values on the caller.
+  iree_vm_bytecode_frame_storage_t* caller_storage =
+      (iree_vm_bytecode_frame_storage_t*)iree_vm_stack_frame_storage(
+          iree_vm_stack_current_frame(stack));
+  caller_storage->return_registers = dst_reg_list;
+
+  // NOTE: after this call the caller registers may be invalid and need to be
+  // requeried.
+  iree_vm_function_t function;
+  function.module = module;
+  function.linkage = IREE_VM_FUNCTION_LINKAGE_INTERNAL;
+  function.ordinal = function_ordinal;
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_enter(
+      stack, function, out_callee_frame, out_callee_registers));
+
+  // Remaps argument/result registers from a source list in the caller/callee
+  // frame to the 0-N ABI registers in the callee/caller frame.
+  // This assumes that the destination stack frame registers are unused and ok
+  // to overwrite directly. Each bank begins left-aligned at 0 and increments
+  // per arg of its type.
+  iree_vm_registers_t src_regs =
+      iree_vm_bytecode_get_register_storage(iree_vm_stack_parent_frame(stack));
+  iree_vm_registers_t* dst_regs = out_callee_registers;
+  int i32_reg_offset = 0;
+  int ref_reg_offset = 0;
+  for (int i = 0; i < src_reg_list->size; ++i) {
+    // TODO(benvanik): change encoding to avoid this branching.
+    // Could write two arrays: one for prims and one for refs.
+    uint16_t src_reg = src_reg_list->registers[i];
+    if (src_reg & IREE_REF_REGISTER_TYPE_BIT) {
+      uint16_t dst_reg = ref_reg_offset++;
+      // NOTE(review): destination slot is zeroed before the move; presumably
+      // fresh frame storage may contain garbage refs - confirm.
+      memset(&dst_regs->ref[dst_reg & dst_regs->ref_mask], 0,
+             sizeof(iree_vm_ref_t));
+      iree_vm_ref_retain_or_move(src_reg & IREE_REF_REGISTER_MOVE_BIT,
+                                 &src_regs.ref[src_reg & src_regs.ref_mask],
+                                 &dst_regs->ref[dst_reg & dst_regs->ref_mask]);
+    } else {
+      uint16_t dst_reg = i32_reg_offset++;
+      dst_regs->i32[dst_reg & dst_regs->i32_mask] =
+          src_regs.i32[src_reg & src_regs.i32_mask];
+    }
+  }
+
+  return iree_ok_status();
+}
+
+// Leaves an internal bytecode stack frame and returns to the parent bytecode
+// frame. |src_reg_list| registers will be marshaled into the dst_reg_list
+// provided by the caller frame when entering.
+static iree_status_t iree_vm_bytecode_internal_leave(
+    iree_vm_stack_t* stack, iree_vm_stack_frame_t* callee_frame,
+    const iree_vm_registers_t callee_registers,
+    const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
+    iree_vm_stack_frame_t** out_caller_frame,
+    iree_vm_registers_t* out_caller_registers) {
+  // Remaps registers from source to destination across frames.
+  // Registers from the |src_regs| will be copied/moved to |dst_regs| with the
+  // mappings provided by |src_reg_list| and |dst_reg_list|. It's assumed that
+  // the mappings are matching by type and - in the case that they aren't -
+  // things will get weird (but not crash).
+  *out_caller_frame = iree_vm_stack_parent_frame(stack);
+  iree_vm_bytecode_frame_storage_t* caller_storage =
+      (iree_vm_bytecode_frame_storage_t*)iree_vm_stack_frame_storage(
+          *out_caller_frame);
+  const iree_vm_register_list_t* dst_reg_list =
+      caller_storage->return_registers;
+  // NOTE(review): VMCHECK presumably compiles out in some builds; the explicit
+  // status check below covers that case - confirm against VMCHECK definition.
+  VMCHECK(src_reg_list->size <= dst_reg_list->size);
+  if (IREE_UNLIKELY(src_reg_list->size > dst_reg_list->size)) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "src/dst reg count mismatch on internal return");
+  }
+  iree_vm_registers_t caller_registers =
+      iree_vm_bytecode_get_register_storage(*out_caller_frame);
+  for (int i = 0; i < src_reg_list->size; ++i) {
+    // TODO(benvanik): change encoding to avoid this branching.
+    // Could write two arrays: one for prims and one for refs.
+    uint16_t src_reg = src_reg_list->registers[i];
+    uint16_t dst_reg = dst_reg_list->registers[i];
+    if (src_reg & IREE_REF_REGISTER_TYPE_BIT) {
+      // Honors the move bit: moved refs transfer ownership to the caller.
+      iree_vm_ref_retain_or_move(
+          src_reg & IREE_REF_REGISTER_MOVE_BIT,
+          &callee_registers.ref[src_reg & callee_registers.ref_mask],
+          &caller_registers.ref[dst_reg & caller_registers.ref_mask]);
+    } else {
+      caller_registers.i32[dst_reg & caller_registers.i32_mask] =
+          callee_registers.i32[src_reg & callee_registers.i32_mask];
+    }
+  }
+
+  // Leave and deallocate bytecode stack frame.
+  *out_caller_registers = caller_registers;
+  return iree_vm_stack_function_leave(stack);
+}
+
+// Populates an import call's ABI argument buffer by marshaling registers from
+// |src_reg_list| into |storage| as described by the |cconv_arguments| string.
+// |segment_size_list| supplies the per-span element counts for variadic
+// segments and may be NULL when the cconv contains no spans.
+static void iree_vm_bytecode_populate_import_cconv_arguments(
+    iree_string_view_t cconv_arguments,
+    const iree_vm_registers_t caller_registers,
+    const iree_vm_register_list_t* IREE_RESTRICT segment_size_list,
+    const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
+    iree_byte_span_t storage) {
+  uint8_t* IREE_RESTRICT p = storage.data;
+  // |seg_i| indexes |segment_size_list| in lockstep with the outer cconv
+  // characters while |reg_i| walks the flat source register list.
+  for (iree_host_size_t i = 0, seg_i = 0, reg_i = 0; i < cconv_arguments.size;
+       ++i, ++seg_i) {
+    switch (cconv_arguments.data[i]) {
+      case IREE_VM_CCONV_TYPE_VOID:
+        break;
+      case IREE_VM_CCONV_TYPE_I32:
+      case IREE_VM_CCONV_TYPE_F32: {
+        memcpy(p,
+               &caller_registers.i32[src_reg_list->registers[reg_i++] &
+                                     caller_registers.i32_mask],
+               sizeof(int32_t));
+        p += sizeof(int32_t);
+      } break;
+      case IREE_VM_CCONV_TYPE_I64:
+      case IREE_VM_CCONV_TYPE_F64: {
+        // Clearing the low mask bit keeps the masked index pair-aligned for
+        // the two i32 registers holding the 64-bit value.
+        memcpy(p,
+               &caller_registers.i32[src_reg_list->registers[reg_i++] &
+                                     (caller_registers.i32_mask & ~1)],
+               sizeof(int64_t));
+        p += sizeof(int64_t);
+      } break;
+      case IREE_VM_CCONV_TYPE_REF: {
+        uint16_t src_reg = src_reg_list->registers[reg_i++];
+        // Assign (no retain): the register keeps ownership for the call.
+        iree_vm_ref_assign(
+            &caller_registers.ref[src_reg & caller_registers.ref_mask],
+            (iree_vm_ref_t*)p);
+        p += sizeof(iree_vm_ref_t);
+      } break;
+      case IREE_VM_CCONV_TYPE_SPAN_START: {
+        VMCHECK(segment_size_list);
+        // Spans are encoded as a leading element count followed by
+        // |span_count| repetitions of the span body.
+        int32_t span_count = segment_size_list->registers[seg_i];
+        memcpy(p, &span_count, sizeof(int32_t));
+        p += sizeof(int32_t);
+        if (!span_count) {
+          // No items; skip the span.
+          do {
+            ++i;
+          } while (i < cconv_arguments.size &&
+                   cconv_arguments.data[i] != IREE_VM_CCONV_TYPE_SPAN_END);
+          continue;
+        }
+        iree_host_size_t span_start_i = i + 1;
+        for (int32_t j = 0; j < span_count; ++j) {
+          for (i = span_start_i;
+               i < cconv_arguments.size &&
+               cconv_arguments.data[i] != IREE_VM_CCONV_TYPE_SPAN_END;
+               ++i) {
+            // TODO(benvanik): share with switch above.
+            switch (cconv_arguments.data[i]) {
+              case IREE_VM_CCONV_TYPE_VOID:
+                break;
+              case IREE_VM_CCONV_TYPE_I32:
+              case IREE_VM_CCONV_TYPE_F32: {
+                memcpy(p,
+                       &caller_registers.i32[src_reg_list->registers[reg_i++] &
+                                             caller_registers.i32_mask],
+                       sizeof(int32_t));
+                p += sizeof(int32_t);
+              } break;
+              case IREE_VM_CCONV_TYPE_I64:
+              case IREE_VM_CCONV_TYPE_F64: {
+                memcpy(p,
+                       &caller_registers.i32[src_reg_list->registers[reg_i++] &
+                                             (caller_registers.i32_mask & ~1)],
+                       sizeof(int64_t));
+                p += sizeof(int64_t);
+              } break;
+              case IREE_VM_CCONV_TYPE_REF: {
+                uint16_t src_reg = src_reg_list->registers[reg_i++];
+                iree_vm_ref_assign(
+                    &caller_registers.ref[src_reg & caller_registers.ref_mask],
+                    (iree_vm_ref_t*)p);
+                p += sizeof(iree_vm_ref_t);
+              } break;
+            }
+          }
+        }
+      } break;
+    }
+  }
+}
+
+// Issues a populated import call and marshals the results into |dst_reg_list|.
+// On success |out_caller_frame|/|out_caller_registers| are requeried from the
+// stack after the call returns.
+static iree_status_t iree_vm_bytecode_issue_import_call(
+    iree_vm_stack_t* stack, const iree_vm_function_call_t call,
+    iree_string_view_t cconv_results,
+    const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list,
+    iree_vm_stack_frame_t** out_caller_frame,
+    iree_vm_registers_t* out_caller_registers,
+    iree_vm_execution_result_t* out_result) {
+  // Call external function.
+  iree_status_t call_status = call.function.module->begin_call(
+      call.function.module->self, stack, &call, out_result);
+  if (IREE_UNLIKELY(!iree_status_is_ok(call_status))) {
+    // TODO(benvanik): set execution result to failure/capture stack.
+    return iree_status_annotate(call_status,
+                                iree_make_cstring_view("while calling import"));
+  }
+
+  // NOTE: we don't support yielding within imported functions right now so
+  // it's safe to assume the stack is still valid here. If the called function
+  // can yield then we'll need to requery all pointers here.
+  *out_caller_frame = iree_vm_stack_current_frame(stack);
+  *out_caller_registers =
+      iree_vm_bytecode_get_register_storage(*out_caller_frame);
+
+  // Marshal outputs from the ABI results buffer to registers.
+  iree_vm_registers_t caller_registers = *out_caller_registers;
+  uint8_t* IREE_RESTRICT p = call.results.data;
+  for (iree_host_size_t i = 0; i < cconv_results.size && i < dst_reg_list->size;
+       ++i) {
+    uint16_t dst_reg = dst_reg_list->registers[i];
+    switch (cconv_results.data[i]) {
+      case IREE_VM_CCONV_TYPE_VOID:
+        break;
+      case IREE_VM_CCONV_TYPE_I32:
+      case IREE_VM_CCONV_TYPE_F32:
+        memcpy(&caller_registers.i32[dst_reg & caller_registers.i32_mask], p,
+               sizeof(int32_t));
+        p += sizeof(int32_t);
+        break;
+      case IREE_VM_CCONV_TYPE_I64:
+      case IREE_VM_CCONV_TYPE_F64:
+        memcpy(
+            &caller_registers.i32[dst_reg & (caller_registers.i32_mask & ~1)],
+            p, sizeof(int64_t));
+        p += sizeof(int64_t);
+        break;
+      case IREE_VM_CCONV_TYPE_REF:
+        // Results transfer ownership: move the ref out of the ABI buffer.
+        iree_vm_ref_move(
+            (iree_vm_ref_t*)p,
+            &caller_registers.ref[dst_reg & caller_registers.ref_mask]);
+        p += sizeof(iree_vm_ref_t);
+        break;
+    }
+  }
+
+  return iree_ok_status();
+}
+
+// Verifies that the requested import is valid and returns its table entry.
+// Returns NOT_FOUND when an optional import was never resolved at load time.
+static iree_status_t iree_vm_bytecode_verify_import(
+    iree_vm_stack_t* stack, const iree_vm_bytecode_module_state_t* module_state,
+    uint32_t import_ordinal, const iree_vm_bytecode_import_t** out_import) {
+  *out_import = NULL;
+
+  // NOTE(review): the top bit of the ordinal is masked off - presumably a
+  // flag bit in the opcode encoding; confirm against the bytecode format.
+  import_ordinal &= 0x7FFFFFFFu;
+  if (IREE_UNLIKELY(import_ordinal >= module_state->import_count)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "import ordinal %u out of range", import_ordinal);
+  }
+
+  const iree_vm_bytecode_import_t* import =
+      &module_state->import_table[import_ordinal];
+  if (!import->function.module) {
+    // Unresolved optional import: look up the declaration so the error can
+    // name the missing function.
+    iree_vm_function_t decl_function;
+    IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_ordinal(
+        iree_vm_stack_current_frame(stack)->function.module,
+        IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL, import_ordinal,
+        &decl_function));
+    iree_string_view_t import_name = iree_vm_function_name(&decl_function);
+    // (void) quiets unused-variable warnings, presumably for builds where
+    // iree_make_status drops its message arguments - see the status macros.
+    (void)import_name;
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "optional import `%.*s` (ordinal %u) not resolved",
+                            (int)import_name.size, import_name.data,
+                            import_ordinal);
+  }
+
+  *out_import = import;
+  return iree_ok_status();
+}
+
+// Calls an imported function from another module.
+// Marshals the |src_reg_list| registers into ABI storage and results into
+// |dst_reg_list|.
+static iree_status_t iree_vm_bytecode_call_import(
+    iree_vm_stack_t* stack, const iree_vm_bytecode_module_state_t* module_state,
+    uint32_t import_ordinal, const iree_vm_registers_t caller_registers,
+    const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
+    const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list,
+    iree_vm_stack_frame_t** out_caller_frame,
+    iree_vm_registers_t* out_caller_registers,
+    iree_vm_execution_result_t* out_result) {
+  // Prepare |call| by looking up the import information.
+  const iree_vm_bytecode_import_t* import = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_verify_import(stack, module_state,
+                                                      import_ordinal, &import));
+
+  iree_vm_function_call_t call;
+  memset(&call, 0, sizeof(call));
+  call.function = import->function;
+
+  // Marshal inputs from registers to the ABI arguments buffer.
+  // Argument/result storage is alloca'd: it lives only for the duration of
+  // this call frame, which is fine as the call is issued before returning.
+  call.arguments.data_length = import->argument_buffer_size;
+  call.arguments.data = iree_alloca(call.arguments.data_length);
+  memset(call.arguments.data, 0, call.arguments.data_length);
+  iree_vm_bytecode_populate_import_cconv_arguments(
+      import->arguments, caller_registers,
+      /*segment_size_list=*/NULL, src_reg_list, call.arguments);
+
+  // Issue the call and handle results.
+  call.results.data_length = import->result_buffer_size;
+  call.results.data = iree_alloca(call.results.data_length);
+  memset(call.results.data, 0, call.results.data_length);
+  return iree_vm_bytecode_issue_import_call(stack, call, import->results,
+                                            dst_reg_list, out_caller_frame,
+                                            out_caller_registers, out_result);
+}
+
+// Calls a variadic imported function from another module.
+// Marshals the |src_reg_list| registers into ABI storage and results into
+// |dst_reg_list|. |segment_size_list| contains the counts within each segment.
+static iree_status_t iree_vm_bytecode_call_import_variadic(
+    iree_vm_stack_t* stack, const iree_vm_bytecode_module_state_t* module_state,
+    uint32_t import_ordinal, const iree_vm_registers_t caller_registers,
+    const iree_vm_register_list_t* IREE_RESTRICT segment_size_list,
+    const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
+    const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list,
+    iree_vm_stack_frame_t** out_caller_frame,
+    iree_vm_registers_t* out_caller_registers,
+    iree_vm_execution_result_t* out_result) {
+  // Prepare |call| by looking up the import information.
+  const iree_vm_bytecode_import_t* import = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_verify_import(stack, module_state,
+                                                      import_ordinal, &import));
+
+  iree_vm_function_call_t call;
+  memset(&call, 0, sizeof(call));
+  call.function = import->function;
+
+  // Allocate ABI argument/result storage taking into account the variadic
+  // segments. Unlike the non-variadic path the argument size depends on the
+  // per-call segment counts and must be computed here.
+  IREE_RETURN_IF_ERROR(iree_vm_function_call_compute_cconv_fragment_size(
+      import->arguments, segment_size_list, &call.arguments.data_length));
+  call.arguments.data = iree_alloca(call.arguments.data_length);
+  memset(call.arguments.data, 0, call.arguments.data_length);
+
+  // Marshal inputs from registers to the ABI arguments buffer.
+  iree_vm_bytecode_populate_import_cconv_arguments(
+      import->arguments, caller_registers, segment_size_list, src_reg_list,
+      call.arguments);
+
+  // Issue the call and handle results.
+  call.results.data_length = import->result_buffer_size;
+  call.results.data = iree_alloca(call.results.data_length);
+  memset(call.results.data, 0, call.results.data_length);
+  return iree_vm_bytecode_issue_import_call(stack, call, import->results,
+                                            dst_reg_list, out_caller_frame,
+                                            out_caller_registers, out_result);
+}
+
+//===----------------------------------------------------------------------===//
+// Main interpreter dispatch routine
+//===----------------------------------------------------------------------===//
+
+iree_status_t iree_vm_bytecode_dispatch(
+ iree_vm_stack_t* stack, iree_vm_bytecode_module_t* module,
+ const iree_vm_function_call_t* call, iree_string_view_t cconv_arguments,
+ iree_string_view_t cconv_results, iree_vm_execution_result_t* out_result) {
+ memset(out_result, 0, sizeof(*out_result));
+
+ // When required emit the dispatch tables here referencing the labels we are
+ // defining below.
+ DEFINE_DISPATCH_TABLES();
+
+ // Enter function (as this is the initial call).
+ // The callee's return will take care of storing the output registers when it
+ // actually does return, either immediately or in the future via a resume.
+ iree_vm_stack_frame_t* current_frame = NULL;
+ iree_vm_registers_t regs;
+ IREE_RETURN_IF_ERROR(
+ iree_vm_bytecode_external_enter(stack, call->function, cconv_arguments,
+                                      call->arguments, &current_frame, &regs));
+
+ // Primary dispatch state. This is our 'native stack frame' and really
+ // just enough to make dereferencing common addresses (like the current
+ // offset) faster. You can think of this like CPU state (like PC).
+ //
+ // The hope is that the compiler decides to keep these in registers (as
+ // they are touched for every instruction executed). The frame will change
+ // as we call into different functions.
+ const iree_vm_bytecode_module_state_t* IREE_RESTRICT module_state =
+ (iree_vm_bytecode_module_state_t*)current_frame->module_state;
+ const uint8_t* IREE_RESTRICT bytecode_data =
+ module->bytecode_data.data +
+ module->function_descriptor_table[current_frame->function.ordinal]
+ .bytecode_offset;
+ iree_vm_source_offset_t pc = current_frame->pc;
+ const int32_t entry_frame_depth = current_frame->depth;
+
+ BEGIN_DISPATCH_CORE() {
+ //===------------------------------------------------------------------===//
+ // Globals
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(CORE, GlobalLoadI32, {
+ uint32_t byte_offset = VM_DecGlobalAttr("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ int32_t* value = VM_DecResultRegI32("value");
+ const int32_t global_value =
+ vm_global_load_i32(module_state->rwdata_storage.data, byte_offset);
+ *value = global_value;
+ });
+
+ DISPATCH_OP(CORE, GlobalStoreI32, {
+ uint32_t byte_offset = VM_DecGlobalAttr("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ int32_t value = VM_DecOperandRegI32("value");
+ vm_global_store_i32(module_state->rwdata_storage.data, byte_offset,
+ value);
+ });
+
+ DISPATCH_OP(CORE, GlobalLoadIndirectI32, {
+ uint32_t byte_offset = VM_DecOperandRegI32("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ int32_t* value = VM_DecResultRegI32("value");
+ const int32_t global_value =
+ vm_global_load_i32(module_state->rwdata_storage.data, byte_offset);
+ *value = global_value;
+ });
+
+ DISPATCH_OP(CORE, GlobalStoreIndirectI32, {
+ uint32_t byte_offset = VM_DecOperandRegI32("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ int32_t value = VM_DecOperandRegI32("value");
+ vm_global_store_i32(module_state->rwdata_storage.data, byte_offset,
+ value);
+ });
+
+ DISPATCH_OP(CORE, GlobalLoadRef, {
+ uint32_t global = VM_DecGlobalAttr("global");
+ if (IREE_UNLIKELY(global >= module_state->global_ref_count)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global ref ordinal out of range: %d (table=%zu)", global,
+ module_state->global_ref_count);
+ }
+ const iree_vm_type_def_t* type_def = VM_DecTypeOf("value");
+ bool result_is_move;
+ iree_vm_ref_t* result = VM_DecResultRegRef("value", &result_is_move);
+ iree_vm_ref_t* global_ref = &module_state->global_ref_table[global];
+ IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+ result_is_move, global_ref, type_def->ref_type, result));
+ });
+
+ DISPATCH_OP(CORE, GlobalStoreRef, {
+ uint32_t global = VM_DecGlobalAttr("global");
+ if (IREE_UNLIKELY(global >= module_state->global_ref_count)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global ref ordinal out of range: %d (table=%zu)", global,
+ module_state->global_ref_count);
+ }
+ const iree_vm_type_def_t* type_def = VM_DecTypeOf("value");
+ bool value_is_move;
+ iree_vm_ref_t* value = VM_DecOperandRegRef("value", &value_is_move);
+ iree_vm_ref_t* global_ref = &module_state->global_ref_table[global];
+ IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+ value_is_move, value, type_def->ref_type, global_ref));
+ });
+
+ DISPATCH_OP(CORE, GlobalLoadIndirectRef, {
+ uint32_t global = VM_DecOperandRegI32("global");
+ if (IREE_UNLIKELY(global >= module_state->global_ref_count)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global ref ordinal out of range: %d (table=%zu)", global,
+ module_state->global_ref_count);
+ }
+ const iree_vm_type_def_t* type_def = VM_DecTypeOf("value");
+ bool result_is_move;
+ iree_vm_ref_t* result = VM_DecResultRegRef("value", &result_is_move);
+ iree_vm_ref_t* global_ref = &module_state->global_ref_table[global];
+ IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+ result_is_move, global_ref, type_def->ref_type, result));
+ });
+
+ DISPATCH_OP(CORE, GlobalStoreIndirectRef, {
+ uint32_t global = VM_DecOperandRegI32("global");
+ if (IREE_UNLIKELY(global >= module_state->global_ref_count)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global ref ordinal out of range: %d (table=%zu)", global,
+ module_state->global_ref_count);
+ }
+ const iree_vm_type_def_t* type_def = VM_DecTypeOf("value");
+ bool value_is_move;
+ iree_vm_ref_t* value = VM_DecOperandRegRef("value", &value_is_move);
+ iree_vm_ref_t* global_ref = &module_state->global_ref_table[global];
+ IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+ value_is_move, value, type_def->ref_type, global_ref));
+ });
+
+ //===------------------------------------------------------------------===//
+ // Constants
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(CORE, ConstI32, {
+ int32_t value = VM_DecIntAttr32("value");
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = value;
+ });
+
+ DISPATCH_OP(CORE, ConstI32Zero, {
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = 0;
+ });
+
+ DISPATCH_OP(CORE, ConstRefZero, {
+ bool result_is_move;
+ iree_vm_ref_t* result = VM_DecResultRegRef("result", &result_is_move);
+ iree_vm_ref_release(result);
+ });
+
+ DISPATCH_OP(CORE, ConstRefRodata, {
+ uint32_t rodata_ordinal = VM_DecRodataAttr("rodata");
+ if (IREE_UNLIKELY(rodata_ordinal >= module_state->rodata_ref_count)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "rodata ref ordinal out of range: %d (table=%zu)", rodata_ordinal,
+ module_state->rodata_ref_count);
+ }
+ bool result_is_move;
+ iree_vm_ref_t* result = VM_DecResultRegRef("value", &result_is_move);
+ IREE_RETURN_IF_ERROR(iree_vm_ref_wrap_retain(
+ &module_state->rodata_ref_table[rodata_ordinal],
+ iree_vm_buffer_type_id(), result));
+ });
+
+ //===------------------------------------------------------------------===//
+ // Buffers
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(CORE, BufferAlloc, {
+ uint32_t length = VM_DecOperandRegI32("length");
+ bool result_is_move;
+ iree_vm_ref_t* result_ref = VM_DecResultRegRef("result", &result_is_move);
+ iree_vm_buffer_t* buffer = NULL;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_create(
+ IREE_VM_BUFFER_ACCESS_MUTABLE | IREE_VM_BUFFER_ACCESS_ORIGIN_GUEST,
+ length, module_state->allocator, &buffer));
+ IREE_RETURN_IF_ERROR(iree_vm_ref_wrap_assign(
+ buffer, iree_vm_buffer_type_id(), result_ref));
+ });
+
+ DISPATCH_OP(CORE, BufferClone, {
+ bool source_is_move;
+ iree_vm_ref_t* source_ref =
+ VM_DecOperandRegRef("source", &source_is_move);
+ iree_vm_buffer_t* source = iree_vm_buffer_deref(*source_ref);
+ if (IREE_UNLIKELY(!source)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "source is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("offset");
+ uint32_t length = VM_DecOperandRegI32("length");
+ bool result_is_move;
+ iree_vm_ref_t* result_ref = VM_DecResultRegRef("result", &result_is_move);
+ iree_vm_buffer_t* result = NULL;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_clone(
+ IREE_VM_BUFFER_ACCESS_MUTABLE | IREE_VM_BUFFER_ACCESS_ORIGIN_GUEST,
+ source, offset, length, module_state->allocator, &result));
+ IREE_RETURN_IF_ERROR(iree_vm_ref_wrap_assign(
+ result, iree_vm_buffer_type_id(), result_ref));
+ });
+
+ DISPATCH_OP(CORE, BufferLength, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "buffer is null");
+ }
+ uint32_t* result = VM_DecResultRegI32("result");
+ *result = (uint32_t)iree_vm_buffer_length(buffer);
+ });
+
+ DISPATCH_OP(CORE, BufferCopy, {
+ bool source_buffer_is_move;
+ iree_vm_ref_t* source_buffer_ref =
+ VM_DecOperandRegRef("source_buffer", &source_buffer_is_move);
+ iree_vm_buffer_t* source_buffer =
+ iree_vm_buffer_deref(*source_buffer_ref);
+ if (IREE_UNLIKELY(!source_buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "source_buffer is null");
+ }
+ uint32_t source_offset = VM_DecOperandRegI32("source_offset");
+ bool target_buffer_is_move;
+ iree_vm_ref_t* target_buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &target_buffer_is_move);
+ iree_vm_buffer_t* target_buffer =
+ iree_vm_buffer_deref(*target_buffer_ref);
+ if (IREE_UNLIKELY(!target_buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "target_buffer is null");
+ }
+ uint32_t target_offset = VM_DecOperandRegI32("target_offset");
+ uint32_t length = VM_DecOperandRegI32("length");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_copy_bytes(
+ source_buffer, source_offset, target_buffer, target_offset, length));
+ });
+
+ DISPATCH_OP(CORE, BufferCompare, {
+ bool lhs_buffer_is_move;
+ iree_vm_ref_t* lhs_buffer_ref =
+ VM_DecOperandRegRef("lhs_buffer", &lhs_buffer_is_move);
+ iree_vm_buffer_t* lhs_buffer = iree_vm_buffer_deref(*lhs_buffer_ref);
+ if (IREE_UNLIKELY(!lhs_buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "lhs_buffer is null");
+ }
+ uint32_t lhs_offset = VM_DecOperandRegI32("lhs_offset");
+ bool rhs_buffer_is_move;
+ iree_vm_ref_t* rhs_buffer_ref =
+ VM_DecOperandRegRef("rhs_buffer", &rhs_buffer_is_move);
+ iree_vm_buffer_t* rhs_buffer = iree_vm_buffer_deref(*rhs_buffer_ref);
+ if (IREE_UNLIKELY(!rhs_buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "rhs_buffer is null");
+ }
+ uint32_t rhs_offset = VM_DecOperandRegI32("rhs_offset");
+ uint32_t length = VM_DecOperandRegI32("length");
+ uint32_t* result_ptr = VM_DecResultRegI32("result");
+ bool result = false;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_compare_bytes(
+ lhs_buffer, lhs_offset, rhs_buffer, rhs_offset, length, &result));
+ *result_ptr = result ? 1 : 0;
+ });
+
+ // TODO(benvanik): rework dispatch so that the FillI* ops can share the same
+ // body - they all only vary by the length passed to fill_elements. The
+ // gotcha is that on big-endian machines we'd have to flip around the bytes.
+ // See VMOpcodesCore.td for more information on the encoding.
+ DISPATCH_OP(CORE, BufferFillI8, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint32_t length = VM_DecOperandRegI32("length");
+ uint8_t value = (uint8_t)VM_DecOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_fill_elements(
+ buffer, offset, length / sizeof(uint8_t), sizeof(uint8_t), &value));
+ });
+ DISPATCH_OP(CORE, BufferFillI16, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint32_t length = VM_DecOperandRegI32("length");
+ uint16_t value = (uint16_t)VM_DecOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_fill_elements(
+ buffer, offset, length / sizeof(uint16_t), sizeof(uint16_t), &value));
+ });
+ DISPATCH_OP(CORE, BufferFillI32, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint32_t length = VM_DecOperandRegI32("length");
+ uint32_t value = VM_DecOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_fill_elements(
+ buffer, offset, length / sizeof(uint32_t), sizeof(uint32_t), &value));
+ });
+
+ // TODO(benvanik): rework dispatch so that the LoadI* ops can share the same
+ // body - they only vary on the length and sign/zero extension mode but
+ // can be packed into a single handler to reduce code-size.
+ // See VMOpcodesCore.td for more information on the encoding.
+ DISPATCH_OP(CORE, BufferLoadI8U, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("source_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "source_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("source_offset");
+ uint32_t* result_ptr = VM_DecResultRegI32("result");
+ uint8_t result_x8 = 0;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_read_elements(
+ buffer, offset, &result_x8, 1, sizeof(result_x8)));
+ *result_ptr = vm_ext_i8i32u(result_x8);
+ });
+ DISPATCH_OP(CORE, BufferLoadI8S, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("source_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "source_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("source_offset");
+ uint32_t* result_ptr = VM_DecResultRegI32("result");
+ int8_t result_x8 = 0;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_read_elements(
+ buffer, offset, &result_x8, 1, sizeof(result_x8)));
+ *result_ptr = vm_ext_i8i32s(result_x8);
+ });
+ DISPATCH_OP(CORE, BufferLoadI16U, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("source_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "source_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("source_offset");
+ uint32_t* result_ptr = VM_DecResultRegI32("result");
+ uint16_t result_x16 = 0;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_read_elements(
+ buffer, offset, &result_x16, 1, sizeof(result_x16)));
+ *result_ptr = vm_ext_i16i32u(result_x16);
+ });
+ DISPATCH_OP(CORE, BufferLoadI16S, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("source_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "source_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("source_offset");
+ uint32_t* result_ptr = VM_DecResultRegI32("result");
+ int16_t result_x16 = 0;
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_read_elements(
+ buffer, offset, &result_x16, 1, sizeof(result_x16)));
+ *result_ptr = vm_ext_i16i32s(result_x16);
+ });
+ DISPATCH_OP(CORE, BufferLoadI32, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("source_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "source_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("source_offset");
+ uint32_t* result = VM_DecResultRegI32("result");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_read_elements(buffer, offset, result,
+ 1, sizeof(*result)));
+ });
+
+ // TODO(benvanik): rework dispatch so that the StoreI* ops can share the
+ // same body - they only vary on the length.
+ // See VMOpcodesCore.td for more information on the encoding.
+ DISPATCH_OP(CORE, BufferStoreI8, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "target_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint8_t value = (uint8_t)VM_DecOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_write_elements(&value, buffer, offset,
+ 1, sizeof(uint8_t)));
+ });
+ DISPATCH_OP(CORE, BufferStoreI16, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "target_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint16_t value = (uint16_t)VM_DecOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_write_elements(&value, buffer, offset,
+ 1, sizeof(uint16_t)));
+ });
+ DISPATCH_OP(CORE, BufferStoreI32, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "target_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint32_t value = VM_DecOperandRegI32("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_write_elements(&value, buffer, offset,
+ 1, sizeof(uint32_t)));
+ });
+
+ //===------------------------------------------------------------------===//
+ // Lists
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(CORE, ListAlloc, {
+ const iree_vm_type_def_t* element_type_def = VM_DecTypeOf("element_type");
+ uint32_t initial_capacity = VM_DecOperandRegI32("initial_capacity");
+ bool result_is_move;
+ iree_vm_ref_t* result = VM_DecResultRegRef("result", &result_is_move);
+ iree_vm_list_t* list = NULL;
+ IREE_RETURN_IF_ERROR(iree_vm_list_create(
+ element_type_def, initial_capacity, module_state->allocator, &list));
+ IREE_RETURN_IF_ERROR(
+ iree_vm_ref_wrap_assign(list, iree_vm_list_type_id(), result));
+ });
+
+ DISPATCH_OP(CORE, ListReserve, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t minimum_capacity = VM_DecOperandRegI32("minimum_capacity");
+ IREE_RETURN_IF_ERROR(iree_vm_list_reserve(list, minimum_capacity));
+ });
+
+ DISPATCH_OP(CORE, ListSize, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = (int32_t)iree_vm_list_size(list);
+ });
+
+ DISPATCH_OP(CORE, ListResize, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t new_size = VM_DecOperandRegI32("new_size");
+ IREE_RETURN_IF_ERROR(iree_vm_list_resize(list, new_size));
+ });
+
+ DISPATCH_OP(CORE, ListGetI32, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t index = VM_DecOperandRegI32("index");
+ int32_t* result = VM_DecResultRegI32("result");
+ iree_vm_value_t value;
+ IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
+ list, index, IREE_VM_VALUE_TYPE_I32, &value));
+ *result = value.i32;
+ });
+
+ DISPATCH_OP(CORE, ListSetI32, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t index = VM_DecOperandRegI32("index");
+ int32_t raw_value = VM_DecOperandRegI32("raw_value");
+ iree_vm_value_t value = iree_vm_value_make_i32(raw_value);
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_value(list, index, &value));
+ });
+
+ DISPATCH_OP(CORE, ListGetRef, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t index = VM_DecOperandRegI32("index");
+ const iree_vm_type_def_t* type_def = VM_DecTypeOf("result");
+ bool result_is_move;
+ iree_vm_ref_t* result = VM_DecResultRegRef("result", &result_is_move);
+ // TODO(benvanik): use result_is_move with a _retain_or_move.
+ IREE_RETURN_IF_ERROR(iree_vm_list_get_ref_retain(list, index, result));
+ if (result->type != IREE_VM_REF_TYPE_NULL &&
+ (iree_vm_type_def_is_value(type_def) ||
+ result->type != type_def->ref_type)) {
+ // Type mismatch; put null in the register instead.
+ // TODO(benvanik): return an error here and make a query type method?
+ iree_vm_ref_release(result);
+ }
+ });
+
+ DISPATCH_OP(CORE, ListSetRef, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t index = VM_DecOperandRegI32("index");
+ bool operand_is_move;
+ iree_vm_ref_t* operand = VM_DecOperandRegRef("value", &operand_is_move);
+ if (operand_is_move) {
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_ref_move(list, index, operand));
+ } else {
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_ref_retain(list, index, operand));
+ }
+ });
+
+ //===------------------------------------------------------------------===//
+ // Conditional assignment
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(CORE, SelectI32, {
+ int32_t condition = VM_DecOperandRegI32("condition");
+ int32_t true_value = VM_DecOperandRegI32("true_value");
+ int32_t false_value = VM_DecOperandRegI32("false_value");
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_select_i32(condition, true_value, false_value);
+ });
+
+ DISPATCH_OP(CORE, SelectRef, {
+ int32_t condition = VM_DecOperandRegI32("condition");
+ // TODO(benvanik): remove the type_id and use either LHS/RHS (if both are
+ // null then output is always null so no need to know the type).
+ const iree_vm_type_def_t* type_def = VM_DecTypeOf("true_value");
+ bool true_value_is_move;
+ iree_vm_ref_t* true_value =
+ VM_DecOperandRegRef("true_value", &true_value_is_move);
+ bool false_value_is_move;
+ iree_vm_ref_t* false_value =
+ VM_DecOperandRegRef("false_value", &false_value_is_move);
+ bool result_is_move;
+ iree_vm_ref_t* result = VM_DecResultRegRef("result", &result_is_move);
+ if (condition) {
+ // Select LHS.
+ IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+ true_value_is_move, true_value, type_def->ref_type, result));
+ if (false_value_is_move && false_value != result) {
+ iree_vm_ref_release(false_value);
+ }
+ } else {
+ // Select RHS.
+ IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+ false_value_is_move, false_value, type_def->ref_type, result));
+ if (true_value_is_move && true_value != result) {
+ iree_vm_ref_release(true_value);
+ }
+ }
+ });
+
+ DISPATCH_OP(CORE, SwitchI32, {
+ int32_t index = VM_DecOperandRegI32("index");
+ int32_t default_value = VM_DecIntAttr32("default_value");
+ const iree_vm_register_list_t* value_reg_list =
+ VM_DecVariadicOperands("values");
+ int32_t* result = VM_DecResultRegI32("result");
+ if (index >= 0 && index < value_reg_list->size) {
+ *result = regs.i32[value_reg_list->registers[index] & regs.i32_mask];
+ } else {
+ *result = default_value;
+ }
+ });
+
+ DISPATCH_OP(CORE, SwitchRef, {
+ int32_t index = VM_DecOperandRegI32("index");
+ const iree_vm_type_def_t* type_def = VM_DecTypeOf("result");
+ bool default_is_move;
+ iree_vm_ref_t* default_value =
+ VM_DecOperandRegRef("default_value", &default_is_move);
+ const iree_vm_register_list_t* value_reg_list =
+ VM_DecVariadicOperands("values");
+ bool result_is_move;
+ iree_vm_ref_t* result = VM_DecResultRegRef("result", &result_is_move);
+ if (index >= 0 && index < value_reg_list->size) {
+ bool is_move =
+ value_reg_list->registers[index] & IREE_REF_REGISTER_MOVE_BIT;
+ iree_vm_ref_t* new_value =
+            &regs.ref[value_reg_list->registers[index] & regs.ref_mask];
+ IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+ is_move, new_value, type_def->ref_type, result));
+ } else {
+ IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+ default_is_move, default_value, type_def->ref_type, result));
+ }
+ });
+
+ //===------------------------------------------------------------------===//
+ // Native integer arithmetic
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP_CORE_BINARY_I32(AddI32, vm_add_i32);
+ DISPATCH_OP_CORE_BINARY_I32(SubI32, vm_sub_i32);
+ DISPATCH_OP_CORE_BINARY_I32(MulI32, vm_mul_i32);
+ DISPATCH_OP_CORE_BINARY_I32(DivI32S, vm_div_i32s);
+ DISPATCH_OP_CORE_BINARY_I32(DivI32U, vm_div_i32u);
+ DISPATCH_OP_CORE_BINARY_I32(RemI32S, vm_rem_i32s);
+ DISPATCH_OP_CORE_BINARY_I32(RemI32U, vm_rem_i32u);
+ DISPATCH_OP_CORE_TERNARY_I32(FMAI32, vm_fma_i32);
+ DISPATCH_OP_CORE_UNARY_I32(NotI32, vm_not_i32);
+ DISPATCH_OP_CORE_BINARY_I32(AndI32, vm_and_i32);
+ DISPATCH_OP_CORE_BINARY_I32(OrI32, vm_or_i32);
+ DISPATCH_OP_CORE_BINARY_I32(XorI32, vm_xor_i32);
+
+ //===------------------------------------------------------------------===//
+ // Casting and type conversion/emulation
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP_CORE_UNARY_I32(TruncI32I8, vm_trunc_i32i8);
+ DISPATCH_OP_CORE_UNARY_I32(TruncI32I16, vm_trunc_i32i16);
+ DISPATCH_OP_CORE_UNARY_I32(ExtI8I32S, vm_ext_i8i32s);
+ DISPATCH_OP_CORE_UNARY_I32(ExtI8I32U, vm_ext_i8i32u);
+ DISPATCH_OP_CORE_UNARY_I32(ExtI16I32S, vm_ext_i16i32s);
+ DISPATCH_OP_CORE_UNARY_I32(ExtI16I32U, vm_ext_i16i32u);
+
+ //===------------------------------------------------------------------===//
+ // Native bitwise shifts and rotates
+ //===------------------------------------------------------------------===//
+
+#define DISPATCH_OP_CORE_SHIFT_I32(op_name, op_func) \
+ DISPATCH_OP(CORE, op_name, { \
+ int32_t operand = VM_DecOperandRegI32("operand"); \
+ int32_t amount = VM_DecOperandRegI32("amount"); \
+ int32_t* result = VM_DecResultRegI32("result"); \
+ *result = op_func(operand, amount); \
+ });
+
+ DISPATCH_OP_CORE_SHIFT_I32(ShlI32, vm_shl_i32);
+ DISPATCH_OP_CORE_SHIFT_I32(ShrI32S, vm_shr_i32s);
+ DISPATCH_OP_CORE_SHIFT_I32(ShrI32U, vm_shr_i32u);
+
+ //===------------------------------------------------------------------===//
+ // Comparison ops
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP_CORE_BINARY_I32(CmpEQI32, vm_cmp_eq_i32);
+ DISPATCH_OP_CORE_BINARY_I32(CmpNEI32, vm_cmp_ne_i32);
+ DISPATCH_OP_CORE_BINARY_I32(CmpLTI32S, vm_cmp_lt_i32s);
+ DISPATCH_OP_CORE_BINARY_I32(CmpLTI32U, vm_cmp_lt_i32u);
+ DISPATCH_OP_CORE_UNARY_I32(CmpNZI32, vm_cmp_nz_i32);
+
+ DISPATCH_OP(CORE, CmpEQRef, {
+ bool lhs_is_move;
+ iree_vm_ref_t* lhs = VM_DecOperandRegRef("lhs", &lhs_is_move);
+ bool rhs_is_move;
+ iree_vm_ref_t* rhs = VM_DecOperandRegRef("rhs", &rhs_is_move);
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_cmp_eq_ref(lhs, rhs);
+ if (lhs_is_move) iree_vm_ref_release(lhs);
+ if (rhs_is_move) iree_vm_ref_release(rhs);
+ });
+ DISPATCH_OP(CORE, CmpNERef, {
+ bool lhs_is_move;
+ iree_vm_ref_t* lhs = VM_DecOperandRegRef("lhs", &lhs_is_move);
+ bool rhs_is_move;
+ iree_vm_ref_t* rhs = VM_DecOperandRegRef("rhs", &rhs_is_move);
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_cmp_ne_ref(lhs, rhs);
+ if (lhs_is_move) iree_vm_ref_release(lhs);
+ if (rhs_is_move) iree_vm_ref_release(rhs);
+ });
+ DISPATCH_OP(CORE, CmpNZRef, {
+ bool operand_is_move;
+ iree_vm_ref_t* operand = VM_DecOperandRegRef("operand", &operand_is_move);
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_cmp_nz_ref(operand);
+ if (operand_is_move) iree_vm_ref_release(operand);
+ });
+
+ //===------------------------------------------------------------------===//
+ // Control flow
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(CORE, Branch, {
+ int32_t block_pc = VM_DecBranchTarget("dest");
+ const iree_vm_register_remap_list_t* remap_list =
+ VM_DecBranchOperands("operands");
+ pc = block_pc;
+ iree_vm_bytecode_dispatch_remap_branch_registers(regs, remap_list);
+ });
+
+ DISPATCH_OP(CORE, CondBranch, {
+ int32_t condition = VM_DecOperandRegI32("condition");
+ int32_t true_block_pc = VM_DecBranchTarget("true_dest");
+ const iree_vm_register_remap_list_t* true_remap_list =
+ VM_DecBranchOperands("true_operands");
+ int32_t false_block_pc = VM_DecBranchTarget("false_dest");
+ const iree_vm_register_remap_list_t* false_remap_list =
+ VM_DecBranchOperands("false_operands");
+ if (condition) {
+ pc = true_block_pc;
+ iree_vm_bytecode_dispatch_remap_branch_registers(regs, true_remap_list);
+ } else {
+ pc = false_block_pc;
+ iree_vm_bytecode_dispatch_remap_branch_registers(regs,
+ false_remap_list);
+ }
+ });
+
+ DISPATCH_OP(CORE, Call, {
+ int32_t function_ordinal = VM_DecFuncAttr("callee");
+ const iree_vm_register_list_t* src_reg_list =
+ VM_DecVariadicOperands("operands");
+ const iree_vm_register_list_t* dst_reg_list =
+ VM_DecVariadicResults("results");
+ current_frame->pc = pc;
+
+ // NOTE: we assume validation has ensured these functions exist.
+ // TODO(benvanik): something more clever than just a high bit?
+ int is_import = (function_ordinal & 0x80000000u) != 0;
+ if (is_import) {
+ // Call import (and possible yield).
+ IREE_RETURN_IF_ERROR(iree_vm_bytecode_call_import(
+ stack, module_state, function_ordinal, regs, src_reg_list,
+          dst_reg_list, &current_frame, &regs, out_result));
+ } else {
+ // Switch execution to the target function and continue running in the
+ // bytecode dispatcher.
+ IREE_RETURN_IF_ERROR(iree_vm_bytecode_internal_enter(
+ stack, current_frame->function.module, function_ordinal,
+          src_reg_list, dst_reg_list, &current_frame, &regs));
+ bytecode_data =
+ module->bytecode_data.data +
+ module->function_descriptor_table[function_ordinal].bytecode_offset;
+ pc = current_frame->pc;
+ }
+ });
+
+ DISPATCH_OP(CORE, CallVariadic, {
+ // TODO(benvanik): dedupe with above or merge and always have the seg size
+ // list be present (but empty) for non-variadic calls.
+ int32_t function_ordinal = VM_DecFuncAttr("callee");
+ const iree_vm_register_list_t* segment_size_list =
+ VM_DecVariadicOperands("segment_sizes");
+ const iree_vm_register_list_t* src_reg_list =
+ VM_DecVariadicOperands("operands");
+ const iree_vm_register_list_t* dst_reg_list =
+ VM_DecVariadicResults("results");
+ current_frame->pc = pc;
+
+ // NOTE: we assume validation has ensured these functions exist.
+ // TODO(benvanik): something more clever than just a high bit?
+ int is_import = (function_ordinal & 0x80000000u) != 0;
+ if (IREE_UNLIKELY(!is_import)) {
+ // Variadic calls are currently only supported for import functions.
+ return iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "variadic calls only supported for internal callees");
+ }
+
+ // Call import (and possible yield).
+ IREE_RETURN_IF_ERROR(iree_vm_bytecode_call_import_variadic(
+ stack, module_state, function_ordinal, regs, segment_size_list,
+        src_reg_list, dst_reg_list, &current_frame, &regs, out_result));
+ });
+
+ DISPATCH_OP(CORE, Return, {
+ const iree_vm_register_list_t* src_reg_list =
+ VM_DecVariadicOperands("operands");
+ current_frame->pc = pc;
+
+ if (current_frame->depth <= entry_frame_depth) {
+ // Return from the top-level entry frame - return back to call().
+      return iree_vm_bytecode_external_leave(stack, current_frame, &regs,
+ src_reg_list, cconv_results,
+ call->results);
+ }
+
+ // Store results into the caller frame and pop back to the parent.
+ IREE_RETURN_IF_ERROR(iree_vm_bytecode_internal_leave(
+        stack, current_frame, regs, src_reg_list, &current_frame, &regs));
+
+ // Reset dispatch state so we can continue executing in the caller.
+ bytecode_data =
+ module->bytecode_data.data +
+ module->function_descriptor_table[current_frame->function.ordinal]
+ .bytecode_offset;
+ pc = current_frame->pc;
+ });
+
+ DISPATCH_OP(CORE, Fail, {
+ uint32_t status_code = VM_DecOperandRegI32("status");
+ iree_string_view_t message;
+ VM_DecStrAttr("message", &message);
+ if (status_code != 0) {
+ // TODO(benvanik): capture source information.
+ return iree_status_allocate_f(status_code, "<vm>", 0, "%.*s",
+ (int)message.size, message.data);
+ }
+ });
+
+ DISPATCH_OP(CORE, ImportResolved, {
+ uint32_t function_ordinal = VM_DecFuncAttr("import");
+ int32_t* result = VM_DecResultRegI32("result");
+ uint32_t import_ordinal = function_ordinal & 0x7FFFFFFFu;
+ if (IREE_UNLIKELY(import_ordinal >= module_state->import_count)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "import ordinal out of range");
+ }
+ const iree_vm_bytecode_import_t* import =
+ &module_state->import_table[import_ordinal];
+ *result = import->function.module != NULL ? 1 : 0;
+ });
+
+ //===------------------------------------------------------------------===//
+ // Async/fiber ops
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(CORE, Yield, {
+ // Perform branch before yielding; in this way we will resume at the
+ // target without needing to retain any information about the yield.
+ int32_t block_pc = VM_DecBranchTarget("dest");
+ const iree_vm_register_remap_list_t* remap_list =
+ VM_DecBranchOperands("operands");
+ iree_vm_bytecode_dispatch_remap_branch_registers(regs, remap_list);
+ pc = block_pc;
+
+ // Return magic status code indicating a yield.
+ // This isn't an error, though callers not supporting coroutines will
+ // treat it as one and propagate it up.
+ return iree_status_from_code(IREE_STATUS_DEFERRED);
+ });
+
+ //===------------------------------------------------------------------===//
+ // Debugging
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(CORE, Trace, {
+ iree_string_view_t event_name;
+ VM_DecStrAttr("event_name", &event_name);
+ const iree_vm_register_list_t* src_reg_list =
+ VM_DecVariadicOperands("operands");
+ // TODO(benvanik): trace (if enabled).
+ iree_vm_bytecode_dispatch_discard_registers(regs, src_reg_list);
+ });
+
+ DISPATCH_OP(CORE, Print, {
+ iree_string_view_t event_name;
+ VM_DecStrAttr("event_name", &event_name);
+ const iree_vm_register_list_t* src_reg_list =
+ VM_DecVariadicOperands("operands");
+ // TODO(benvanik): print.
+ iree_vm_bytecode_dispatch_discard_registers(regs, src_reg_list);
+ });
+
+ DISPATCH_OP(CORE, Break, {
+ // TODO(benvanik): break unconditionally.
+ int32_t block_pc = VM_DecBranchTarget("dest");
+ const iree_vm_register_remap_list_t* remap_list =
+ VM_DecBranchOperands("operands");
+ iree_vm_bytecode_dispatch_remap_branch_registers(regs, remap_list);
+ pc = block_pc;
+ });
+
+ DISPATCH_OP(CORE, CondBreak, {
+ int32_t condition = VM_DecOperandRegI32("condition");
+ if (condition) {
+ // TODO(benvanik): cond break.
+ }
+ int32_t block_pc = VM_DecBranchTarget("dest");
+ const iree_vm_register_remap_list_t* remap_list =
+ VM_DecBranchOperands("operands");
+ iree_vm_bytecode_dispatch_remap_branch_registers(regs, remap_list);
+ pc = block_pc;
+ });
+
+ //===------------------------------------------------------------------===//
+ // Extension trampolines
+ //===------------------------------------------------------------------===//
+
+#if IREE_VM_EXT_I64_ENABLE
+ BEGIN_DISPATCH_PREFIX(PrefixExtI64, EXT_I64) {
+ //===----------------------------------------------------------------===//
+ // ExtI64: Globals
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_I64, GlobalLoadI64, {
+ uint32_t byte_offset = VM_DecGlobalAttr("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ int64_t* value = VM_DecResultRegI64("value");
+ const int64_t global_value =
+ vm_global_load_i64(module_state->rwdata_storage.data, byte_offset);
+ *value = global_value;
+ });
+
+ DISPATCH_OP(EXT_I64, GlobalStoreI64, {
+ uint32_t byte_offset = VM_DecGlobalAttr("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ int64_t value = VM_DecOperandRegI64("value");
+ vm_global_store_i64(module_state->rwdata_storage.data, byte_offset,
+ value);
+ });
+
+ DISPATCH_OP(EXT_I64, GlobalLoadIndirectI64, {
+ uint32_t byte_offset = VM_DecOperandRegI32("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ int64_t* value = VM_DecResultRegI64("value");
+ const int64_t global_value =
+ vm_global_load_i64(module_state->rwdata_storage.data, byte_offset);
+ *value = global_value;
+ });
+
+ DISPATCH_OP(EXT_I64, GlobalStoreIndirectI64, {
+ uint32_t byte_offset = VM_DecOperandRegI32("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ int64_t value = VM_DecOperandRegI64("value");
+ vm_global_store_i64(module_state->rwdata_storage.data, byte_offset,
+ value);
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Constants
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_I64, ConstI64, {
+ int64_t value = VM_DecIntAttr64("value");
+ int64_t* result = VM_DecResultRegI64("result");
+ *result = value;
+ });
+
+ DISPATCH_OP(EXT_I64, ConstI64Zero, {
+ int64_t* result = VM_DecResultRegI64("result");
+ *result = 0;
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Lists
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_I64, ListGetI64, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t index = VM_DecOperandRegI32("index");
+ int64_t* result = VM_DecResultRegI64("result");
+ iree_vm_value_t value;
+ IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
+ list, index, IREE_VM_VALUE_TYPE_I64, &value));
+ *result = value.i64;
+ });
+
+ DISPATCH_OP(EXT_I64, ListSetI64, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t index = VM_DecOperandRegI32("index");
+ int64_t raw_value = VM_DecOperandRegI64("value");
+ iree_vm_value_t value = iree_vm_value_make_i64(raw_value);
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_value(list, index, &value));
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Conditional assignment
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_I64, SelectI64, {
+ int32_t condition = VM_DecOperandRegI32("condition");
+ int64_t true_value = VM_DecOperandRegI64("true_value");
+ int64_t false_value = VM_DecOperandRegI64("false_value");
+ int64_t* result = VM_DecResultRegI64("result");
+ *result = vm_select_i64(condition, true_value, false_value);
+ });
+
+ DISPATCH_OP(EXT_I64, SwitchI64, {
+ int32_t index = VM_DecOperandRegI32("index");
+ int64_t default_value = VM_DecIntAttr64("default_value");
+ const iree_vm_register_list_t* value_reg_list =
+ VM_DecVariadicOperands("values");
+ int64_t* result = VM_DecResultRegI64("result");
+ if (index >= 0 && index < value_reg_list->size) {
+ *result =
+ regs.i32[value_reg_list->registers[index] & (regs.i32_mask & ~1)];
+ } else {
+ *result = default_value;
+ }
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Native integer arithmetic
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP_EXT_I64_BINARY_I64(AddI64, vm_add_i64);
+ DISPATCH_OP_EXT_I64_BINARY_I64(SubI64, vm_sub_i64);
+ DISPATCH_OP_EXT_I64_BINARY_I64(MulI64, vm_mul_i64);
+ DISPATCH_OP_EXT_I64_BINARY_I64(DivI64S, vm_div_i64s);
+ DISPATCH_OP_EXT_I64_BINARY_I64(DivI64U, vm_div_i64u);
+ DISPATCH_OP_EXT_I64_BINARY_I64(RemI64S, vm_rem_i64s);
+ DISPATCH_OP_EXT_I64_BINARY_I64(RemI64U, vm_rem_i64u);
+ DISPATCH_OP_EXT_I64_TERNARY_I64(FMAI64, vm_fma_i64);
+ DISPATCH_OP_EXT_I64_UNARY_I64(NotI64, vm_not_i64);
+ DISPATCH_OP_EXT_I64_BINARY_I64(AndI64, vm_and_i64);
+ DISPATCH_OP_EXT_I64_BINARY_I64(OrI64, vm_or_i64);
+ DISPATCH_OP_EXT_I64_BINARY_I64(XorI64, vm_xor_i64);
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Casting and type conversion/emulation
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_I64, TruncI64I32, {
+ int64_t operand = VM_DecOperandRegI64("operand");
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_trunc_i64i32(operand);
+ });
+ DISPATCH_OP(EXT_I64, ExtI32I64S, {
+ int32_t operand = VM_DecOperandRegI32("operand");
+ int64_t* result = VM_DecResultRegI64("result");
+ *result = vm_ext_i32i64s(operand);
+ });
+ DISPATCH_OP(EXT_I64, ExtI32I64U, {
+ int32_t operand = VM_DecOperandRegI32("operand");
+ int64_t* result = VM_DecResultRegI64("result");
+ *result = vm_ext_i32i64u(operand);
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Native bitwise shifts and rotates
+ //===----------------------------------------------------------------===//
+
+#define DISPATCH_OP_EXT_I64_SHIFT_I64(op_name, op_func) \
+ DISPATCH_OP(EXT_I64, op_name, { \
+ int64_t operand = VM_DecOperandRegI64("operand"); \
+ int32_t amount = VM_DecOperandRegI32("amount"); \
+ int64_t* result = VM_DecResultRegI64("result"); \
+ *result = op_func(operand, amount); \
+ });
+
+ DISPATCH_OP_EXT_I64_SHIFT_I64(ShlI64, vm_shl_i64);
+ DISPATCH_OP_EXT_I64_SHIFT_I64(ShrI64S, vm_shr_i64s);
+ DISPATCH_OP_EXT_I64_SHIFT_I64(ShrI64U, vm_shr_i64u);
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Comparison ops
+ //===----------------------------------------------------------------===//
+
+#define DISPATCH_OP_EXT_I64_CMP_I64(op_name, op_func) \
+ DISPATCH_OP(EXT_I64, op_name, { \
+ int64_t lhs = VM_DecOperandRegI64("lhs"); \
+ int64_t rhs = VM_DecOperandRegI64("rhs"); \
+ int32_t* result = VM_DecResultRegI32("result"); \
+ *result = op_func(lhs, rhs); \
+ });
+
+ DISPATCH_OP_EXT_I64_CMP_I64(CmpEQI64, vm_cmp_eq_i64);
+ DISPATCH_OP_EXT_I64_CMP_I64(CmpNEI64, vm_cmp_ne_i64);
+ DISPATCH_OP_EXT_I64_CMP_I64(CmpLTI64S, vm_cmp_lt_i64s);
+ DISPATCH_OP_EXT_I64_CMP_I64(CmpLTI64U, vm_cmp_lt_i64u);
+ DISPATCH_OP(EXT_I64, CmpNZI64, {
+ int64_t operand = VM_DecOperandRegI64("operand");
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_cmp_nz_i64(operand);
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtI64: Buffers
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_I64, BufferFillI64, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint32_t length = VM_DecOperandRegI32("length");
+ uint64_t value = VM_DecOperandRegI64("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_fill_elements(
+ buffer, offset, length / sizeof(uint64_t), sizeof(uint64_t),
+ &value));
+ });
+
+ DISPATCH_OP(EXT_I64, BufferLoadI64, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("source_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "source_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("source_offset");
+ uint64_t* result = VM_DecResultRegI64("result");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_read_elements(
+ buffer, offset, result, 1, sizeof(*result)));
+ });
+
+ DISPATCH_OP(EXT_I64, BufferStoreI64, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "target_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint64_t value = (uint64_t)VM_DecOperandRegI64("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_write_elements(
+ &value, buffer, offset, 1, sizeof(uint64_t)));
+ });
+ }
+ END_DISPATCH_PREFIX();
+#else
+ UNHANDLED_DISPATCH_PREFIX(PrefixExtI64, EXT_I64);
+#endif // IREE_VM_EXT_I64_ENABLE
+
+#if IREE_VM_EXT_F32_ENABLE
+ BEGIN_DISPATCH_PREFIX(PrefixExtF32, EXT_F32) {
+ //===----------------------------------------------------------------===//
+ // ExtF32: Globals
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_F32, GlobalLoadF32, {
+ uint32_t byte_offset = VM_DecGlobalAttr("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ float* value = VM_DecResultRegF32("value");
+ const float global_value =
+ vm_global_load_f32(module_state->rwdata_storage.data, byte_offset);
+ *value = global_value;
+ });
+
+ DISPATCH_OP(EXT_F32, GlobalStoreF32, {
+ uint32_t byte_offset = VM_DecGlobalAttr("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ float value = VM_DecOperandRegF32("value");
+ vm_global_store_f32(module_state->rwdata_storage.data, byte_offset,
+ value);
+ });
+
+ DISPATCH_OP(EXT_F32, GlobalLoadIndirectF32, {
+ uint32_t byte_offset = VM_DecOperandRegI32("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ float* value = VM_DecResultRegF32("value");
+ const float global_value =
+ vm_global_load_f32(module_state->rwdata_storage.data, byte_offset);
+ *value = global_value;
+ });
+
+ DISPATCH_OP(EXT_F32, GlobalStoreIndirectF32, {
+ uint32_t byte_offset = VM_DecOperandRegI32("global");
+ if (IREE_UNLIKELY(byte_offset >=
+ module_state->rwdata_storage.data_length)) {
+ return iree_make_status(
+ IREE_STATUS_OUT_OF_RANGE,
+ "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
+ module_state->rwdata_storage.data_length);
+ }
+ float value = VM_DecOperandRegF32("value");
+ vm_global_store_f32(module_state->rwdata_storage.data, byte_offset,
+ value);
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Constants
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_F32, ConstF32, {
+ float value = VM_DecFloatAttr32("value");
+ float* result = VM_DecResultRegF32("result");
+ *result = value;
+ });
+
+ DISPATCH_OP(EXT_F32, ConstF32Zero, {
+ float* result = VM_DecResultRegF32("result");
+ *result = 0;
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Lists
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_F32, ListGetF32, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t index = VM_DecOperandRegI32("index");
+ float* result = VM_DecResultRegF32("result");
+ iree_vm_value_t value;
+ IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
+ list, index, IREE_VM_VALUE_TYPE_F32, &value));
+ *result = value.f32;
+ });
+
+ DISPATCH_OP(EXT_F32, ListSetF32, {
+ bool list_is_move;
+ iree_vm_ref_t* list_ref = VM_DecOperandRegRef("list", &list_is_move);
+ iree_vm_list_t* list = iree_vm_list_deref(*list_ref);
+ if (IREE_UNLIKELY(!list)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "list is null");
+ }
+ uint32_t index = VM_DecOperandRegI32("index");
+ float raw_value = VM_DecOperandRegF32("value");
+ iree_vm_value_t value = iree_vm_value_make_f32(raw_value);
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_value(list, index, &value));
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Conditional assignment
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_F32, SelectF32, {
+ int32_t condition = VM_DecOperandRegI32("condition");
+ float true_value = VM_DecOperandRegF32("true_value");
+ float false_value = VM_DecOperandRegF32("false_value");
+ float* result = VM_DecResultRegF32("result");
+ *result = vm_select_f32(condition, true_value, false_value);
+ });
+
+ DISPATCH_OP(EXT_F32, SwitchF32, {
+ int32_t index = VM_DecOperandRegI32("index");
+ float default_value = VM_DecFloatAttr32("default_value");
+ const iree_vm_register_list_t* value_reg_list =
+ VM_DecVariadicOperands("values");
+ float* result = VM_DecResultRegF32("result");
+ if (index >= 0 && index < value_reg_list->size) {
+        *result = *((float*)&regs.i32[value_reg_list->registers[index] &
+ (regs.i32_mask & ~1)]);
+ } else {
+ *result = default_value;
+ }
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Native floating-point arithmetic
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP_EXT_F32_BINARY_F32(AddF32, vm_add_f32);
+ DISPATCH_OP_EXT_F32_BINARY_F32(SubF32, vm_sub_f32);
+ DISPATCH_OP_EXT_F32_BINARY_F32(MulF32, vm_mul_f32);
+ DISPATCH_OP_EXT_F32_BINARY_F32(DivF32, vm_div_f32);
+ DISPATCH_OP_EXT_F32_BINARY_F32(RemF32, vm_rem_f32);
+ DISPATCH_OP_EXT_F32_TERNARY_F32(FMAF32, vm_fma_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(AbsF32, vm_abs_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(NegF32, vm_neg_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(CeilF32, vm_ceil_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(FloorF32, vm_floor_f32);
+
+ DISPATCH_OP_EXT_F32_UNARY_F32(AtanF32, vm_atan_f32);
+ DISPATCH_OP_EXT_F32_BINARY_F32(Atan2F32, vm_atan2_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(CosF32, vm_cos_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(SinF32, vm_sin_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(ExpF32, vm_exp_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(Exp2F32, vm_exp2_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(ExpM1F32, vm_expm1_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(LogF32, vm_log_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(Log10F32, vm_log10_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(Log1pF32, vm_log1p_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(Log2F32, vm_log2_f32);
+ DISPATCH_OP_EXT_F32_BINARY_F32(PowF32, vm_pow_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(RsqrtF32, vm_rsqrt_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(SqrtF32, vm_sqrt_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(TanhF32, vm_tanh_f32);
+ DISPATCH_OP_EXT_F32_UNARY_F32(ErfF32, vm_erf_f32);
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Casting and type conversion/emulation
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_F32, CastSI32F32, {
+ int32_t operand = (int32_t)VM_DecOperandRegI32("operand");
+ float* result = VM_DecResultRegF32("result");
+ *result = vm_cast_si32f32(operand);
+ });
+ DISPATCH_OP(EXT_F32, CastUI32F32, {
+ int32_t operand = (int32_t)VM_DecOperandRegI32("operand");
+ float* result = VM_DecResultRegF32("result");
+ *result = vm_cast_ui32f32(operand);
+ });
+ DISPATCH_OP(EXT_F32, CastF32SI32, {
+ float operand = VM_DecOperandRegF32("operand");
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_cast_f32si32(operand);
+ });
+ DISPATCH_OP(EXT_F32, CastF32UI32, {
+ float operand = VM_DecOperandRegF32("operand");
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_cast_f32ui32(operand);
+ });
+ DISPATCH_OP(EXT_F32, BitcastI32F32, {
+ int32_t operand = (int32_t)VM_DecOperandRegI32("operand");
+ float* result = VM_DecResultRegF32("result");
+ *result = vm_bitcast_i32f32(operand);
+ });
+ DISPATCH_OP(EXT_F32, BitcastF32I32, {
+ float operand = VM_DecOperandRegF32("operand");
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_bitcast_f32i32(operand);
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Comparison ops
+ //===----------------------------------------------------------------===//
+
+#define DISPATCH_OP_EXT_F32_CMP_F32(op_name, op_func) \
+ DISPATCH_OP(EXT_F32, op_name, { \
+ float lhs = VM_DecOperandRegF32("lhs"); \
+ float rhs = VM_DecOperandRegF32("rhs"); \
+ int32_t* result = VM_DecResultRegI32("result"); \
+ *result = op_func(lhs, rhs); \
+ });
+
+ DISPATCH_OP_EXT_F32_CMP_F32(CmpEQF32O, vm_cmp_eq_f32o);
+ DISPATCH_OP_EXT_F32_CMP_F32(CmpEQF32U, vm_cmp_eq_f32u);
+ DISPATCH_OP_EXT_F32_CMP_F32(CmpNEF32O, vm_cmp_ne_f32o);
+ DISPATCH_OP_EXT_F32_CMP_F32(CmpNEF32U, vm_cmp_ne_f32u);
+ DISPATCH_OP_EXT_F32_CMP_F32(CmpLTF32O, vm_cmp_lt_f32o);
+ DISPATCH_OP_EXT_F32_CMP_F32(CmpLTF32U, vm_cmp_lt_f32u);
+ DISPATCH_OP_EXT_F32_CMP_F32(CmpLTEF32O, vm_cmp_lte_f32o);
+ DISPATCH_OP_EXT_F32_CMP_F32(CmpLTEF32U, vm_cmp_lte_f32u);
+ DISPATCH_OP(EXT_F32, CmpNaNF32, {
+ float operand = VM_DecOperandRegF32("operand");
+ int32_t* result = VM_DecResultRegI32("result");
+ *result = vm_cmp_nan_f32(operand);
+ });
+
+ //===----------------------------------------------------------------===//
+ // ExtF32: Buffers
+ //===----------------------------------------------------------------===//
+
+ DISPATCH_OP(EXT_F32, BufferFillF32, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ uint32_t length = VM_DecOperandRegI32("length");
+ float value = VM_DecOperandRegF32("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_fill_elements(
+ buffer, offset, length / sizeof(float), sizeof(float), &value));
+ });
+
+ DISPATCH_OP(EXT_F32, BufferLoadF32, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("source_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "source_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("source_offset");
+ float* result = VM_DecResultRegF32("result");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_read_elements(
+ buffer, offset, result, 1, sizeof(*result)));
+ });
+
+ DISPATCH_OP(EXT_F32, BufferStoreF32, {
+ bool buffer_is_move;
+ iree_vm_ref_t* buffer_ref =
+ VM_DecOperandRegRef("target_buffer", &buffer_is_move);
+ iree_vm_buffer_t* buffer = iree_vm_buffer_deref(*buffer_ref);
+ if (IREE_UNLIKELY(!buffer)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "target_buffer is null");
+ }
+ uint32_t offset = VM_DecOperandRegI32("target_offset");
+ float value = VM_DecOperandRegF32("value");
+ IREE_RETURN_IF_ERROR(iree_vm_buffer_write_elements(
+ &value, buffer, offset, 1, sizeof(float)));
+ });
+ }
+ END_DISPATCH_PREFIX();
+#else
+ UNHANDLED_DISPATCH_PREFIX(PrefixExtF32, EXT_F32);
+#endif // IREE_VM_EXT_F32_ENABLE
+
+ DISPATCH_OP(CORE, PrefixExtF64,
+ { return iree_make_status(IREE_STATUS_UNIMPLEMENTED); });
+
+ // NOLINTNEXTLINE(misc-static-assert)
+ DISPATCH_UNHANDLED_CORE();
+ }
+ END_DISPATCH_CORE();
+}
diff --git a/runtime/src/iree/vm/bytecode_dispatch_test.cc b/runtime/src/iree/vm/bytecode_dispatch_test.cc
new file mode 100644
index 0000000..fa2a1a5
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_dispatch_test.cc
@@ -0,0 +1,137 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Tests covering the dispatch logic for individual ops.
+//
+// iree/vm/test/*.mlir contains the functions used here for testing. We
+// avoid defining the IR inline here so that we can run this test on platforms
+// that we can't run the full MLIR compiler stack on.
+
+#include "iree/base/logging.h"
+#include "iree/base/status_cc.h"
+#include "iree/testing/gtest.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode_module.h"
+
+// Compiled module embedded here to avoid file IO:
+#include "iree/vm/test/all_bytecode_modules.h"
+
+namespace {
+
+// Identifies one (embedded module, exported function) pair; the test suite
+// instantiates one parameterized test case per TestParams value.
+struct TestParams {
+  // Embedded compiled bytecode module containing the function under test.
+  const struct iree_file_toc_t& module_file;
+  // Name of the exported function to invoke.
+  std::string function_name;
+};
+
+// Streams a TestParams as "<sanitized module name>_<function name>" for use
+// as a gtest parameterized test case name (which only permits [A-Za-z0-9_]).
+std::ostream& operator<<(std::ostream& os, const TestParams& params) {
+  std::string name{params.module_file.name};
+  // NOTE: name_sv aliases name's buffer, so the replace calls below mutate
+  // the local std::string in place before it is streamed.
+  auto name_sv = iree_make_string_view(name.data(), name.size());
+  iree_string_view_replace_char(name_sv, ':', '_');
+  iree_string_view_replace_char(name_sv, '.', '_');
+  return os << name << "_" << params.function_name;
+}
+
+// Builds the cross-product of (embedded module, exported function) test
+// parameters. Each embedded module is temporarily instantiated just to
+// enumerate its exports; the fixture re-creates the module per test case.
+std::vector<TestParams> GetModuleTestParams() {
+  std::vector<TestParams> test_params;
+
+  // Builtin types must be registered before any module can be created.
+  IREE_CHECK_OK(iree_vm_register_builtin_types());
+
+  const struct iree_file_toc_t* module_file_toc =
+      all_bytecode_modules_c_create();
+  for (size_t i = 0; i < all_bytecode_modules_c_size(); ++i) {
+    const auto& module_file = module_file_toc[i];
+    iree_vm_module_t* module = nullptr;
+    // Map the embedded module bytes in place (iree_allocator_null: no copy).
+    IREE_CHECK_OK(iree_vm_bytecode_module_create(
+        iree_const_byte_span_t{
+            reinterpret_cast<const uint8_t*>(module_file.data),
+            module_file.size},
+        iree_allocator_null(), iree_allocator_system(), &module));
+    iree_vm_module_signature_t signature = module->signature(module->self);
+    test_params.reserve(test_params.size() + signature.export_function_count);
+    // `ordinal` (not `i`) so the export index does not shadow the module
+    // index of the outer loop.
+    for (size_t ordinal = 0; ordinal < signature.export_function_count;
+         ++ordinal) {
+      iree_vm_function_t function;
+      IREE_CHECK_OK(iree_vm_module_lookup_function_by_ordinal(
+          module, IREE_VM_FUNCTION_LINKAGE_EXPORT, ordinal, &function));
+      iree_string_view_t function_name = iree_vm_function_name(&function);
+      test_params.push_back(
+          {module_file, std::string(function_name.data, function_name.size)});
+    }
+    iree_vm_module_release(module);
+  }
+
+  return test_params;
+}
+
+// Fixture for one parameterized test case: creates a fresh VM instance, the
+// bytecode module named by the current TestParams, and a context wrapping it.
+class VMBytecodeDispatchTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<TestParams> {
+ protected:
+  virtual void SetUp() {
+    const auto& test_params = GetParam();
+
+    IREE_CHECK_OK(iree_vm_instance_create(iree_allocator_system(), &instance_));
+
+    // Map the embedded module bytes in place (iree_allocator_null: no copy).
+    IREE_CHECK_OK(iree_vm_bytecode_module_create(
+        iree_const_byte_span_t{
+            reinterpret_cast<const uint8_t*>(test_params.module_file.data),
+            test_params.module_file.size},
+        iree_allocator_null(), iree_allocator_system(), &bytecode_module_));
+
+    std::vector<iree_vm_module_t*> modules = {bytecode_module_};
+    IREE_CHECK_OK(iree_vm_context_create_with_modules(
+        instance_, IREE_VM_CONTEXT_FLAG_NONE, modules.data(), modules.size(),
+        iree_allocator_system(), &context_));
+  }
+
+  virtual void TearDown() {
+    iree_vm_module_release(bytecode_module_);
+    iree_vm_context_release(context_);
+    iree_vm_instance_release(instance_);
+  }
+
+  // Synchronously invokes the named exported function with no inputs or
+  // outputs and returns its completion status (caller owns the status).
+  iree_status_t RunFunction(const char* function_name) {
+    iree_vm_function_t function;
+    IREE_CHECK_OK(iree_vm_module_lookup_function_by_name(
+        bytecode_module_, IREE_VM_FUNCTION_LINKAGE_EXPORT,
+        iree_make_cstring_view(function_name), &function));
+
+    return iree_vm_invoke(context_, function, IREE_VM_INVOCATION_FLAG_NONE,
+                          /*policy=*/nullptr, /*inputs=*/nullptr,
+                          /*outputs=*/nullptr, iree_allocator_system());
+  }
+
+  iree_vm_instance_t* instance_ = nullptr;
+  iree_vm_context_t* context_ = nullptr;
+  iree_vm_module_t* bytecode_module_ = nullptr;
+};
+
+// Runs the parameterized function and checks the outcome against the naming
+// convention: functions whose names start with "fail_" must return a failing
+// status; all others must succeed.
+TEST_P(VMBytecodeDispatchTest, Check) {
+  const auto& test_params = GetParam();
+  // find(...) == 0 is a prefix test on the function name.
+  bool expect_failure = test_params.function_name.find("fail_") == 0;
+
+  iree_status_t status = RunFunction(test_params.function_name.c_str());
+  if (iree_status_is_ok(status)) {
+    if (expect_failure) {
+      GTEST_FAIL() << "Function expected failure but succeeded";
+    } else {
+      GTEST_SUCCEED();
+    }
+  } else {
+    if (expect_failure) {
+      // Expected failure: consume the status so its storage is not leaked.
+      iree_status_ignore(status);
+      GTEST_SUCCEED();
+    } else {
+      GTEST_FAIL() << "Function expected success but failed with error: "
+                   << iree::Status(std::move(status));
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(VMIRFunctions, VMBytecodeDispatchTest,
+ ::testing::ValuesIn(GetModuleTestParams()),
+ ::testing::PrintToStringParamName());
+
+} // namespace
diff --git a/runtime/src/iree/vm/bytecode_dispatch_util.h b/runtime/src/iree/vm/bytecode_dispatch_util.h
new file mode 100644
index 0000000..676b342
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_dispatch_util.h
@@ -0,0 +1,500 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_DISPATCH_UTIL_H_
+#define IREE_VM_BYTECODE_DISPATCH_UTIL_H_
+
+#include <assert.h>
+#include <string.h>
+
+#include "iree/base/alignment.h"
+#include "iree/base/config.h"
+#include "iree/base/target_platform.h"
+#include "iree/vm/bytecode_module_impl.h"
+#include "iree/vm/generated/bytecode_op_table.h"
+
+//===----------------------------------------------------------------------===//
+// Shared data structures
+//===----------------------------------------------------------------------===//
+//
+// Register bounds checking
+// ------------------------
+// All accesses into the register lists are truncated to the valid range for the
+// typed bank. This allows us to directly use the register ordinals from the
+// bytecode without needing to perform any validation at load-time or run-time.
+// The worst that can happen is that the bytecode program being executed doesn't
+// work as intended - which, with a working compiler, shouldn't happen. Though
+// there are cases where the runtime produces the register values and may know
+// that they are in range it's a good habit to always mask the ordinal by the
+// type-specific mask so that it's not possible for out of bounds accesses to
+// sneak in. The iree_vm_registers_t struct is often kept in cache and the
+// masking is cheap relative to any other validation we could be performing.
+//
+// Alternative register widths
+// ---------------------------
+// Registers in the VM are just a blob of memory and not physical device
+// registers. They have a natural width of 32-bits as that covers a majority of
+// our usage for i32/f32 but can be accessed at larger widths such as 64-bits or
+// more for vector operations. The base of each frame's register memory is
+// 16-byte aligned and accessing any individual register as a 32-bit value is
+// always 4-byte aligned.
+//
+// Supporting other register widths is "free" in that the registers for all
+// widths alias the same register storage memory. This is similar to how
+// physical registers work in x86 where each register can be accessed at
+// different sizes (like EAX/RAX alias and the SIMD registers alias as XMM1 is
+// 128-bit, YMM1 is 256-bit, and ZMM1 is 512-bit but all the same storage).
+//
+// The requirements for doing this is that the base alignment for any register
+// must be a multiple of 4 (due to the native 32-bit storage) AND aligned to the
+// natural size of the register (so 8 bytes for i64, 16 bytes for v128, etc).
+// This alignment can easily be done by masking off the low bits such that we
+// know for any valid `reg` ordinal aligned to 4 bytes `reg/N` will still be
+// within register storage. For example, i64 registers are accessed as `reg&~1`
+// to align to 8 bytes starting at byte 0 of the register storage.
+//
+// Transferring between register types can be done with vm.ext.* and vm.trunc.*
+// ops. For example, vm.trunc.i64.i32 will read an 8 byte register and write
+// two 4 byte registers (effectively) with hi=0 and lo=the lower 32-bits of the
+// value.
+
+// Pointers to typed register storage.
+typedef struct iree_vm_registers_t {
+ // Ordinal mask defining which ordinal bits are valid. All i32 indexing must
+ // be ANDed with this mask.
+ uint16_t i32_mask;
+ // 16-byte aligned i32 register array.
+ int32_t* i32;
+ // Ordinal mask defining which ordinal bits are valid. All ref indexing must
+ // be ANDed with this mask.
+ uint16_t ref_mask;
+ // Naturally aligned ref register array.
+ iree_vm_ref_t* ref;
+} iree_vm_registers_t;
+
+// Storage associated with each stack frame of a bytecode function.
+// NOTE: we cannot store pointers to the stack in here as the stack may be
+// reallocated.
+typedef struct iree_vm_bytecode_frame_storage_t {
+ // Pointer to a register list within the stack frame where return registers
+ // will be stored by callees upon return.
+ const iree_vm_register_list_t* return_registers;
+
+ // Counts of each register type rounded up to the next power of two.
+ iree_host_size_t i32_register_count;
+ iree_host_size_t ref_register_count;
+
+ // Relative byte offsets from the head of this struct.
+ iree_host_size_t i32_register_offset;
+ iree_host_size_t ref_register_offset;
+} iree_vm_bytecode_frame_storage_t;
+
+// Interleaved src-dst register sets for branch register remapping.
+// This structure is an overlay for the bytecode that is serialized in a
+// matching format.
+typedef struct iree_vm_register_remap_list_t {
+ uint16_t size;
+ struct pair {
+ uint16_t src_reg;
+ uint16_t dst_reg;
+ } pairs[];
+} iree_vm_register_remap_list_t;
+static_assert(iree_alignof(iree_vm_register_remap_list_t) == 2,
+ "Expecting byte alignment (to avoid padding)");
+static_assert(offsetof(iree_vm_register_remap_list_t, pairs) == 2,
+ "Expect no padding in the struct");
+
+// Maps a type ID to a type def with clamping for out of bounds values.
+static inline const iree_vm_type_def_t* iree_vm_map_type(
+ iree_vm_bytecode_module_t* module, int32_t type_id) {
+ type_id = type_id >= module->type_count ? 0 : type_id;
+ return &module->type_table[type_id];
+}
+
+//===----------------------------------------------------------------------===//
+// Debugging utilities
+//===----------------------------------------------------------------------===//
+
+#if IREE_VM_EXECUTION_TRACING_FORCE_ENABLE
+#define IREE_IS_DISPATCH_TRACING_ENABLED() true
+#else
+#define IREE_IS_DISPATCH_TRACING_ENABLED() \
+ !!(iree_vm_stack_invocation_flags(stack) & \
+ IREE_VM_INVOCATION_FLAG_TRACE_EXECUTION)
+#endif // IREE_VM_EXECUTION_TRACING_FORCE_ENABLE
+
+#if IREE_VM_EXECUTION_TRACING_ENABLE
+#define IREE_DISPATCH_TRACE_INSTRUCTION(pc_offset, op_name) \
+ if (IREE_IS_DISPATCH_TRACING_ENABLED()) { \
+ IREE_RETURN_IF_ERROR(iree_vm_bytecode_trace_disasm( \
+ current_frame, (pc - (pc_offset)), &regs, stderr)); \
+ }
+
+#else
+#define IREE_DISPATCH_TRACE_INSTRUCTION(...)
+#endif // IREE_VM_EXECUTION_TRACING_ENABLE
+
+#if defined(IREE_COMPILER_MSVC) && !defined(IREE_COMPILER_CLANG)
+#define IREE_DISPATCH_MODE_SWITCH 1
+#else
+#define IREE_DISPATCH_MODE_COMPUTED_GOTO 1
+#endif // MSVC
+
+#ifndef NDEBUG
+#define VMCHECK(expr) assert(expr)
+#else
+#define VMCHECK(expr)
+#endif // NDEBUG
+
+//===----------------------------------------------------------------------===//
+// Bytecode data reading with little-/big-endian support
+//===----------------------------------------------------------------------===//
+
+static const int kRegSize = sizeof(uint16_t);
+
+// Bytecode data access macros for reading values of a given type from a byte
+// offset within the current function.
+#define OP_I8(i) iree_unaligned_load_le((uint8_t*)&bytecode_data[pc + (i)])
+#define OP_I16(i) iree_unaligned_load_le((uint16_t*)&bytecode_data[pc + (i)])
+#define OP_I32(i) iree_unaligned_load_le((uint32_t*)&bytecode_data[pc + (i)])
+#define OP_I64(i) iree_unaligned_load_le((uint64_t*)&bytecode_data[pc + (i)])
+#define OP_F32(i) iree_unaligned_load_le((float*)&bytecode_data[pc + (i)])
+#define OP_F64(i) iree_unaligned_load_le((double*)&bytecode_data[pc + (i)])
+
+//===----------------------------------------------------------------------===//
+// Utilities matching the tablegen op encoding scheme
+//===----------------------------------------------------------------------===//
+// These utilities match the VM_Enc* statements in VMBase.td 1:1, allowing us
+// to have the inverse of the encoding, which makes things easier to read.
+//
+// Each macro will increment the pc by the number of bytes read and as such must
+// be called in the same order the values are encoded.
+
+#define VM_AlignPC(pc, alignment) \
+ (pc) = ((pc) + ((alignment)-1)) & ~((alignment)-1)
+
+#define VM_DecConstI8(name) \
+ OP_I8(0); \
+ ++pc;
+#define VM_DecConstI32(name) \
+ OP_I32(0); \
+ pc += 4;
+#define VM_DecConstI64(name) \
+ OP_I64(0); \
+ pc += 8;
+#define VM_DecConstF32(name) \
+ OP_F32(0); \
+ pc += 4;
+#define VM_DecConstF64(name) \
+ OP_F64(0); \
+ pc += 8;
+#define VM_DecOpcode(opcode) VM_DecConstI8(#opcode)
+#define VM_DecFuncAttr(name) VM_DecConstI32(name)
+#define VM_DecGlobalAttr(name) VM_DecConstI32(name)
+#define VM_DecRodataAttr(name) VM_DecConstI32(name)
+#define VM_DecType(name) \
+ iree_vm_map_type(module, OP_I32(0)); \
+ pc += 4;
+#define VM_DecTypeOf(name) VM_DecType(name)
+#define VM_DecIntAttr32(name) VM_DecConstI32(name)
+#define VM_DecIntAttr64(name) VM_DecConstI64(name)
+#define VM_DecFloatAttr32(name) VM_DecConstF32(name)
+#define VM_DecFloatAttr64(name) VM_DecConstF64(name)
+#define VM_DecStrAttr(name, out_str) \
+ (out_str)->size = (iree_host_size_t)OP_I16(0); \
+ (out_str)->data = (const char*)&bytecode_data[pc + 2]; \
+ pc += 2 + (out_str)->size;
+#define VM_DecBranchTarget(block_name) VM_DecConstI32(name)
+#define VM_DecBranchOperands(operands_name) \
+ VM_DecBranchOperandsImpl(bytecode_data, &pc)
+static inline const iree_vm_register_remap_list_t* VM_DecBranchOperandsImpl(
+ const uint8_t* IREE_RESTRICT bytecode_data, iree_vm_source_offset_t* pc) {
+ VM_AlignPC(*pc, kRegSize);
+ const iree_vm_register_remap_list_t* list =
+ (const iree_vm_register_remap_list_t*)&bytecode_data[*pc];
+ *pc = *pc + kRegSize + list->size * 2 * kRegSize;
+ return list;
+}
+#define VM_DecOperandRegI32(name) \
+ regs.i32[OP_I16(0) & regs.i32_mask]; \
+ pc += kRegSize;
+#define VM_DecOperandRegI64(name) \
+ *((int64_t*)&regs.i32[OP_I16(0) & (regs.i32_mask & ~1)]); \
+ pc += kRegSize;
+#define VM_DecOperandRegF32(name) \
+ *((float*)&regs.i32[OP_I16(0) & regs.i32_mask]); \
+ pc += kRegSize;
+#define VM_DecOperandRegF64(name) \
+ *((double*)&regs.i32[OP_I16(0) & (regs.i32_mask & ~1)]); \
+ pc += kRegSize;
+#define VM_DecOperandRegRef(name, out_is_move) \
+ &regs.ref[OP_I16(0) & regs.ref_mask]; \
+ *(out_is_move) = 0; /*= OP_I16(0) & IREE_REF_REGISTER_MOVE_BIT;*/ \
+ pc += kRegSize;
+#define VM_DecVariadicOperands(name) \
+ VM_DecVariadicOperandsImpl(bytecode_data, &pc)
+static inline const iree_vm_register_list_t* VM_DecVariadicOperandsImpl(
+ const uint8_t* IREE_RESTRICT bytecode_data, iree_vm_source_offset_t* pc) {
+ VM_AlignPC(*pc, kRegSize);
+ const iree_vm_register_list_t* list =
+ (const iree_vm_register_list_t*)&bytecode_data[*pc];
+ *pc = *pc + kRegSize + list->size * kRegSize;
+ return list;
+}
+#define VM_DecResultRegI32(name) \
+ &regs.i32[OP_I16(0) & regs.i32_mask]; \
+ pc += kRegSize;
+#define VM_DecResultRegI64(name) \
+ ((int64_t*)&regs.i32[OP_I16(0) & (regs.i32_mask & ~1)]); \
+ pc += kRegSize;
+#define VM_DecResultRegF32(name) \
+ ((float*)&regs.i32[OP_I16(0) & regs.i32_mask]); \
+ pc += kRegSize;
+#define VM_DecResultRegF64(name) \
+ ((double*)&regs.i32[OP_I16(0) & (regs.i32_mask & ~1)]); \
+ pc += kRegSize;
+#define VM_DecResultRegRef(name, out_is_move) \
+ &regs.ref[OP_I16(0) & regs.ref_mask]; \
+ *(out_is_move) = 0; /*= OP_I16(0) & IREE_REF_REGISTER_MOVE_BIT;*/ \
+ pc += kRegSize;
+#define VM_DecVariadicResults(name) VM_DecVariadicOperands(name)
+
+//===----------------------------------------------------------------------===//
+// Dispatch table structure
+//===----------------------------------------------------------------------===//
+// We support both computed goto (gcc/clang) and switch-based dispatch. Computed
+// goto is preferred when available as it has the most efficient codegen. MSVC
+// doesn't support it, though, and there may be other targets (like wasm) that
+// can only handle the switch-based approach.
+
+// Bytecode data offset subtracted from pc when looking for the start of the
+// currently dispatched instruction: `instruction_start = pc - OFFSET`
+#define VM_PC_OFFSET_CORE 1
+#define VM_PC_OFFSET_EXT_I32 2
+#define VM_PC_OFFSET_EXT_I64 2
+#define VM_PC_OFFSET_EXT_F32 2
+#define VM_PC_OFFSET_EXT_F64 2
+
+#if defined(IREE_DISPATCH_MODE_COMPUTED_GOTO)
+
+// Dispatch table mapping 1:1 with bytecode ops.
+// Each entry is a label within this function that can be used for computed
+// goto. You can find more information on computed goto here:
+// https://eli.thegreenplace.net/2012/07/12/computed-goto-for-efficient-dispatch-tables
+//
+// Note that we ensure the table is 256 elements long exactly to make sure
+// that unused opcodes are handled gracefully.
+//
+// Computed gotos are pretty much the best way to dispatch interpreters but are
+// not part of the C standard; GCC and clang support them but MSVC does not.
+// Because the performance difference is significant we support both here but
+// prefer the computed goto path where available. Empirical data shows them to
+// still be a win in 2019 on x64 desktops and arm32/arm64 mobile devices.
+#define BEGIN_DISPATCH_CORE() \
+ goto* kDispatchTable_CORE[bytecode_data[pc++]]; \
+ while (1)
+#define END_DISPATCH_CORE()
+
+#define DECLARE_DISPATCH_CORE_OPC(ordinal, name) &&_dispatch_CORE_##name,
+#define DECLARE_DISPATCH_CORE_RSV(ordinal) &&_dispatch_unhandled,
+#define DEFINE_DISPATCH_TABLE_CORE() \
+ static const void* kDispatchTable_CORE[256] = {IREE_VM_OP_CORE_TABLE( \
+ DECLARE_DISPATCH_CORE_OPC, DECLARE_DISPATCH_CORE_RSV)};
+
+#define DECLARE_DISPATCH_EXT_RSV(ordinal) &&_dispatch_unhandled,
+#if IREE_VM_EXT_I64_ENABLE
+#define DECLARE_DISPATCH_EXT_I64_OPC(ordinal, name) &&_dispatch_EXT_I64_##name,
+#define DEFINE_DISPATCH_TABLE_EXT_I64() \
+ static const void* kDispatchTable_EXT_I64[256] = {IREE_VM_OP_EXT_I64_TABLE( \
+ DECLARE_DISPATCH_EXT_I64_OPC, DECLARE_DISPATCH_EXT_RSV)};
+#else
+#define DEFINE_DISPATCH_TABLE_EXT_I64()
+#endif // IREE_VM_EXT_I64_ENABLE
+#if IREE_VM_EXT_F32_ENABLE
+#define DECLARE_DISPATCH_EXT_F32_OPC(ordinal, name) &&_dispatch_EXT_F32_##name,
+#define DEFINE_DISPATCH_TABLE_EXT_F32() \
+ static const void* kDispatchTable_EXT_F32[256] = {IREE_VM_OP_EXT_F32_TABLE( \
+ DECLARE_DISPATCH_EXT_F32_OPC, DECLARE_DISPATCH_EXT_RSV)};
+#else
+#define DEFINE_DISPATCH_TABLE_EXT_F32()
+#endif // IREE_VM_EXT_F32_ENABLE
+#if IREE_VM_EXT_F64_ENABLE
+#define DECLARE_DISPATCH_EXT_F64_OPC(ordinal, name) &&_dispatch_EXT_F64_##name,
+#define DEFINE_DISPATCH_TABLE_EXT_F64() \
+ static const void* kDispatchTable_EXT_F64[256] = {IREE_VM_OP_EXT_F64_TABLE( \
+ DECLARE_DISPATCH_EXT_F64_OPC, DECLARE_DISPATCH_EXT_RSV)};
+#else
+#define DEFINE_DISPATCH_TABLE_EXT_F64()
+#endif // IREE_VM_EXT_F64_ENABLE
+
+#define DEFINE_DISPATCH_TABLES() \
+ DEFINE_DISPATCH_TABLE_CORE(); \
+ DEFINE_DISPATCH_TABLE_EXT_I64(); \
+ DEFINE_DISPATCH_TABLE_EXT_F32(); \
+ DEFINE_DISPATCH_TABLE_EXT_F64();
+
+#define DISPATCH_UNHANDLED_CORE() \
+ _dispatch_unhandled : { \
+ VMCHECK(0); \
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "unhandled opcode"); \
+ }
+#define UNHANDLED_DISPATCH_PREFIX(op_name, ext) \
+ _dispatch_CORE_##op_name : { \
+ VMCHECK(0); \
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED, \
+ "unhandled dispatch extension " #ext); \
+ }
+
+#define DISPATCH_OP(ext, op_name, body) \
+ _dispatch_##ext##_##op_name:; \
+ IREE_DISPATCH_TRACE_INSTRUCTION(VM_PC_OFFSET_##ext, #op_name); \
+ body; \
+ goto* kDispatchTable_CORE[bytecode_data[pc++]];
+
+#define BEGIN_DISPATCH_PREFIX(op_name, ext) \
+ _dispatch_CORE_##op_name : goto* kDispatchTable_##ext[bytecode_data[pc++]]; \
+ while (1)
+#define END_DISPATCH_PREFIX() goto* kDispatchTable_CORE[bytecode_data[pc++]];
+
+#else
+
+// Switch-based dispatch. This is strictly less efficient than the computed
+// goto approach above but is universally supported.
+
+#define BEGIN_DISPATCH_CORE() \
+ while (1) { \
+ switch (bytecode_data[pc++])
+#define END_DISPATCH_CORE() }
+
+#define DEFINE_DISPATCH_TABLES()
+
+#define DISPATCH_UNHANDLED_CORE() \
+ default: { \
+ VMCHECK(0); \
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED, \
+ "unhandled core opcode"); \
+ }
+#define UNHANDLED_DISPATCH_PREFIX(op_name, ext) \
+ case IREE_VM_OP_CORE_##op_name: { \
+ VMCHECK(0); \
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED, \
+ "unhandled dispatch extension " #ext); \
+ }
+
+#define DISPATCH_OP(ext, op_name, body) \
+ case IREE_VM_OP_##ext##_##op_name: { \
+ IREE_DISPATCH_TRACE_INSTRUCTION(VM_PC_OFFSET_##ext, #op_name); \
+ body; \
+ } break;
+
+#define BEGIN_DISPATCH_PREFIX(op_name, ext) \
+ case IREE_VM_OP_CORE_##op_name: { \
+ switch (bytecode_data[pc++])
+#define END_DISPATCH_PREFIX() \
+ break; \
+ }
+
+#endif // IREE_DISPATCH_MODE_COMPUTED_GOTO
+
+// Common dispatch op macros
+
+#define DISPATCH_OP_CORE_UNARY_I32(op_name, op_func) \
+ DISPATCH_OP(CORE, op_name, { \
+ int32_t operand = VM_DecOperandRegI32("operand"); \
+ int32_t* result = VM_DecResultRegI32("result"); \
+ *result = op_func(operand); \
+ });
+
+#define DISPATCH_OP_CORE_BINARY_I32(op_name, op_func) \
+ DISPATCH_OP(CORE, op_name, { \
+ int32_t lhs = VM_DecOperandRegI32("lhs"); \
+ int32_t rhs = VM_DecOperandRegI32("rhs"); \
+ int32_t* result = VM_DecResultRegI32("result"); \
+ *result = op_func(lhs, rhs); \
+ });
+
+#define DISPATCH_OP_CORE_TERNARY_I32(op_name, op_func) \
+ DISPATCH_OP(CORE, op_name, { \
+ int32_t a = VM_DecOperandRegI32("a"); \
+ int32_t b = VM_DecOperandRegI32("b"); \
+ int32_t c = VM_DecOperandRegI32("c"); \
+ int32_t* result = VM_DecResultRegI32("result"); \
+ *result = op_func(a, b, c); \
+ });
+
+#define DISPATCH_OP_EXT_I64_UNARY_I64(op_name, op_func) \
+ DISPATCH_OP(EXT_I64, op_name, { \
+ int64_t operand = VM_DecOperandRegI64("operand"); \
+ int64_t* result = VM_DecResultRegI64("result"); \
+ *result = op_func(operand); \
+ });
+
+#define DISPATCH_OP_EXT_I64_BINARY_I64(op_name, op_func) \
+ DISPATCH_OP(EXT_I64, op_name, { \
+ int64_t lhs = VM_DecOperandRegI64("lhs"); \
+ int64_t rhs = VM_DecOperandRegI64("rhs"); \
+ int64_t* result = VM_DecResultRegI64("result"); \
+ *result = op_func(lhs, rhs); \
+ });
+
+#define DISPATCH_OP_EXT_I64_TERNARY_I64(op_name, op_func) \
+ DISPATCH_OP(EXT_I64, op_name, { \
+ int64_t a = VM_DecOperandRegI64("a"); \
+ int64_t b = VM_DecOperandRegI64("b"); \
+ int64_t c = VM_DecOperandRegI64("c"); \
+ int64_t* result = VM_DecResultRegI64("result"); \
+ *result = op_func(a, b, c); \
+ });
+
+#define DISPATCH_OP_EXT_F32_UNARY_F32(op_name, op_func) \
+ DISPATCH_OP(EXT_F32, op_name, { \
+ float operand = VM_DecOperandRegF32("operand"); \
+ float* result = VM_DecResultRegF32("result"); \
+ *result = op_func(operand); \
+ });
+
+#define DISPATCH_OP_EXT_F32_BINARY_F32(op_name, op_func) \
+ DISPATCH_OP(EXT_F32, op_name, { \
+ float lhs = VM_DecOperandRegF32("lhs"); \
+ float rhs = VM_DecOperandRegF32("rhs"); \
+ float* result = VM_DecResultRegF32("result"); \
+ *result = op_func(lhs, rhs); \
+ });
+
+#define DISPATCH_OP_EXT_F32_TERNARY_F32(op_name, op_func) \
+ DISPATCH_OP(EXT_F32, op_name, { \
+ float a = VM_DecOperandRegF32("a"); \
+ float b = VM_DecOperandRegF32("b"); \
+ float c = VM_DecOperandRegF32("c"); \
+ float* result = VM_DecResultRegF32("result"); \
+ *result = op_func(a, b, c); \
+ });
+
+#define DISPATCH_OP_EXT_F64_UNARY_F64(op_name, op_func) \
+ DISPATCH_OP(EXT_F64, op_name, { \
+ double operand = VM_DecOperandRegF64("operand"); \
+ double* result = VM_DecResultRegF64("result"); \
+ *result = op_func(operand); \
+ });
+
+#define DISPATCH_OP_EXT_F64_BINARY_F64(op_name, op_func) \
+ DISPATCH_OP(EXT_F64, op_name, { \
+ double lhs = VM_DecOperandRegF64("lhs"); \
+ double rhs = VM_DecOperandRegF64("rhs"); \
+ double* result = VM_DecResultRegF64("result"); \
+ *result = op_func(lhs, rhs); \
+ });
+
+#define DISPATCH_OP_EXT_F64_TERNARY_F64(op_name, op_func) \
+ DISPATCH_OP(EXT_F64, op_name, { \
+ double a = VM_DecOperandRegF64("a"); \
+ double b = VM_DecOperandRegF64("b"); \
+ double c = VM_DecOperandRegF64("c"); \
+ double* result = VM_DecResultRegF64("result"); \
+ *result = op_func(a, b, c); \
+ });
+
+#endif // IREE_VM_BYTECODE_DISPATCH_UTIL_H_
diff --git a/runtime/src/iree/vm/bytecode_module.c b/runtime/src/iree/vm/bytecode_module.c
new file mode 100644
index 0000000..192464d
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_module.c
@@ -0,0 +1,941 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/bytecode_module.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode_module_impl.h"
+
+// Performs a strcmp-style comparison of a flatbuffers string and an IREE
+static bool iree_vm_flatbuffer_strcmp(flatbuffers_string_t lhs,
+ iree_string_view_t rhs) {
+ size_t lhs_size = flatbuffers_string_len(lhs);
+ int x = strncmp(lhs, rhs.data, lhs_size < rhs.size ? lhs_size : rhs.size);
+ return x != 0 ? x : lhs_size < rhs.size ? -1 : lhs_size > rhs.size;
+}
+
+// Resolves a type through either builtin rules or the ref registered types.
+static bool iree_vm_bytecode_module_resolve_type(
+ iree_vm_TypeDef_table_t type_def, iree_vm_type_def_t* out_type) {
+ memset(out_type, 0, sizeof(*out_type));
+ flatbuffers_string_t full_name = iree_vm_TypeDef_full_name(type_def);
+ if (!flatbuffers_string_len(full_name)) {
+ return false;
+ } else if (iree_vm_flatbuffer_strcmp(full_name,
+ iree_make_cstring_view("i8")) == 0) {
+ out_type->value_type = IREE_VM_VALUE_TYPE_I8;
+ return true;
+ } else if (iree_vm_flatbuffer_strcmp(full_name,
+ iree_make_cstring_view("i16")) == 0) {
+ out_type->value_type = IREE_VM_VALUE_TYPE_I16;
+ return true;
+ } else if (iree_vm_flatbuffer_strcmp(full_name,
+ iree_make_cstring_view("i32")) == 0) {
+ out_type->value_type = IREE_VM_VALUE_TYPE_I32;
+ return true;
+ } else if (iree_vm_flatbuffer_strcmp(full_name,
+ iree_make_cstring_view("i64")) == 0) {
+ out_type->value_type = IREE_VM_VALUE_TYPE_I64;
+ return true;
+ } else if (iree_vm_flatbuffer_strcmp(full_name,
+ iree_make_cstring_view("f32")) == 0) {
+ out_type->value_type = IREE_VM_VALUE_TYPE_F32;
+ return true;
+ } else if (iree_vm_flatbuffer_strcmp(full_name,
+ iree_make_cstring_view("f64")) == 0) {
+ out_type->value_type = IREE_VM_VALUE_TYPE_F64;
+ return true;
+ } else if (iree_vm_flatbuffer_strcmp(
+ full_name, iree_make_cstring_view("!vm.opaque")) == 0) {
+ out_type->value_type = IREE_VM_VALUE_TYPE_NONE;
+ out_type->ref_type = IREE_VM_REF_TYPE_NULL;
+ return true;
+ } else if (full_name[0] == '!') {
+ // Note that we drop the ! prefix:
+ iree_string_view_t type_name = {full_name + 1,
+ flatbuffers_string_len(full_name) - 1};
+ if (iree_string_view_starts_with(type_name,
+ iree_make_cstring_view("vm.list"))) {
+ // This is a !vm.list<...> type. We don't actually care about the type as
+ // we allow list types to be widened. Rewrite to just vm.list as that's
+ // all we have registered.
+ type_name = iree_make_cstring_view("vm.list");
+ }
+ const iree_vm_ref_type_descriptor_t* type_descriptor =
+ iree_vm_ref_lookup_registered_type(type_name);
+ if (type_descriptor) {
+ out_type->ref_type = type_descriptor->type;
+ }
+ return true;
+ }
+ return false;
+}
+
+// Resolves all types through either builtin rules or the ref registered types.
+// |type_table| can be omitted to just perform verification that all types are
+// registered.
+static iree_status_t iree_vm_bytecode_module_resolve_types(
+ iree_vm_TypeDef_vec_t type_defs, iree_vm_type_def_t* type_table) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ iree_status_t status = iree_ok_status();
+ for (size_t i = 0; i < iree_vm_TypeDef_vec_len(type_defs); ++i) {
+ iree_vm_TypeDef_table_t type_def = iree_vm_TypeDef_vec_at(type_defs, i);
+ if (!iree_vm_bytecode_module_resolve_type(type_def, &type_table[i])) {
+ status = iree_make_status(IREE_STATUS_NOT_FOUND,
+ "no type registered with name '%s'",
+ iree_vm_TypeDef_full_name(type_def));
+ break;
+ }
+ }
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+// Verifies the structure of the flatbuffer so that we can avoid doing so during
+// runtime. There are still some conditions we must be aware of (such as omitted
+// names on functions with internal linkage), however we shouldn't need to
+// bounds check anything within the flatbuffer after this succeeds.
+static iree_status_t iree_vm_bytecode_module_flatbuffer_verify(
+ iree_const_byte_span_t flatbuffer_data) {
+ if (!flatbuffer_data.data || flatbuffer_data.data_length < 16) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "flatbuffer data is not present or less than 16 bytes (%zu total)",
+ flatbuffer_data.data_length);
+ }
+
+ // Run flatcc generated verification. This ensures all pointers are in-bounds
+ // and that we can safely walk the file, but not that the actual contents of
+ // the flatbuffer meet our expectations.
+ int verify_ret = iree_vm_BytecodeModuleDef_verify_as_root(
+ flatbuffer_data.data, flatbuffer_data.data_length);
+ if (verify_ret != flatcc_verify_ok) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "flatbuffer verification failed: %s",
+ flatcc_verify_error_string(verify_ret));
+ }
+
+ iree_vm_BytecodeModuleDef_table_t module_def =
+ iree_vm_BytecodeModuleDef_as_root(flatbuffer_data.data);
+
+ flatbuffers_string_t name = iree_vm_BytecodeModuleDef_name(module_def);
+ if (!flatbuffers_string_len(name)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "module missing name field");
+ }
+
+ iree_vm_TypeDef_vec_t types = iree_vm_BytecodeModuleDef_types(module_def);
+ for (size_t i = 0; i < iree_vm_TypeDef_vec_len(types); ++i) {
+ iree_vm_TypeDef_table_t type_def = iree_vm_TypeDef_vec_at(types, i);
+ if (!type_def) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "types[%zu] missing body", i);
+ }
+ flatbuffers_string_t full_name = iree_vm_TypeDef_full_name(type_def);
+ if (flatbuffers_string_len(full_name) <= 0) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "types[%zu] missing name", i);
+ }
+ }
+
+ iree_vm_ImportFunctionDef_vec_t imported_functions =
+ iree_vm_BytecodeModuleDef_imported_functions(module_def);
+ iree_vm_ExportFunctionDef_vec_t exported_functions =
+ iree_vm_BytecodeModuleDef_exported_functions(module_def);
+ iree_vm_FunctionDescriptor_vec_t function_descriptors =
+ iree_vm_BytecodeModuleDef_function_descriptors(module_def);
+
+ for (size_t i = 0; i < iree_vm_ImportFunctionDef_vec_len(imported_functions);
+ ++i) {
+ iree_vm_ImportFunctionDef_table_t import_def =
+ iree_vm_ImportFunctionDef_vec_at(imported_functions, i);
+ if (!import_def) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "imports[%zu] missing body", i);
+ }
+ flatbuffers_string_t full_name =
+ iree_vm_ImportFunctionDef_full_name(import_def);
+ if (!flatbuffers_string_len(full_name)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "imports[%zu] missing full_name", i);
+ }
+ }
+
+ for (size_t i = 0; i < iree_vm_ExportFunctionDef_vec_len(exported_functions);
+ ++i) {
+ iree_vm_ExportFunctionDef_table_t export_def =
+ iree_vm_ExportFunctionDef_vec_at(exported_functions, i);
+ if (!export_def) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "exports[%zu] missing body", i);
+ }
+ flatbuffers_string_t local_name =
+ iree_vm_ExportFunctionDef_local_name(export_def);
+ if (!flatbuffers_string_len(local_name)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "exports[%zu] missing local_name", i);
+ }
+ iree_host_size_t internal_ordinal =
+ iree_vm_ExportFunctionDef_internal_ordinal(export_def);
+ if (internal_ordinal >=
+ iree_vm_FunctionDescriptor_vec_len(function_descriptors)) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "exports[%zu] internal_ordinal out of bounds (0 < %zu < %zu)", i,
+ internal_ordinal,
+ iree_vm_FunctionDescriptor_vec_len(function_descriptors));
+ }
+ }
+
+ flatbuffers_uint8_vec_t bytecode_data =
+ iree_vm_BytecodeModuleDef_bytecode_data(module_def);
+ for (size_t i = 0;
+ i < iree_vm_FunctionDescriptor_vec_len(function_descriptors); ++i) {
+ iree_vm_FunctionDescriptor_struct_t function_descriptor =
+ iree_vm_FunctionDescriptor_vec_at(function_descriptors, i);
+ if (function_descriptor->bytecode_offset < 0 ||
+ function_descriptor->bytecode_offset +
+ function_descriptor->bytecode_length >
+ flatbuffers_uint8_vec_len(bytecode_data)) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "functions[%zu] descriptor bytecode span out of range (0 < %d < %zu)",
+ i, function_descriptor->bytecode_offset,
+ flatbuffers_uint8_vec_len(bytecode_data));
+ }
+ if (function_descriptor->i32_register_count > IREE_I32_REGISTER_COUNT ||
+ function_descriptor->ref_register_count > IREE_REF_REGISTER_COUNT) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "functions[%zu] descriptor register count out of range", i);
+ }
+
+ // TODO(benvanik): run bytecode verifier on contents.
+ }
+
+ return iree_ok_status();
+}
+
+static iree_status_t iree_vm_bytecode_map_internal_ordinal(
+ iree_vm_bytecode_module_t* module, iree_vm_function_t function,
+ uint16_t* out_ordinal,
+ iree_vm_FunctionSignatureDef_table_t* out_signature_def) {
+ *out_ordinal = 0;
+ if (out_signature_def) *out_signature_def = NULL;
+
+ uint16_t ordinal = function.ordinal;
+ iree_vm_FunctionSignatureDef_table_t signature_def = NULL;
+ if (function.linkage == IREE_VM_FUNCTION_LINKAGE_EXPORT) {
+ // Look up the internal ordinal index of this export in the function table.
+ iree_vm_ExportFunctionDef_vec_t exported_functions =
+ iree_vm_BytecodeModuleDef_exported_functions(module->def);
+ IREE_ASSERT_LT(ordinal,
+ iree_vm_ExportFunctionDef_vec_len(exported_functions),
+ "export ordinal out of range (0 < %zu < %zu)", ordinal,
+ iree_vm_ExportFunctionDef_vec_len(exported_functions));
+ iree_vm_ExportFunctionDef_table_t function_def =
+ iree_vm_ExportFunctionDef_vec_at(exported_functions, function.ordinal);
+ ordinal = iree_vm_ExportFunctionDef_internal_ordinal(function_def);
+ signature_def = iree_vm_ExportFunctionDef_signature(function_def);
+ } else {
+ // TODO(benvanik): support querying the internal functions, which could be
+ // useful for debugging. Or maybe we just drop them forever?
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "cannot map imported/internal functions; no entry "
+ "in the function table");
+ }
+
+ if (ordinal >= module->function_descriptor_count) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "function ordinal out of range (0 < %u < %zu)",
+ function.ordinal,
+ module->function_descriptor_count);
+ }
+
+ *out_ordinal = ordinal;
+ if (out_signature_def) *out_signature_def = signature_def;
+ return iree_ok_status();
+}
+
+static void iree_vm_bytecode_module_destroy(void* self) {
+ iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_free(module->flatbuffer_allocator,
+ (void*)module->flatbuffer_data.data);
+ module->flatbuffer_data = iree_make_const_byte_span(NULL, 0);
+ module->flatbuffer_allocator = iree_allocator_null();
+
+ iree_allocator_free(module->allocator, module);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+static iree_string_view_t iree_vm_bytecode_module_name(void* self) {
+ iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+ flatbuffers_string_t name = iree_vm_BytecodeModuleDef_name(module->def);
+ return iree_make_string_view(name, flatbuffers_string_len(name));
+}
+
+static iree_vm_module_signature_t iree_vm_bytecode_module_signature(
+ void* self) {
+ iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+ iree_vm_module_signature_t signature;
+ memset(&signature, 0, sizeof(signature));
+ signature.import_function_count = iree_vm_ImportFunctionDef_vec_len(
+ iree_vm_BytecodeModuleDef_imported_functions(module->def));
+ signature.export_function_count = iree_vm_ExportFunctionDef_vec_len(
+ iree_vm_BytecodeModuleDef_exported_functions(module->def));
+ signature.internal_function_count = module->function_descriptor_count;
+ return signature;
+}
+
+// Module interface get_function: resolves the function with the given
+// |linkage| and |ordinal| and optionally returns its name and signature.
+// All out parameters are optional; fields that cannot be resolved are left
+// zeroed.
+static iree_status_t iree_vm_bytecode_module_get_function(
+    void* self, iree_vm_function_linkage_t linkage, iree_host_size_t ordinal,
+    iree_vm_function_t* out_function, iree_string_view_t* out_name,
+    iree_vm_function_signature_t* out_signature) {
+  // Clear all outputs up front so callers never observe stale values if we
+  // bail out with an error below.
+  if (out_function) {
+    memset(out_function, 0, sizeof(*out_function));
+  }
+  if (out_name) {
+    memset(out_name, 0, sizeof(*out_name));
+  }
+  if (out_signature) {
+    memset(out_signature, 0, sizeof(*out_signature));
+  }
+
+  iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+  flatbuffers_string_t name = NULL;
+  iree_vm_FunctionSignatureDef_table_t signature = NULL;
+  if (linkage == IREE_VM_FUNCTION_LINKAGE_IMPORT ||
+      linkage == IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL) {
+    iree_vm_ImportFunctionDef_vec_t imported_functions =
+        iree_vm_BytecodeModuleDef_imported_functions(module->def);
+    if (ordinal >= iree_vm_ImportFunctionDef_vec_len(imported_functions)) {
+      return iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "import ordinal out of range (0 < %zu < %zu)", ordinal,
+          iree_vm_ImportFunctionDef_vec_len(imported_functions));
+    }
+    iree_vm_ImportFunctionDef_table_t import_def =
+        iree_vm_ImportFunctionDef_vec_at(imported_functions, ordinal);
+    name = iree_vm_ImportFunctionDef_full_name(import_def);
+    signature = iree_vm_ImportFunctionDef_signature(import_def);
+    // Imports flagged OPTIONAL report the OPTIONAL linkage even when queried
+    // with the plain IMPORT linkage.
+    if (iree_all_bits_set(iree_vm_ImportFunctionDef_flags(import_def),
+                          iree_vm_ImportFlagBits_OPTIONAL)) {
+      linkage = IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL;
+    }
+  } else if (linkage == IREE_VM_FUNCTION_LINKAGE_EXPORT) {
+    iree_vm_ExportFunctionDef_vec_t exported_functions =
+        iree_vm_BytecodeModuleDef_exported_functions(module->def);
+    if (ordinal >= iree_vm_ExportFunctionDef_vec_len(exported_functions)) {
+      return iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "export ordinal out of range (0 < %zu < %zu)", ordinal,
+          iree_vm_ExportFunctionDef_vec_len(exported_functions));
+    }
+    iree_vm_ExportFunctionDef_table_t export_def =
+        iree_vm_ExportFunctionDef_vec_at(exported_functions, ordinal);
+    name = iree_vm_ExportFunctionDef_local_name(export_def);
+    signature = iree_vm_ExportFunctionDef_signature(export_def);
+  }
+
+  // Other linkages fall through with |name|/|signature| still NULL; the
+  // function handle below is populated regardless.
+  if (out_function) {
+    out_function->module = &module->interface;
+    out_function->linkage = linkage;
+    out_function->ordinal = (uint16_t)ordinal;
+  }
+  if (out_name && name) {
+    out_name->data = name;
+    out_name->size = flatbuffers_string_len(name);
+  }
+  if (out_signature && signature) {
+    // The calling convention string aliases the FlatBuffer memory; it remains
+    // valid for the lifetime of the module.
+    flatbuffers_string_t calling_convention =
+        iree_vm_FunctionSignatureDef_calling_convention(signature);
+    out_signature->calling_convention.data = calling_convention;
+    out_signature->calling_convention.size =
+        flatbuffers_string_len(calling_convention);
+  }
+
+  return iree_ok_status();
+}
+
+// Module interface get_function_reflection_attr: returns the reflection
+// attribute key/value pair at |index| on the exported function |ordinal|.
+// Only exports carry reflection metadata; other linkages are rejected.
+static iree_status_t iree_vm_bytecode_module_get_function_reflection_attr(
+    void* self, iree_vm_function_linkage_t linkage, iree_host_size_t ordinal,
+    iree_host_size_t index, iree_string_view_t* key,
+    iree_string_view_t* value) {
+  if (linkage != IREE_VM_FUNCTION_LINKAGE_EXPORT) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "only exported functions can be queried");
+  }
+
+  iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+  iree_vm_ExportFunctionDef_vec_t exported_functions =
+      iree_vm_BytecodeModuleDef_exported_functions(module->def);
+
+  if (ordinal >= iree_vm_ExportFunctionDef_vec_len(exported_functions)) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "function ordinal out of range (0 < %zu < %zu)", ordinal,
+        iree_vm_ExportFunctionDef_vec_len(exported_functions));
+  }
+
+  // Reflection attrs hang off the function signature; a missing signature or
+  // an out-of-range |index| is reported as NOT_FOUND (not an error) so
+  // callers can enumerate by incrementing |index| until failure.
+  iree_vm_ExportFunctionDef_table_t function_def =
+      iree_vm_ExportFunctionDef_vec_at(exported_functions, ordinal);
+  iree_vm_FunctionSignatureDef_table_t signature_def =
+      iree_vm_ExportFunctionDef_signature(function_def);
+  if (!signature_def) {
+    return iree_make_status(
+        IREE_STATUS_NOT_FOUND,
+        "reflection attribute at index %zu not found; no signature", index);
+  }
+  iree_vm_ReflectionAttrDef_vec_t reflection_attrs =
+      iree_vm_FunctionSignatureDef_reflection_attrs(signature_def);
+  if (!reflection_attrs ||
+      index >= iree_vm_ReflectionAttrDef_vec_len(reflection_attrs)) {
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "reflection attribute at index %zu not found",
+                            index);
+  }
+  iree_vm_ReflectionAttrDef_table_t attr =
+      iree_vm_ReflectionAttrDef_vec_at(reflection_attrs, index);
+  flatbuffers_string_t attr_key = iree_vm_ReflectionAttrDef_key(attr);
+  flatbuffers_string_t attr_value = iree_vm_ReflectionAttrDef_value(attr);
+  if (!flatbuffers_string_len(attr_key) ||
+      !flatbuffers_string_len(attr_value)) {
+    // Because reflection metadata should not impose any overhead for the
+    // non reflection case, we do not eagerly validate it on load -- instead
+    // verify it structurally as needed.
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "reflection attribute missing fields");
+  }
+
+  // Both views alias FlatBuffer memory owned by the module.
+  key->data = attr_key;
+  key->size = flatbuffers_string_len(attr_key);
+  value->data = attr_value;
+  value->size = flatbuffers_string_len(attr_value);
+
+  return iree_ok_status();
+}
+
+// Module interface lookup_function: linearly scans the import or export name
+// table for |name|. Only import/export linkages are searchable by name; any
+// other linkage (or a miss) yields NOT_FOUND.
+static iree_status_t iree_vm_bytecode_module_lookup_function(
+    void* self, iree_vm_function_linkage_t linkage, iree_string_view_t name,
+    iree_vm_function_t* out_function) {
+  IREE_ASSERT_ARGUMENT(out_function);
+  memset(out_function, 0, sizeof(iree_vm_function_t));
+
+  if (iree_string_view_is_empty(name)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "function name required for query");
+  }
+
+  iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+  out_function->linkage = linkage;
+  out_function->module = &module->interface;
+
+  // NOTE: we could organize exports alphabetically so we could bsearch.
+  if (linkage == IREE_VM_FUNCTION_LINKAGE_IMPORT ||
+      linkage == IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL) {
+    iree_vm_ImportFunctionDef_vec_t imported_functions =
+        iree_vm_BytecodeModuleDef_imported_functions(module->def);
+    for (iree_host_size_t ordinal = 0;
+         ordinal < iree_vm_ImportFunctionDef_vec_len(imported_functions);
+         ++ordinal) {
+      iree_vm_ImportFunctionDef_table_t import_def =
+          iree_vm_ImportFunctionDef_vec_at(imported_functions, ordinal);
+      if (iree_vm_flatbuffer_strcmp(
+              iree_vm_ImportFunctionDef_full_name(import_def), name) == 0) {
+        out_function->ordinal = ordinal;
+        // Imports flagged OPTIONAL report the OPTIONAL linkage even when the
+        // caller asked for the plain IMPORT linkage.
+        if (iree_all_bits_set(iree_vm_ImportFunctionDef_flags(import_def),
+                              iree_vm_ImportFlagBits_OPTIONAL)) {
+          out_function->linkage = IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL;
+        }
+        return iree_ok_status();
+      }
+    }
+  } else if (linkage == IREE_VM_FUNCTION_LINKAGE_EXPORT) {
+    iree_vm_ExportFunctionDef_vec_t exported_functions =
+        iree_vm_BytecodeModuleDef_exported_functions(module->def);
+    for (iree_host_size_t ordinal = 0;
+         ordinal < iree_vm_ExportFunctionDef_vec_len(exported_functions);
+         ++ordinal) {
+      iree_vm_ExportFunctionDef_table_t export_def =
+          iree_vm_ExportFunctionDef_vec_at(exported_functions, ordinal);
+      if (iree_vm_flatbuffer_strcmp(
+              iree_vm_ExportFunctionDef_local_name(export_def), name) == 0) {
+        out_function->ordinal = ordinal;
+        return iree_ok_status();
+      }
+    }
+  }
+
+  return iree_make_status(IREE_STATUS_NOT_FOUND,
+                          "function with the given name not found");
+}
+
+// Recursively formats the location at |location_ordinal| within
+// |location_table| into |builder|, following the MLIR-style location forms
+// present in the debug database (callsite, file/line/col, fused, name).
+// Child locations reference other table entries by ordinal and are formatted
+// by recursing into this same function.
+static iree_status_t iree_vm_bytecode_location_format(
+    int32_t location_ordinal,
+    iree_vm_LocationTypeDef_union_vec_t location_table,
+    iree_vm_source_location_format_flags_t flags,
+    iree_string_builder_t* builder) {
+  iree_vm_LocationTypeDef_union_t location =
+      iree_vm_LocationTypeDef_union_vec_at(location_table, location_ordinal);
+  switch (location.type) {
+    default:
+    case iree_vm_LocationTypeDef_NONE: {
+      // Unknown/unhandled location types degrade to a placeholder instead of
+      // failing the whole trace.
+      return iree_string_builder_append_cstring(builder, "[unknown]");
+    }
+    case iree_vm_LocationTypeDef_CallSiteLocDef: {
+      // NOTE: MLIR prints caller->callee, but in a stack trace we want the
+      // upside-down callee->caller.
+      iree_vm_CallSiteLocDef_table_t loc =
+          (iree_vm_CallSiteLocDef_table_t)location.value;
+      IREE_RETURN_IF_ERROR(iree_vm_bytecode_location_format(
+          iree_vm_CallSiteLocDef_callee(loc), location_table, flags, builder));
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(builder, "\n      at "));
+      return iree_vm_bytecode_location_format(
+          iree_vm_CallSiteLocDef_caller(loc), location_table, flags, builder);
+    }
+    case iree_vm_LocationTypeDef_FileLineColLocDef: {
+      // Formats as filename:line:column.
+      iree_vm_FileLineColLocDef_table_t loc =
+          (iree_vm_FileLineColLocDef_table_t)location.value;
+      flatbuffers_string_t filename = iree_vm_FileLineColLocDef_filename(loc);
+      return iree_string_builder_append_format(
+          builder, "%.*s:%d:%d", (int)flatbuffers_string_len(filename),
+          filename, iree_vm_FileLineColLocDef_line(loc),
+          iree_vm_FileLineColLocDef_column(loc));
+    }
+    case iree_vm_LocationTypeDef_FusedLocDef: {
+      // Formats as <metadata>[loc0, loc1, ...] with optional metadata.
+      iree_vm_FusedLocDef_table_t loc =
+          (iree_vm_FusedLocDef_table_t)location.value;
+      if (iree_vm_FusedLocDef_metadata_is_present(loc)) {
+        flatbuffers_string_t metadata = iree_vm_FusedLocDef_metadata(loc);
+        IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+            builder, "<%.*s>", (int)flatbuffers_string_len(metadata),
+            metadata));
+      }
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "[\n"));
+      flatbuffers_int32_vec_t child_locs = iree_vm_FusedLocDef_locations(loc);
+      for (size_t i = 0; i < flatbuffers_int32_vec_len(child_locs); ++i) {
+        if (i == 0) {
+          IREE_RETURN_IF_ERROR(
+              iree_string_builder_append_cstring(builder, "    "));
+        } else {
+          IREE_RETURN_IF_ERROR(
+              iree_string_builder_append_cstring(builder, ",\n    "));
+        }
+        IREE_RETURN_IF_ERROR(iree_vm_bytecode_location_format(
+            flatbuffers_int32_vec_at(child_locs, i), location_table, flags,
+            builder));
+      }
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(builder, "\n  ]"));
+      return iree_ok_status();
+    }
+    case iree_vm_LocationTypeDef_NameLocDef: {
+      // Formats as "name"(child_location) with the child being optional.
+      iree_vm_NameLocDef_table_t loc =
+          (iree_vm_NameLocDef_table_t)location.value;
+      flatbuffers_string_t name = iree_vm_NameLocDef_name(loc);
+      IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+          builder, "\"%.*s\"", (int)flatbuffers_string_len(name), name));
+      if (iree_vm_NameLocDef_child_location_is_present(loc)) {
+        IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "("));
+        IREE_RETURN_IF_ERROR(iree_vm_bytecode_location_format(
+            iree_vm_NameLocDef_child_location(loc), location_table, flags,
+            builder));
+        IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, ")"));
+      }
+      return iree_ok_status();
+    }
+  }
+}
+
+// Formatter callback installed on iree_vm_source_location_t by
+// iree_vm_bytecode_module_resolve_source_location: |self| carries the debug
+// database table, |data[0]| the function source map, and |data[1]| the
+// bytecode offset (pc) to resolve.
+static iree_status_t iree_vm_bytecode_module_source_location_format(
+    void* self, uint64_t data[2], iree_vm_source_location_format_flags_t flags,
+    iree_string_builder_t* builder) {
+  iree_vm_DebugDatabaseDef_table_t debug_database_def =
+      (iree_vm_DebugDatabaseDef_table_t)self;
+  iree_vm_FunctionSourceMapDef_table_t source_map_def =
+      (iree_vm_FunctionSourceMapDef_table_t)data[0];
+  iree_vm_BytecodeLocationDef_vec_t locations =
+      iree_vm_FunctionSourceMapDef_locations(source_map_def);
+  iree_vm_source_offset_t source_offset = (iree_vm_source_offset_t)data[1];
+
+  // The generated _scan_ helper returns a sentinel (comparing equal to -1)
+  // when no location entry matches the bytecode offset.
+  size_t location_def_ordinal =
+      iree_vm_BytecodeLocationDef_vec_scan_by_bytecode_offset(
+          locations, (int32_t)source_offset);
+  if (location_def_ordinal == -1) {
+    return iree_status_from_code(IREE_STATUS_UNAVAILABLE);
+  }
+  iree_vm_BytecodeLocationDef_struct_t location_def =
+      iree_vm_BytecodeLocationDef_vec_at(locations, location_def_ordinal);
+  if (!location_def) {
+    return iree_status_from_code(IREE_STATUS_UNAVAILABLE);
+  }
+
+  // Print source location stack trace.
+  iree_vm_LocationTypeDef_union_vec_t location_table =
+      iree_vm_DebugDatabaseDef_location_table_union(debug_database_def);
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_location_format(
+      location_def->location, location_table, flags, builder));
+
+  return iree_ok_status();
+}
+
+// Module interface resolve_source_location: maps |frame|'s function and pc
+// into a lazily-formatted iree_vm_source_location_t backed by the module's
+// embedded debug database. Returns UNAVAILABLE (not an error) when no debug
+// info exists for the module or the function.
+static iree_status_t iree_vm_bytecode_module_resolve_source_location(
+    void* self, iree_vm_stack_frame_t* frame,
+    iree_vm_source_location_t* out_source_location) {
+  // Get module debug database, if available.
+  iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+  iree_vm_BytecodeModuleDef_table_t module_def = module->def;
+  iree_vm_DebugDatabaseDef_table_t debug_database_def =
+      iree_vm_BytecodeModuleDef_debug_database(module_def);
+  if (!debug_database_def) {
+    return iree_status_from_code(IREE_STATUS_UNAVAILABLE);
+  }
+
+  // Map the (potentially) export ordinal into the internal function ordinal in
+  // the function descriptor table.
+  uint16_t ordinal;
+  if (frame->function.linkage == IREE_VM_FUNCTION_LINKAGE_INTERNAL) {
+    ordinal = frame->function.ordinal;
+  } else {
+    IREE_RETURN_IF_ERROR(iree_vm_bytecode_map_internal_ordinal(
+        module, frame->function, &ordinal, NULL));
+  }
+
+  // Lookup the source map for the function, if available.
+  iree_vm_FunctionSourceMapDef_vec_t source_maps_vec =
+      iree_vm_DebugDatabaseDef_functions(debug_database_def);
+  iree_vm_FunctionSourceMapDef_table_t source_map_def =
+      ordinal < iree_vm_FunctionSourceMapDef_vec_len(source_maps_vec)
+          ? iree_vm_FunctionSourceMapDef_vec_at(source_maps_vec, ordinal)
+          : NULL;
+  if (!source_map_def) {
+    return iree_status_from_code(IREE_STATUS_UNAVAILABLE);
+  }
+
+  // The source location stores the source map and PC and will perform the
+  // actual lookup within the source map on demand.
+  out_source_location->self = (void*)debug_database_def;
+  out_source_location->data[0] = (uint64_t)source_map_def;
+  out_source_location->data[1] = (uint64_t)frame->pc;
+  out_source_location->format = iree_vm_bytecode_module_source_location_format;
+  return iree_ok_status();
+}
+
+// Lays out the nested tables within a |state| structure.
+// Returns the total size of the structure and all tables with padding applied.
+// |state| may be null if only the structure size is required for allocation.
+//
+// Called twice by alloc_state: once with NULL to size the single allocation
+// and once with the allocated block to fix up the interior table pointers.
+// Both passes must compute identical offsets, so all sizing inputs come from
+// the FlatBuffer |module_def| alone. Each table is 16-byte aligned.
+static iree_host_size_t iree_vm_bytecode_module_layout_state(
+    iree_vm_BytecodeModuleDef_table_t module_def,
+    iree_vm_bytecode_module_state_t* state) {
+  iree_vm_ModuleStateDef_table_t module_state_def =
+      iree_vm_BytecodeModuleDef_module_state(module_def);
+  iree_host_size_t rwdata_storage_capacity = 0;
+  iree_host_size_t global_ref_count = 0;
+  if (module_state_def) {
+    rwdata_storage_capacity =
+        iree_vm_ModuleStateDef_global_bytes_capacity(module_state_def);
+    global_ref_count =
+        iree_vm_ModuleStateDef_global_ref_count(module_state_def);
+  }
+  iree_host_size_t rodata_ref_count = iree_vm_RodataSegmentDef_vec_len(
+      iree_vm_BytecodeModuleDef_rodata_segments(module_def));
+  iree_host_size_t import_function_count = iree_vm_ImportFunctionDef_vec_len(
+      iree_vm_BytecodeModuleDef_imported_functions(module_def));
+
+  uint8_t* base_ptr = (uint8_t*)state;
+  iree_host_size_t offset =
+      iree_host_align(sizeof(iree_vm_bytecode_module_state_t), 16);
+
+  // Global rwdata byte storage.
+  if (state) {
+    state->rwdata_storage =
+        iree_make_byte_span(base_ptr + offset, rwdata_storage_capacity);
+  }
+  offset += iree_host_align(rwdata_storage_capacity, 16);
+
+  // Global ref table.
+  if (state) {
+    state->global_ref_count = global_ref_count;
+    state->global_ref_table = (iree_vm_ref_t*)(base_ptr + offset);
+  }
+  offset += iree_host_align(global_ref_count * sizeof(iree_vm_ref_t), 16);
+
+  // Rodata buffer table.
+  if (state) {
+    state->rodata_ref_count = rodata_ref_count;
+    state->rodata_ref_table = (iree_vm_buffer_t*)(base_ptr + offset);
+  }
+  offset += iree_host_align(rodata_ref_count * sizeof(iree_vm_buffer_t), 16);
+
+  // Resolved import table.
+  if (state) {
+    state->import_count = import_function_count;
+    state->import_table = (iree_vm_bytecode_import_t*)(base_ptr + offset);
+  }
+  offset +=
+      iree_host_align(import_function_count * sizeof(*state->import_table), 16);
+
+  return offset;
+}
+
+// Module interface alloc_state: allocates per-context module state as a
+// single block sized and laid out by iree_vm_bytecode_module_layout_state.
+static iree_status_t iree_vm_bytecode_module_alloc_state(
+    void* self, iree_allocator_t allocator,
+    iree_vm_module_state_t** out_module_state) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_module_state);
+  *out_module_state = NULL;
+
+  iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+  iree_vm_BytecodeModuleDef_table_t module_def = module->def;
+
+  // Compute the total size required (with padding) for the state structure.
+  iree_host_size_t total_state_struct_size =
+      iree_vm_bytecode_module_layout_state(module_def, NULL);
+
+  // Allocate the storage for the structure and all its nested tables.
+  iree_vm_bytecode_module_state_t* state = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, total_state_struct_size,
+                                (void**)&state));
+  state->allocator = allocator;
+
+  // Perform layout to get the pointers into the storage for each nested table.
+  iree_vm_bytecode_module_layout_state(module_def, state);
+
+  // Setup rodata segments to point directly at the flatbuffer memory.
+  // A null allocator is used so buffer deinitialization will not try to free
+  // the FlatBuffer-owned bytes.
+  iree_vm_RodataSegmentDef_vec_t rodata_segments =
+      iree_vm_BytecodeModuleDef_rodata_segments(module_def);
+  for (int i = 0; i < state->rodata_ref_count; ++i) {
+    iree_vm_RodataSegmentDef_table_t segment =
+        iree_vm_RodataSegmentDef_vec_at(rodata_segments, i);
+    iree_vm_buffer_t* ref = &state->rodata_ref_table[i];
+    iree_vm_buffer_initialize(
+        IREE_VM_BUFFER_ACCESS_ORIGIN_MODULE,
+        iree_make_byte_span(
+            (uint8_t*)iree_vm_RodataSegmentDef_data(segment),
+            flatbuffers_uint8_vec_len(iree_vm_RodataSegmentDef_data(segment))),
+        iree_allocator_null(), ref);
+  }
+
+  *out_module_state = (iree_vm_module_state_t*)state;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Releases all references held by the state and frees its single allocation.
+static void iree_vm_bytecode_module_free_state(
+    void* self, iree_vm_module_state_t* module_state) {
+  if (!module_state) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_vm_bytecode_module_state_t* bytecode_state =
+      (iree_vm_bytecode_module_state_t*)module_state;
+
+  // Drop any refs still held in the global ref table.
+  for (int ordinal = 0; ordinal < bytecode_state->global_ref_count;
+       ++ordinal) {
+    iree_vm_ref_release(&bytecode_state->global_ref_table[ordinal]);
+  }
+
+  // Deinitialize each rodata buffer wrapper.
+  for (int ordinal = 0; ordinal < bytecode_state->rodata_ref_count;
+       ++ordinal) {
+    iree_vm_buffer_deinitialize(&bytecode_state->rodata_ref_table[ordinal]);
+  }
+
+  // The tables above live inside this one allocation.
+  iree_allocator_free(bytecode_state->allocator, module_state);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Module interface resolve_import: records the resolved |function| for import
+// |ordinal| in the state's import table and precomputes the ABI marshaling
+// buffer sizes from the import's calling convention |signature|.
+static iree_status_t iree_vm_bytecode_module_resolve_import(
+    void* self, iree_vm_module_state_t* module_state, iree_host_size_t ordinal,
+    const iree_vm_function_t* function,
+    const iree_vm_function_signature_t* signature) {
+  IREE_ASSERT_ARGUMENT(module_state);
+  iree_vm_bytecode_module_state_t* state =
+      (iree_vm_bytecode_module_state_t*)module_state;
+  if (ordinal >= state->import_count) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "import ordinal out of range (0 < %zu < %zu)",
+                            ordinal, state->import_count);
+  }
+
+  iree_vm_bytecode_import_t* import = &state->import_table[ordinal];
+  import->function = *function;
+
+  // Split up arguments/results into fragments so that we can avoid scanning
+  // during calling.
+  IREE_RETURN_IF_ERROR(iree_vm_function_call_get_cconv_fragments(
+      signature, &import->arguments, &import->results));
+
+  // Precalculate bytes required to marshal argument/results across the ABI
+  // boundary.
+  iree_host_size_t argument_buffer_size = 0;
+  iree_host_size_t result_buffer_size = 0;
+  if (!iree_vm_function_call_is_variadic_cconv(import->arguments)) {
+    // NOTE: variadic types don't support precalculation and the vm.call.import
+    // dispatch code will handle calculating it per-call.
+    IREE_RETURN_IF_ERROR(iree_vm_function_call_compute_cconv_fragment_size(
+        import->arguments, /*segment_size_list=*/NULL, &argument_buffer_size));
+  }
+  IREE_RETURN_IF_ERROR(iree_vm_function_call_compute_cconv_fragment_size(
+      import->results, /*segment_size_list=*/NULL, &result_buffer_size));
+  // The sizes are stored as uint16_t below; reject anything that would not
+  // fit (capped at 16KB here).
+  if (argument_buffer_size > 16 * 1024 || result_buffer_size > 16 * 1024) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "ABI marshaling buffer overflow on import %zu",
+                            ordinal);
+  }
+  import->argument_buffer_size = (uint16_t)argument_buffer_size;
+  import->result_buffer_size = (uint16_t)result_buffer_size;
+
+  return iree_ok_status();
+}
+
+// Module interface notify: signals are accepted and ignored; the bytecode
+// module takes no action on any signal.
+static iree_status_t IREE_API_PTR iree_vm_bytecode_module_notify(
+    void* self, iree_vm_module_state_t* module_state, iree_vm_signal_t signal) {
+  return iree_ok_status();
+}
+
+// Module interface begin_call: maps the callee to its internal ordinal,
+// resolves the calling convention fragments, and enters the bytecode
+// dispatch loop.
+static iree_status_t iree_vm_bytecode_module_begin_call(
+    void* self, iree_vm_stack_t* stack, const iree_vm_function_call_t* call,
+    iree_vm_execution_result_t* out_result) {
+  // NOTE: any work here adds directly to the invocation time. Avoid doing too
+  // much work or touching too many unlikely-to-be-cached structures (such as
+  // walking the FlatBuffer, which may cause page faults).
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_result);
+  memset(out_result, 0, sizeof(iree_vm_execution_result_t));
+
+  // Map the (potentially) export ordinal into the internal function ordinal in
+  // the function descriptor table.
+  iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+  uint16_t ordinal = 0;
+  iree_vm_FunctionSignatureDef_table_t signature_def = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_vm_bytecode_map_internal_ordinal(module, call->function,
+                                                &ordinal, &signature_def));
+
+  // Grab calling convention string. This is not great as we are guaranteed to
+  // have a bunch of cache misses, but without putting it on the descriptor
+  // (which would duplicate data and slow down normal intra-module calls)
+  // there's not a good way around it. In the grand scheme of things users
+  // should be keeping their calls across this boundary relatively fat (compared
+  // to the real work they do), so this only needs to be fast enough to blend
+  // into the noise. Similar to JNI, P/Invoke, etc you don't want to have
+  // imports that cost less to execute than the marshaling overhead (dozens to
+  // hundreds of instructions).
+  flatbuffers_string_t calling_convention =
+      signature_def
+          ? iree_vm_FunctionSignatureDef_calling_convention(signature_def)
+          : 0;
+  iree_vm_function_signature_t signature;
+  memset(&signature, 0, sizeof(signature));
+  signature.calling_convention.data = calling_convention;
+  signature.calling_convention.size =
+      flatbuffers_string_len(calling_convention);
+  iree_string_view_t cconv_arguments = iree_string_view_empty();
+  iree_string_view_t cconv_results = iree_string_view_empty();
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_vm_function_call_get_cconv_fragments(
+              &signature, &cconv_arguments, &cconv_results));
+
+  // Jump into the dispatch routine to execute bytecode until the function
+  // either returns (synchronous) or yields (asynchronous).
+  iree_status_t status = iree_vm_bytecode_dispatch(
+      stack, module, call, cconv_arguments, cconv_results, out_result);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Public entry point: verifies |flatbuffer_data|, allocates the module (with
+// the resolved type table trailing the struct), and wires up the module
+// interface vtable. On success the module retains |flatbuffer_data| and will
+// free it with |flatbuffer_allocator| when destroyed.
+IREE_API_EXPORT iree_status_t iree_vm_bytecode_module_create(
+    iree_const_byte_span_t flatbuffer_data,
+    iree_allocator_t flatbuffer_allocator, iree_allocator_t allocator,
+    iree_vm_module_t** out_module) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_module);
+  *out_module = NULL;
+
+  // Structural verification happens once here so the rest of the module can
+  // trust the FlatBuffer contents.
+  IREE_TRACE_ZONE_BEGIN_NAMED(z1, "iree_vm_bytecode_module_flatbuffer_verify");
+  iree_status_t status =
+      iree_vm_bytecode_module_flatbuffer_verify(flatbuffer_data);
+  if (!iree_status_is_ok(status)) {
+    IREE_TRACE_ZONE_END(z1);
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+  IREE_TRACE_ZONE_END(z1);
+
+  iree_vm_BytecodeModuleDef_table_t module_def =
+      iree_vm_BytecodeModuleDef_as_root(flatbuffer_data.data);
+  if (!module_def) {
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "failed getting root from flatbuffer; expected identifier "
+        "'" iree_vm_BytecodeModuleDef_file_identifier "' not found");
+  }
+
+  // The type table is stored inline after the module struct in one
+  // allocation.
+  iree_vm_TypeDef_vec_t type_defs = iree_vm_BytecodeModuleDef_types(module_def);
+  size_t type_table_size =
+      iree_vm_TypeDef_vec_len(type_defs) * sizeof(iree_vm_type_def_t);
+
+  iree_vm_bytecode_module_t* module = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, sizeof(*module) + type_table_size,
+                                (void**)&module));
+  module->allocator = allocator;
+
+  iree_vm_FunctionDescriptor_vec_t function_descriptors =
+      iree_vm_BytecodeModuleDef_function_descriptors(module_def);
+  module->function_descriptor_count =
+      iree_vm_FunctionDescriptor_vec_len(function_descriptors);
+  module->function_descriptor_table = function_descriptors;
+
+  flatbuffers_uint8_vec_t bytecode_data =
+      iree_vm_BytecodeModuleDef_bytecode_data(module_def);
+  module->bytecode_data = iree_make_const_byte_span(
+      bytecode_data, flatbuffers_uint8_vec_len(bytecode_data));
+
+  // Retain the FlatBuffer storage; it is released with
+  // |flatbuffer_allocator| in iree_vm_bytecode_module_destroy.
+  module->flatbuffer_data = flatbuffer_data;
+  module->flatbuffer_allocator = flatbuffer_allocator;
+  module->def = module_def;
+
+  module->type_count = iree_vm_TypeDef_vec_len(type_defs);
+  iree_status_t resolve_status =
+      iree_vm_bytecode_module_resolve_types(type_defs, module->type_table);
+  if (!iree_status_is_ok(resolve_status)) {
+    // NOTE: on this failure path the FlatBuffer memory remains owned by the
+    // caller (only the module struct is freed here).
+    iree_allocator_free(allocator, module);
+    IREE_TRACE_ZONE_END(z0);
+    return resolve_status;
+  }
+
+  iree_vm_module_initialize(&module->interface, module);
+  module->interface.destroy = iree_vm_bytecode_module_destroy;
+  module->interface.name = iree_vm_bytecode_module_name;
+  module->interface.signature = iree_vm_bytecode_module_signature;
+  module->interface.get_function = iree_vm_bytecode_module_get_function;
+  module->interface.lookup_function = iree_vm_bytecode_module_lookup_function;
+#if IREE_VM_BACKTRACE_ENABLE
+  module->interface.resolve_source_location =
+      iree_vm_bytecode_module_resolve_source_location;
+#endif  // IREE_VM_BACKTRACE_ENABLE
+  module->interface.alloc_state = iree_vm_bytecode_module_alloc_state;
+  module->interface.free_state = iree_vm_bytecode_module_free_state;
+  module->interface.resolve_import = iree_vm_bytecode_module_resolve_import;
+  module->interface.notify = iree_vm_bytecode_module_notify;
+  module->interface.begin_call = iree_vm_bytecode_module_begin_call;
+  module->interface.get_function_reflection_attr =
+      iree_vm_bytecode_module_get_function_reflection_attr;
+
+  *out_module = &module->interface;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
diff --git a/runtime/src/iree/vm/bytecode_module.h b/runtime/src/iree/vm/bytecode_module.h
new file mode 100644
index 0000000..ed7bc04
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_module.h
@@ -0,0 +1,32 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_MODULE_H_
+#define IREE_VM_BYTECODE_MODULE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Creates a VM module from an in-memory ModuleDef FlatBuffer.
+// If a |flatbuffer_allocator| is provided then it will be used to free the
+// |flatbuffer_data| when the module is destroyed and otherwise the ownership of
+// the flatbuffer_data remains with the caller.
+// The created module reads from |flatbuffer_data| for its entire lifetime, so
+// the data must remain valid until the module is destroyed.
+IREE_API_EXPORT iree_status_t iree_vm_bytecode_module_create(
+    iree_const_byte_span_t flatbuffer_data,
+    iree_allocator_t flatbuffer_allocator, iree_allocator_t allocator,
+    iree_vm_module_t** out_module);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_VM_BYTECODE_MODULE_H_
diff --git a/runtime/src/iree/vm/bytecode_module_benchmark.cc b/runtime/src/iree/vm/bytecode_module_benchmark.cc
new file mode 100644
index 0000000..9dd7960
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_module_benchmark.cc
@@ -0,0 +1,348 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <array>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "iree/base/api.h"
+#include "iree/base/logging.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode_module.h"
+#include "iree/vm/bytecode_module_benchmark_module_c.h"
+
+namespace {
+
+struct native_import_module_s;
+struct native_import_module_state_s;
+typedef struct native_import_module_t native_import_module_t;
+typedef struct native_import_module_state_t native_import_module_state_t;
+
+// vm.import @native_import_module.add_1(%arg0 : i32) -> i32
+static iree_status_t native_import_module_add_1(
+ iree_vm_stack_t* stack, const iree_vm_function_call_t* call,
+ iree_vm_native_function_target_t target_fn, void* module,
+ void* module_state, iree_vm_execution_result_t* out_result) {
+ // Add 1 to arg0 and return.
+ int32_t arg0 = *reinterpret_cast<int32_t*>(call->arguments.data);
+ int32_t ret0 = arg0 + 1;
+ *reinterpret_cast<int32_t*>(call->results.data) = ret0;
+ return iree_ok_status();
+}
+
+static const iree_vm_native_export_descriptor_t
+ native_import_module_exports_[] = {
+ {iree_make_cstring_view("add_1"), iree_make_cstring_view("0i_i"), 0,
+ NULL},
+};
+static const iree_vm_native_function_ptr_t native_import_module_funcs_[] = {
+ {(iree_vm_native_function_shim_t)native_import_module_add_1, NULL},
+};
+static_assert(IREE_ARRAYSIZE(native_import_module_funcs_) ==
+ IREE_ARRAYSIZE(native_import_module_exports_),
+ "function pointer table must be 1:1 with exports");
+static const iree_vm_native_module_descriptor_t
+ native_import_module_descriptor_ = {
+ iree_make_cstring_view("native_import_module"),
+ 0,
+ NULL,
+ IREE_ARRAYSIZE(native_import_module_exports_),
+ native_import_module_exports_,
+ IREE_ARRAYSIZE(native_import_module_funcs_),
+ native_import_module_funcs_,
+ 0,
+ NULL,
+};
+
+static iree_status_t native_import_module_create(
+ iree_allocator_t allocator, iree_vm_module_t** out_module) {
+ iree_vm_module_t interface;
+ IREE_RETURN_IF_ERROR(iree_vm_module_initialize(&interface, NULL));
+ return iree_vm_native_module_create(
+ &interface, &native_import_module_descriptor_, allocator, out_module);
+}
+
+// Benchmarks the given exported function, optionally passing in arguments.
+static iree_status_t RunFunction(benchmark::State& state,
+ iree_string_view_t function_name,
+ std::vector<int32_t> i32_args,
+ int result_count, int64_t batch_size = 1) {
+ iree_vm_instance_t* instance = NULL;
+ IREE_CHECK_OK(iree_vm_instance_create(iree_allocator_system(), &instance));
+
+ iree_vm_module_t* import_module = NULL;
+ IREE_CHECK_OK(
+ native_import_module_create(iree_allocator_system(), &import_module));
+
+ const auto* module_file_toc =
+ iree_vm_bytecode_module_benchmark_module_create();
+ iree_vm_module_t* bytecode_module = nullptr;
+ IREE_CHECK_OK(iree_vm_bytecode_module_create(
+ iree_const_byte_span_t{
+ reinterpret_cast<const uint8_t*>(module_file_toc->data),
+ module_file_toc->size},
+ iree_allocator_null(), iree_allocator_system(), &bytecode_module));
+
+ std::array<iree_vm_module_t*, 2> modules = {import_module, bytecode_module};
+ iree_vm_context_t* context = NULL;
+ IREE_CHECK_OK(iree_vm_context_create_with_modules(
+ instance, IREE_VM_CONTEXT_FLAG_NONE, modules.data(), modules.size(),
+ iree_allocator_system(), &context));
+
+ iree_vm_function_t function;
+ IREE_CHECK_OK(
+ iree_vm_context_resolve_function(context, function_name, &function));
+
+ iree_vm_function_call_t call;
+ memset(&call, 0, sizeof(call));
+ call.function = function;
+ call.arguments =
+ iree_make_byte_span(iree_alloca(i32_args.size() * sizeof(int32_t)),
+ i32_args.size() * sizeof(int32_t));
+ call.results =
+ iree_make_byte_span(iree_alloca(result_count * sizeof(int32_t)),
+ result_count * sizeof(int32_t));
+
+ IREE_VM_INLINE_STACK_INITIALIZE(stack, IREE_VM_INVOCATION_FLAG_NONE,
+ iree_vm_context_state_resolver(context),
+ iree_allocator_system());
+ while (state.KeepRunningBatch(batch_size)) {
+ for (iree_host_size_t i = 0; i < i32_args.size(); ++i) {
+ reinterpret_cast<int32_t*>(call.arguments.data)[i] = i32_args[i];
+ }
+
+ iree_vm_execution_result_t result;
+ IREE_CHECK_OK(bytecode_module->begin_call(bytecode_module->self, stack,
+ &call, &result));
+ }
+ iree_vm_stack_deinitialize(stack);
+
+ iree_vm_module_release(import_module);
+ iree_vm_module_release(bytecode_module);
+ iree_vm_context_release(context);
+ iree_vm_instance_release(instance);
+
+ return iree_ok_status();
+}
+
+static void BM_ModuleCreate(benchmark::State& state) {
+ while (state.KeepRunning()) {
+ const auto* module_file_toc =
+ iree_vm_bytecode_module_benchmark_module_create();
+ iree_vm_module_t* module = nullptr;
+ IREE_CHECK_OK(iree_vm_bytecode_module_create(
+ iree_const_byte_span_t{
+ reinterpret_cast<const uint8_t*>(module_file_toc->data),
+ module_file_toc->size},
+ iree_allocator_null(), iree_allocator_system(), &module));
+
+ // Just testing creation and verification here!
+ benchmark::DoNotOptimize(module);
+
+ iree_vm_module_release(module);
+ }
+}
+BENCHMARK(BM_ModuleCreate);
+
+static void BM_ModuleCreateState(benchmark::State& state) {
+ const auto* module_file_toc =
+ iree_vm_bytecode_module_benchmark_module_create();
+ iree_vm_module_t* module = nullptr;
+ IREE_CHECK_OK(iree_vm_bytecode_module_create(
+ iree_const_byte_span_t{
+ reinterpret_cast<const uint8_t*>(module_file_toc->data),
+ module_file_toc->size},
+ iree_allocator_null(), iree_allocator_system(), &module));
+
+ while (state.KeepRunning()) {
+ iree_vm_module_state_t* module_state;
+ module->alloc_state(module->self, iree_allocator_system(), &module_state);
+
+ // Really just testing malloc overhead, though it'll be module-dependent
+ // and if we do anything heavyweight on state init it'll show here.
+ benchmark::DoNotOptimize(module_state);
+
+ module->free_state(module->self, module_state);
+ }
+
+ iree_vm_module_release(module);
+}
+BENCHMARK(BM_ModuleCreateState);
+
+static void BM_FullModuleInit(benchmark::State& state) {
+ while (state.KeepRunning()) {
+ const auto* module_file_toc =
+ iree_vm_bytecode_module_benchmark_module_create();
+ iree_vm_module_t* module = nullptr;
+ IREE_CHECK_OK(iree_vm_bytecode_module_create(
+ iree_const_byte_span_t{
+ reinterpret_cast<const uint8_t*>(module_file_toc->data),
+ module_file_toc->size},
+ iree_allocator_null(), iree_allocator_system(), &module));
+
+ iree_vm_module_state_t* module_state;
+ module->alloc_state(module->self, iree_allocator_system(), &module_state);
+
+ benchmark::DoNotOptimize(module_state);
+
+ module->free_state(module->self, module_state);
+ iree_vm_module_release(module);
+ }
+}
+BENCHMARK(BM_FullModuleInit);
+
+IREE_ATTRIBUTE_NOINLINE static int empty_fn(void) {
+ int ret = 1;
+ benchmark::DoNotOptimize(ret);
+ return ret;
+}
+
+static void BM_EmptyFuncReference(benchmark::State& state) {
+ while (state.KeepRunning()) {
+ int ret = empty_fn();
+ benchmark::DoNotOptimize(ret);
+ benchmark::ClobberMemory();
+ }
+}
+BENCHMARK(BM_EmptyFuncReference);
+
+static void BM_EmptyFuncBytecode(benchmark::State& state) {
+ IREE_CHECK_OK(RunFunction(
+ state, iree_make_cstring_view("bytecode_module_benchmark.empty_func"), {},
+ /*result_count=*/0));
+}
+BENCHMARK(BM_EmptyFuncBytecode);
+
+IREE_ATTRIBUTE_NOINLINE static int add_fn(int value) {
+ benchmark::DoNotOptimize(value += value);
+ return value;
+}
+
+static void BM_CallInternalFuncReference(benchmark::State& state) {
+ while (state.KeepRunningBatch(10)) {
+ int value = 1;
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ value = add_fn(value);
+ benchmark::DoNotOptimize(value);
+ benchmark::ClobberMemory();
+ }
+}
+BENCHMARK(BM_CallInternalFuncReference);
+
+static void BM_CallInternalFuncBytecode(benchmark::State& state) {
+ IREE_CHECK_OK(RunFunction(
+ state,
+ iree_make_cstring_view("bytecode_module_benchmark.call_internal_func"),
+ {100},
+ /*result_count=*/1,
+ /*batch_size=*/20));
+}
+BENCHMARK(BM_CallInternalFuncBytecode);
+
+static void BM_CallImportedFuncBytecode(benchmark::State& state) {
+ IREE_CHECK_OK(RunFunction(
+ state,
+ iree_make_cstring_view("bytecode_module_benchmark.call_imported_func"),
+ {100},
+ /*result_count=*/1,
+ /*batch_size=*/20));
+}
+BENCHMARK(BM_CallImportedFuncBytecode);
+
+static void BM_LoopSumReference(benchmark::State& state) {
+ static auto work = +[](int x) {
+ benchmark::DoNotOptimize(x);
+ return x;
+ };
+ static auto loop = +[](int count) {
+ int i = 0;
+ for (; i < count; ++i) {
+ benchmark::DoNotOptimize(i = work(i));
+ }
+ return i;
+ };
+ while (state.KeepRunningBatch(state.range(0))) {
+ int ret = loop(static_cast<int>(state.range(0)));
+ benchmark::DoNotOptimize(ret);
+ benchmark::ClobberMemory();
+ }
+}
+BENCHMARK(BM_LoopSumReference)->Arg(100000);
+
+static void BM_LoopSumBytecode(benchmark::State& state) {
+ IREE_CHECK_OK(RunFunction(
+ state, iree_make_cstring_view("bytecode_module_benchmark.loop_sum"),
+ {static_cast<int32_t>(state.range(0))},
+ /*result_count=*/1,
+ /*batch_size=*/state.range(0)));
+}
+BENCHMARK(BM_LoopSumBytecode)->Arg(100000);
+
+static void BM_BufferReduceReference(benchmark::State& state) {
+ static auto work = +[](int32_t* buffer, int i, int sum) {
+ int new_sum = buffer[i] + sum;
+ benchmark::DoNotOptimize(new_sum);
+ return new_sum;
+ };
+ static auto loop = +[](int32_t* buffer, int count) {
+ int sum = 0;
+ for (int i = 0; i < count; ++i) {
+ benchmark::DoNotOptimize(sum = work(buffer, i, sum));
+ }
+ return sum;
+ };
+ while (state.KeepRunningBatch(state.range(0))) {
+ int32_t* buffer = (int32_t*)malloc(state.range(0) * 4);
+ for (int i = 0; i < state.range(0); ++i) {
+ buffer[i] = 1;
+ }
+ int ret = loop(buffer, static_cast<int>(state.range(0)));
+ benchmark::DoNotOptimize(ret);
+ benchmark::ClobberMemory();
+ free(buffer);
+ }
+}
+BENCHMARK(BM_BufferReduceReference)->Arg(100000);
+
+static void BM_BufferReduceBytecode(benchmark::State& state) {
+ IREE_CHECK_OK(RunFunction(
+ state, iree_make_cstring_view("bytecode_module_benchmark.buffer_reduce"),
+ {static_cast<int32_t>(state.range(0))},
+ /*result_count=*/1,
+ /*batch_size=*/state.range(0)));
+}
+BENCHMARK(BM_BufferReduceBytecode)->Arg(100000);
+
+// NOTE: unrolled 8x, requires %count to be % 8 = 0.
+static void BM_BufferReduceBytecodeUnrolled(benchmark::State& state) {
+ IREE_CHECK_OK(
+ RunFunction(state,
+ iree_make_cstring_view(
+ "bytecode_module_benchmark.buffer_reduce_unrolled"),
+ {static_cast<int32_t>(state.range(0))},
+ /*result_count=*/1,
+ /*batch_size=*/state.range(0)));
+}
+BENCHMARK(BM_BufferReduceBytecodeUnrolled)->Arg(100000);
+
+} // namespace
diff --git a/runtime/src/iree/vm/bytecode_module_benchmark.mlir b/runtime/src/iree/vm/bytecode_module_benchmark.mlir
new file mode 100644
index 0000000..6a076f5
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_module_benchmark.mlir
@@ -0,0 +1,143 @@
+vm.module @bytecode_module_benchmark {
+ // Measures the pure overhead of calling into/returning from a module.
+ vm.export @empty_func
+ vm.func @empty_func() {
+ vm.return
+ }
+
+  // Measures the cost of a call to an internal function.
+ vm.func @internal_func(%arg0 : i32) -> i32 attributes {noinline} {
+ vm.return %arg0 : i32
+ }
+ vm.export @call_internal_func
+ vm.func @call_internal_func(%arg0 : i32) -> i32 {
+ %0 = vm.call @internal_func(%arg0) : (i32) -> i32
+ %1 = vm.call @internal_func(%0) : (i32) -> i32
+ %2 = vm.call @internal_func(%1) : (i32) -> i32
+ %3 = vm.call @internal_func(%2) : (i32) -> i32
+ %4 = vm.call @internal_func(%3) : (i32) -> i32
+ %5 = vm.call @internal_func(%4) : (i32) -> i32
+ %6 = vm.call @internal_func(%5) : (i32) -> i32
+ %7 = vm.call @internal_func(%6) : (i32) -> i32
+ %8 = vm.call @internal_func(%7) : (i32) -> i32
+ %9 = vm.call @internal_func(%8) : (i32) -> i32
+ %10 = vm.call @internal_func(%9) : (i32) -> i32
+ %11 = vm.call @internal_func(%10) : (i32) -> i32
+ %12 = vm.call @internal_func(%11) : (i32) -> i32
+ %13 = vm.call @internal_func(%12) : (i32) -> i32
+ %14 = vm.call @internal_func(%13) : (i32) -> i32
+ %15 = vm.call @internal_func(%14) : (i32) -> i32
+ %16 = vm.call @internal_func(%15) : (i32) -> i32
+ %17 = vm.call @internal_func(%16) : (i32) -> i32
+ %18 = vm.call @internal_func(%17) : (i32) -> i32
+ %19 = vm.call @internal_func(%18) : (i32) -> i32
+ %20 = vm.call @internal_func(%19) : (i32) -> i32
+ vm.return %20 : i32
+ }
+
+ // Measures the cost of a call to an imported function.
+ vm.import @native_import_module.add_1(%arg : i32) -> i32
+ vm.export @call_imported_func
+ vm.func @call_imported_func(%arg0 : i32) -> i32 {
+ %0 = vm.call @native_import_module.add_1(%arg0) : (i32) -> i32
+ %1 = vm.call @native_import_module.add_1(%0) : (i32) -> i32
+ %2 = vm.call @native_import_module.add_1(%1) : (i32) -> i32
+ %3 = vm.call @native_import_module.add_1(%2) : (i32) -> i32
+ %4 = vm.call @native_import_module.add_1(%3) : (i32) -> i32
+ %5 = vm.call @native_import_module.add_1(%4) : (i32) -> i32
+ %6 = vm.call @native_import_module.add_1(%5) : (i32) -> i32
+ %7 = vm.call @native_import_module.add_1(%6) : (i32) -> i32
+ %8 = vm.call @native_import_module.add_1(%7) : (i32) -> i32
+ %9 = vm.call @native_import_module.add_1(%8) : (i32) -> i32
+ %10 = vm.call @native_import_module.add_1(%9) : (i32) -> i32
+ %11 = vm.call @native_import_module.add_1(%10) : (i32) -> i32
+ %12 = vm.call @native_import_module.add_1(%11) : (i32) -> i32
+ %13 = vm.call @native_import_module.add_1(%12) : (i32) -> i32
+ %14 = vm.call @native_import_module.add_1(%13) : (i32) -> i32
+ %15 = vm.call @native_import_module.add_1(%14) : (i32) -> i32
+ %16 = vm.call @native_import_module.add_1(%15) : (i32) -> i32
+ %17 = vm.call @native_import_module.add_1(%16) : (i32) -> i32
+ %18 = vm.call @native_import_module.add_1(%17) : (i32) -> i32
+ %19 = vm.call @native_import_module.add_1(%18) : (i32) -> i32
+ %20 = vm.call @native_import_module.add_1(%19) : (i32) -> i32
+ vm.return %20 : i32
+ }
+
+ // Measures the cost of a simple for-loop.
+ vm.export @loop_sum
+ vm.func @loop_sum(%count : i32) -> i32 {
+ %c1 = vm.const.i32 1
+ %i0 = vm.const.i32.zero
+ vm.br ^loop(%i0 : i32)
+ ^loop(%i : i32):
+ %in = vm.add.i32 %i, %c1 : i32
+ %cmp = vm.cmp.lt.i32.s %in, %count : i32
+ vm.cond_br %cmp, ^loop(%in : i32), ^loop_exit(%in : i32)
+ ^loop_exit(%ie : i32):
+ vm.return %ie : i32
+ }
+
+ // Measures the cost of lots of buffer loads.
+ vm.export @buffer_reduce
+ vm.func @buffer_reduce(%count : i32) -> i32 {
+ %c0 = vm.const.i32.zero
+ %c1 = vm.const.i32 1
+ %c4 = vm.const.i32 4
+ %max = vm.mul.i32 %count, %c4 : i32
+ %buf = vm.buffer.alloc %max : !vm.buffer
+ vm.buffer.fill.i32 %buf, %c0, %max, %c1 : i32 -> !vm.buffer
+ vm.br ^loop(%c0, %c0 : i32, i32)
+ ^loop(%i : i32, %sum : i32):
+ %element = vm.buffer.load.i32 %buf[%i] : !vm.buffer -> i32
+ %new_sum = vm.add.i32 %sum, %element : i32
+ %ip4 = vm.add.i32 %i, %c4 : i32
+ %cmp = vm.cmp.lt.i32.s %ip4, %max : i32
+ vm.cond_br %cmp, ^loop(%ip4, %new_sum : i32, i32), ^loop_exit(%new_sum : i32)
+ ^loop_exit(%result : i32):
+ vm.return %result : i32
+ }
+
+ // Measures the cost of lots of buffer loads when somewhat unrolled.
+ // NOTE: unrolled 8x, requires %count to be % 8 = 0.
+ vm.export @buffer_reduce_unrolled
+ vm.func @buffer_reduce_unrolled(%count : i32) -> i32 {
+ %c0 = vm.const.i32.zero
+ %c1 = vm.const.i32 1
+ %c4 = vm.const.i32 4
+ %max = vm.mul.i32 %count, %c4 : i32
+ %buf = vm.buffer.alloc %max : !vm.buffer
+ vm.buffer.fill.i32 %buf, %c0, %max, %c1 : i32 -> !vm.buffer
+ vm.br ^loop(%c0, %c0 : i32, i32)
+ ^loop(%i0 : i32, %sum : i32):
+    // TODO(#5544): add addressing modes to load/store.
+ %e0 = vm.buffer.load.i32 %buf[%i0] : !vm.buffer -> i32
+ %i1 = vm.add.i32 %i0, %c4 : i32
+ %e1 = vm.buffer.load.i32 %buf[%i1] : !vm.buffer -> i32
+ %i2 = vm.add.i32 %i1, %c4 : i32
+ %e2 = vm.buffer.load.i32 %buf[%i2] : !vm.buffer -> i32
+ %i3 = vm.add.i32 %i2, %c4 : i32
+ %e3 = vm.buffer.load.i32 %buf[%i3] : !vm.buffer -> i32
+ %i4 = vm.add.i32 %i3, %c4 : i32
+ %e4 = vm.buffer.load.i32 %buf[%i4] : !vm.buffer -> i32
+ %i5 = vm.add.i32 %i4, %c4 : i32
+ %e5 = vm.buffer.load.i32 %buf[%i5] : !vm.buffer -> i32
+ %i6 = vm.add.i32 %i5, %c4 : i32
+ %e6 = vm.buffer.load.i32 %buf[%i6] : !vm.buffer -> i32
+ %i7 = vm.add.i32 %i6, %c4 : i32
+ %e7 = vm.buffer.load.i32 %buf[%i7] : !vm.buffer -> i32
+ // If we do reductions like this we could add a horizontal-add op.
+ %new_sum0 = vm.add.i32 %sum, %e0 : i32
+ %new_sum1 = vm.add.i32 %new_sum0, %e1 : i32
+ %new_sum2 = vm.add.i32 %new_sum1, %e2 : i32
+ %new_sum3 = vm.add.i32 %new_sum2, %e3 : i32
+ %new_sum4 = vm.add.i32 %new_sum3, %e4 : i32
+ %new_sum5 = vm.add.i32 %new_sum4, %e5 : i32
+ %new_sum6 = vm.add.i32 %new_sum5, %e6 : i32
+ %new_sum7 = vm.add.i32 %new_sum6, %e7 : i32
+ %next_i = vm.add.i32 %i7, %c4 : i32
+ %cmp = vm.cmp.lt.i32.s %next_i, %max : i32
+ vm.cond_br %cmp, ^loop(%next_i, %new_sum7 : i32, i32), ^loop_exit(%new_sum7 : i32)
+ ^loop_exit(%result : i32):
+ vm.return %result : i32
+ }
+}
diff --git a/runtime/src/iree/vm/bytecode_module_impl.h b/runtime/src/iree/vm/bytecode_module_impl.h
new file mode 100644
index 0000000..01031b1
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_module_impl.h
@@ -0,0 +1,137 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_MODULE_IMPL_H_
+#define IREE_VM_BYTECODE_MODULE_IMPL_H_
+
+#include <stdint.h>
+#include <string.h>
+
+// VC++ does not have C11's stdalign.h.
+#if !defined(_MSC_VER)
+#include <stdalign.h>
+#endif // _MSC_VER
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+
+// NOTE: include order matters:
+#include "iree/base/internal/flatcc/parsing.h"
+#include "iree/schemas/bytecode_module_def_reader.h"
+#include "iree/schemas/bytecode_module_def_verifier.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define VMMAX(a, b) (((a) > (b)) ? (a) : (b))
+#define VMMIN(a, b) (((a) < (b)) ? (a) : (b))
+
+// Maximum register count per bank.
+// This determines the bits required to reference registers in the VM bytecode.
+#define IREE_I32_REGISTER_COUNT 0x7FFF
+#define IREE_REF_REGISTER_COUNT 0x7FFF
+
+#define IREE_I32_REGISTER_MASK 0x7FFF
+
+#define IREE_REF_REGISTER_TYPE_BIT 0x8000
+#define IREE_REF_REGISTER_MOVE_BIT 0x4000
+#define IREE_REF_REGISTER_MASK 0x3FFF
+
+// A loaded bytecode module.
+typedef struct iree_vm_bytecode_module_t {
+ // Interface routing to the bytecode module functions.
+ // Must be first in the struct as we dereference the interface to find our
+ // members below.
+ iree_vm_module_t interface;
+
+ // Table of internal function bytecode descriptors.
+ // Mapped 1:1 with internal functions. Each defined bytecode span represents a
+ // range of bytes in |bytecode_data|.
+ iree_host_size_t function_descriptor_count;
+ const iree_vm_FunctionDescriptor_t* function_descriptor_table;
+
+ // A pointer to the bytecode data embedded within the module.
+ iree_const_byte_span_t bytecode_data;
+
+ // Allocator this module was allocated with and must be freed with.
+ iree_allocator_t allocator;
+
+ // Underlying FlatBuffer data and allocator (which may be null).
+ iree_const_byte_span_t flatbuffer_data;
+ iree_allocator_t flatbuffer_allocator;
+ iree_vm_BytecodeModuleDef_table_t def;
+
+ // Type table mapping module type IDs to registered VM types.
+ iree_host_size_t type_count;
+ iree_vm_type_def_t type_table[];
+} iree_vm_bytecode_module_t;
+
+// A resolved and split import in the module state table.
+//
+// NOTE: a table of these are stored per module per context so ideally we'd
+// only store the absolute minimum information to reduce our fixed overhead.
+// There's a big tradeoff though as a few extra bytes here can avoid non-trivial
+// work per import function invocation.
+typedef struct iree_vm_bytecode_import_t {
+ // Import function in the source module.
+ iree_vm_function_t function;
+
+ // Pre-parsed argument/result calling convention string fragments.
+ // For example, 0ii.r will be split to arguments=ii and results=r.
+ iree_string_view_t arguments;
+ iree_string_view_t results;
+
+ // Precomputed argument/result size requirements for marshaling values.
+ // Only usable for non-variadic signatures. Results are always usable as they
+ // don't support variadic values (yet).
+ uint16_t argument_buffer_size;
+ uint16_t result_buffer_size;
+} iree_vm_bytecode_import_t;
+
+// Per-instance module state.
+// This is allocated with a provided allocator as a single flat allocation.
+// This struct is a prefix to the allocation pointing into the dynamic offsets
+// of the allocation storage.
+typedef struct iree_vm_bytecode_module_state_t {
+ // Combined rwdata storage for the entire module, including globals.
+ // Aligned to 16 bytes (128-bits) for SIMD usage.
+ iree_byte_span_t rwdata_storage;
+
+ // Global ref values, indexed by global ordinal.
+ iree_host_size_t global_ref_count;
+ iree_vm_ref_t* global_ref_table;
+
+ // TODO(benvanik): move to iree_vm_bytecode_module_t if always static.
+ // Initialized references to rodata segments.
+ // Right now these don't do much, however we can perform lazy caching and
+ // on-the-fly decompression using this information.
+ iree_host_size_t rodata_ref_count;
+ iree_vm_buffer_t* rodata_ref_table;
+
+ // Resolved function imports.
+ iree_host_size_t import_count;
+ iree_vm_bytecode_import_t* import_table;
+
+ // Allocator used for the state itself and any runtime allocations needed.
+ iree_allocator_t allocator;
+} iree_vm_bytecode_module_state_t;
+
+// Begins (or resumes) execution of the current frame and continues until
+// either a yield or return. |out_result| will contain the result status for
+// continuation, if needed.
+iree_status_t iree_vm_bytecode_dispatch(iree_vm_stack_t* stack,
+ iree_vm_bytecode_module_t* module,
+ const iree_vm_function_call_t* call,
+ iree_string_view_t cconv_arguments,
+ iree_string_view_t cconv_results,
+ iree_vm_execution_result_t* out_result);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_BYTECODE_MODULE_IMPL_H_
diff --git a/runtime/src/iree/vm/bytecode_module_size_benchmark.cc b/runtime/src/iree/vm/bytecode_module_size_benchmark.cc
new file mode 100644
index 0000000..164a223
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_module_size_benchmark.cc
@@ -0,0 +1,44 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode_module.h"
+#include "iree/vm/bytecode_module_size_benchmark_module_c.h"
+
+extern "C" int main(int argc, char** argv) {
+ iree_vm_instance_t* instance = nullptr;
+ iree_vm_instance_create(iree_allocator_system(), &instance);
+
+ const auto* module_file_toc =
+ iree_vm_bytecode_module_size_benchmark_module_create();
+ iree_vm_module_t* module = nullptr;
+ iree_vm_bytecode_module_create(
+ iree_const_byte_span_t{
+ reinterpret_cast<const uint8_t*>(module_file_toc->data),
+ module_file_toc->size},
+ iree_allocator_null(), iree_allocator_system(), &module);
+
+ iree_vm_context_t* context = nullptr;
+ iree_vm_context_create_with_modules(instance, IREE_VM_CONTEXT_FLAG_NONE,
+ &module, /*module_count=*/1,
+ iree_allocator_system(), &context);
+
+ iree_vm_function_t function;
+ iree_vm_module_lookup_function_by_name(
+ module, IREE_VM_FUNCTION_LINKAGE_EXPORT,
+ iree_make_cstring_view("empty_func"), &function);
+
+ iree_vm_invoke(context, function, IREE_VM_INVOCATION_FLAG_NONE,
+ /*policy=*/nullptr, /*inputs=*/nullptr,
+ /*outputs=*/nullptr, iree_allocator_system());
+
+ iree_vm_module_release(module);
+ iree_vm_context_release(context);
+ iree_vm_instance_release(instance);
+
+ return 0;
+}
diff --git a/runtime/src/iree/vm/bytecode_module_size_benchmark.mlir b/runtime/src/iree/vm/bytecode_module_size_benchmark.mlir
new file mode 100644
index 0000000..84ad13c
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_module_size_benchmark.mlir
@@ -0,0 +1,6 @@
+vm.module @bytecode_module_size_benchmark {
+ vm.export @empty_func
+ vm.func @empty_func() {
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/bytecode_module_test.cc b/runtime/src/iree/vm/bytecode_module_test.cc
new file mode 100644
index 0000000..71ffb27
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode_module_test.cc
@@ -0,0 +1,19 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Tests for bytecode_module.cc implementations.
+// This means mostly just flatbuffer verification, module interface functions,
+// etc. bytecode_dispatch_test.cc covers actual dispatch.
+
+#include "iree/vm/bytecode_module.h"
+
+#include "iree/testing/gtest.h"
+
+namespace {
+
+// TODO(benvanik): bytecode_module_test.cc for flatbuffer/module implementation.
+
+} // namespace
diff --git a/runtime/src/iree/vm/context.c b/runtime/src/iree/vm/context.c
new file mode 100644
index 0000000..2508075
--- /dev/null
+++ b/runtime/src/iree/vm/context.c
@@ -0,0 +1,627 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/context.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/tracing.h"
+
+// An isolated execution context: an ordered list of registered modules and
+// their per-context state. See context.h for the public contract.
+struct iree_vm_context_t {
+  // Reference count; the context is destroyed when this reaches zero.
+  iree_atomic_ref_count_t ref_count;
+  // Instance the context was created from; retained for the context lifetime.
+  iree_vm_instance_t* instance;
+  // Allocator used for context storage and (when grown) the lists below.
+  iree_allocator_t allocator;
+  // Process-unique ID assigned at creation time.
+  intptr_t context_id;
+
+  // Context has been frozen and can no longer be modified.
+  uint32_t is_frozen : 1;
+  // Context storage is statically allocated and need not be freed.
+  uint32_t is_static : 1;
+
+  // Configuration flags.
+  iree_vm_context_flags_t flags;
+
+  // Registered modules and their per-context state, in registration order;
+  // modules[i] pairs with module_states[i]. When the context is created with
+  // a static module set both tables live in the same allocation as the
+  // context itself.
+  struct {
+    iree_host_size_t count;
+    iree_host_size_t capacity;
+    iree_vm_module_t** modules;
+    iree_vm_module_state_t** module_states;
+  } list;
+};
+
+static void iree_vm_context_destroy(iree_vm_context_t* context);
+
+// Runs a single `() -> ()` function from the module if it exists.
+// Returns OK when the function is not exported (the call is optional);
+// any other lookup or execution failure is propagated to the caller.
+// |stack| must already be initialized; used for __init/__deinit hooks.
+static iree_status_t iree_vm_context_run_function(
+    iree_vm_stack_t* stack, iree_vm_module_t* module,
+    iree_string_view_t function_name) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Lookup is by exported name; a missing function is an expected outcome.
+  iree_vm_function_call_t call;
+  memset(&call, 0, sizeof(call));
+  iree_status_t status = iree_vm_module_lookup_function_by_name(
+      module, IREE_VM_FUNCTION_LINKAGE_EXPORT, function_name, &call.function);
+  if (iree_status_is_not_found(status)) {
+    // Function doesn't exist; that's ok as this was an optional call.
+    iree_status_ignore(status);
+    IREE_TRACE_ZONE_END(z0);
+    return iree_ok_status();
+  } else if (!iree_status_is_ok(status)) {
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+
+  // No arguments/results: the call struct carries only the function.
+  iree_vm_execution_result_t result;
+  status = module->begin_call(module->self, stack, &call, &result);
+  if (!iree_status_is_ok(status)) {
+    status = IREE_VM_STACK_ANNOTATE_BACKTRACE_IF_ENABLED(stack, status);
+  }
+
+  // TODO(benvanik): ensure completed synchronously.
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// iree_vm_state_resolver_t callback: resolves the per-context state for
+// |module| by scanning the context module list. |state_resolver| is the
+// iree_vm_context_t* stashed in the resolver's self pointer.
+// Returns NOT_FOUND if |module| is not registered with the context.
+static iree_status_t iree_vm_context_query_module_state(
+    void* state_resolver, iree_vm_module_t* module,
+    iree_vm_module_state_t** out_module_state) {
+  IREE_ASSERT_ARGUMENT(state_resolver);
+  IREE_ASSERT_ARGUMENT(module);
+  IREE_ASSERT_ARGUMENT(out_module_state);
+  iree_vm_context_t* context = (iree_vm_context_t*)state_resolver;
+  // NOTE: this is a linear scan, but given that the list of modules should be
+  // N<4 this is faster than just about anything else we could do.
+  // To future performance profilers: sorry when N>>4 :)
+  for (int i = 0; i < context->list.count; ++i) {
+    if (context->list.modules[i] == module) {
+      *out_module_state = context->list.module_states[i];
+      return iree_ok_status();
+    }
+  }
+  return iree_make_status(IREE_STATUS_NOT_FOUND);
+}
+
+// Resolves every import declared by |module| against the modules already
+// registered with |context| and installs the resolved functions into
+// |module_state| via resolve_import. Optional imports that cannot be found
+// are left unresolved; required imports that cannot be found fail the call.
+static iree_status_t iree_vm_context_resolve_module_imports(
+    iree_vm_context_t* context, iree_vm_module_t* module,
+    iree_vm_module_state_t* module_state) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // NOTE: this has some bad characteristics, but the number of modules and the
+  // number of imported functions should be relatively small (even if the number
+  // of exported functions for particular modules is large).
+  iree_vm_module_signature_t module_signature = module->signature(module->self);
+  for (int i = 0; i < module_signature.import_function_count; ++i) {
+    // Query the i'th import declaration (linkage, fully-qualified name, and
+    // the signature the importer expects).
+    iree_vm_function_t decl_function;
+    iree_string_view_t full_name;
+    iree_vm_function_signature_t expected_signature;
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
+        module->get_function(module->self, IREE_VM_FUNCTION_LINKAGE_IMPORT, i,
+                             /*out_function=*/&decl_function,
+                             /*out_name=*/&full_name,
+                             /*out_signature=*/&expected_signature));
+
+    // Resolve the function to the module that contains it and return the
+    // information.
+    iree_vm_function_t import_function;
+    iree_status_t resolve_status =
+        iree_vm_context_resolve_function(context, full_name, &import_function);
+    if (!iree_status_is_ok(resolve_status)) {
+      if (iree_status_is_not_found(resolve_status) &&
+          decl_function.linkage == IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL) {
+        // Failed to find the function but it was optionally imported and that's
+        // ok. We'll just continue the resolution process and leave the import
+        // unspecified on the target module.
+        iree_status_ignore(resolve_status);
+        continue;
+      } else {
+        // Failed to find the function.
+        IREE_TRACE_ZONE_END(z0);
+        return resolve_status;
+      }
+    }
+
+    // Query the function signature from the module that contains it; we don't
+    // use the signature from the module requesting the import as we want a
+    // single source of truth.
+    iree_vm_function_signature_t import_signature =
+        iree_vm_function_signature(&import_function);
+
+    // Simple check to confirm the signatures match. We still can't trust that
+    // the module using the import *actually* calls it with the right convention
+    // (so this is not a safety check!), but this will catch the 99% case of a
+    // signature changing out from under a module or using a module with a newer
+    // signature than that provided by the imported module.
+    //
+    // We allow modules to not define their cconv expectation as in a lot of
+    // cases where modules are all compiled into the same binary there's no
+    // value in performing the verification. Runtime checks during calls will
+    // fail with less awesome logging but that's the tradeoff.
+    if (expected_signature.calling_convention.size &&
+        !iree_string_view_equal(import_signature.calling_convention,
+                                expected_signature.calling_convention)) {
+      IREE_TRACE_ZONE_END(z0);
+      return iree_make_status(
+          IREE_STATUS_INTERNAL,
+          "import function signature mismatch between %.*s "
+          "and source %.*s; expected %.*s but got %.*s",
+          (int)iree_vm_module_name(module).size,
+          iree_vm_module_name(module).data,
+          (int)iree_vm_module_name(import_function.module).size,
+          iree_vm_module_name(import_function.module).data,
+          (int)expected_signature.calling_convention.size,
+          expected_signature.calling_convention.data,
+          (int)import_signature.calling_convention.size,
+          import_signature.calling_convention.data);
+    }
+
+    // Install the resolved function into the importer's state table.
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, module->resolve_import(module->self, module_state, i,
+                                   &import_function, &import_signature));
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Tears down modules in the inclusive index range [start, end]: runs any
+// exported __deinit functions, frees per-context module state, and releases
+// the module references — all in reverse registration order. Tolerates
+// partially-initialized entries (NULL state/module) from failed registration.
+static void iree_vm_context_release_modules(iree_vm_context_t* context,
+                                            iree_host_size_t start,
+                                            iree_host_size_t end) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Run module __deinit functions, if present (in reverse init order).
+  IREE_VM_INLINE_STACK_INITIALIZE(
+      stack,
+      context->flags & IREE_VM_CONTEXT_FLAG_TRACE_EXECUTION
+          ? IREE_VM_INVOCATION_FLAG_TRACE_EXECUTION
+          : IREE_VM_INVOCATION_FLAG_NONE,
+      iree_vm_context_state_resolver(context), context->allocator);
+  for (int i = (int)end; i >= (int)start; --i) {
+    iree_vm_module_t* module = context->list.modules[i];
+    iree_vm_module_state_t* module_state = context->list.module_states[i];
+    if (!module_state) {
+      // Partially initialized; skip.
+      continue;
+    }
+    IREE_IGNORE_ERROR(iree_vm_context_run_function(
+        stack, module, iree_make_cstring_view("__deinit")));
+  }
+  iree_vm_stack_deinitialize(stack);
+
+  // Release all module state (in reverse init order).
+  for (int i = (int)end; i >= (int)start; --i) {
+    iree_vm_module_t* module = context->list.modules[i];
+    // It is possible in error states to have partially initialized.
+    if (context->list.module_states[i]) {
+      module->free_state(module->self, context->list.module_states[i]);
+      context->list.module_states[i] = NULL;
+    }
+  }
+
+  // Release modules now that there are no import tables remaining.
+  for (int i = (int)end; i >= (int)start; --i) {
+    if (context->list.modules[i]) {
+      iree_vm_module_release(context->list.modules[i]);
+      context->list.modules[i] = NULL;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Creates an empty, growable context; modules may be registered afterwards.
+// Forwards to iree_vm_context_create_with_modules with an empty module list.
+IREE_API_EXPORT iree_status_t iree_vm_context_create(
+    iree_vm_instance_t* instance, iree_vm_context_flags_t flags,
+    iree_allocator_t allocator, iree_vm_context_t** out_context) {
+  return iree_vm_context_create_with_modules(instance, flags, NULL, 0,
+                                             allocator, out_context);
+}
+
+// Creates a context and registers the given static module set in one
+// allocation. Contexts created with modules are frozen (no further
+// registration). |out_context| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_vm_context_create_with_modules(
+    iree_vm_instance_t* instance, iree_vm_context_flags_t flags,
+    iree_vm_module_t** modules, iree_host_size_t module_count,
+    iree_allocator_t allocator, iree_vm_context_t** out_context) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_context);
+  *out_context = NULL;
+
+  // The context struct and both pointer tables share a single allocation.
+  iree_host_size_t context_size =
+      sizeof(iree_vm_context_t) + sizeof(iree_vm_module_t*) * module_count +
+      sizeof(iree_vm_module_state_t*) * module_count;
+
+  // Allocation failure must be propagated: everything below dereferences
+  // |context| and would crash on a NULL result.
+  iree_vm_context_t* context = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, context_size, (void**)&context));
+  iree_atomic_ref_count_init(&context->ref_count);
+  context->instance = instance;
+  iree_vm_instance_retain(context->instance);
+  context->allocator = allocator;
+
+  // Assign a process-unique (monotonically increasing) context ID.
+  static iree_atomic_int32_t next_context_id = IREE_ATOMIC_VAR_INIT(1);
+  context->context_id = iree_atomic_fetch_add_int32(&next_context_id, 1,
+                                                    iree_memory_order_seq_cst);
+
+  // TODO(benvanik): allow for non-frozen but static contexts.
+  context->is_frozen = module_count > 0;
+  context->is_static = module_count > 0;
+  context->flags = flags;
+
+  // Point the module/state tables at the trailing bytes of the allocation.
+  uint8_t* p = (uint8_t*)context + sizeof(iree_vm_context_t);
+  context->list.modules = (iree_vm_module_t**)p;
+  p += sizeof(iree_vm_module_t*) * module_count;
+  context->list.module_states = (iree_vm_module_state_t**)p;
+  p += sizeof(iree_vm_module_state_t*) * module_count;
+  context->list.count = 0;
+  context->list.capacity = module_count;
+
+  // Register the initial module set; on failure tear down the partially
+  // constructed context (destroy tolerates partially-initialized lists).
+  iree_status_t register_status =
+      iree_vm_context_register_modules(context, modules, module_count);
+  if (!iree_status_is_ok(register_status)) {
+    iree_vm_context_destroy(context);
+    IREE_TRACE_ZONE_END(z0);
+    return register_status;
+  }
+
+  *out_context = context;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Frees the context and everything it owns: releases all registered modules
+// (inclusive range [0, count-1]), any dynamically grown list storage, the
+// instance reference, and finally the context allocation itself.
+// Safe to call on a partially-constructed context. No-op on NULL.
+static void iree_vm_context_destroy(iree_vm_context_t* context) {
+  if (!context) return;
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  if (context->list.count > 0) {
+    iree_vm_context_release_modules(context, 0, context->list.count - 1);
+  }
+
+  // Note: For non-static module lists, it is only dynamically allocated if
+  // capacity > 0.
+  if (!context->is_static && context->list.capacity > 0) {
+    iree_allocator_free(context->allocator, context->list.modules);
+    context->list.modules = NULL;
+    iree_allocator_free(context->allocator, context->list.module_states);
+    context->list.module_states = NULL;
+  }
+
+  iree_vm_instance_release(context->instance);
+  context->instance = NULL;
+
+  iree_allocator_free(context->allocator, context);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Retains the context for the caller. No-op on NULL.
+IREE_API_EXPORT void iree_vm_context_retain(iree_vm_context_t* context) {
+  if (context) {
+    iree_atomic_ref_count_inc(&context->ref_count);
+  }
+}
+
+// Releases the caller's reference; destroys the context when the count hits
+// zero (the decrement returns the previous value). No-op on NULL.
+IREE_API_EXPORT void iree_vm_context_release(iree_vm_context_t* context) {
+  if (context && iree_atomic_ref_count_dec(&context->ref_count) == 1) {
+    iree_vm_context_destroy(context);
+  }
+}
+
+// Returns the process-unique ID assigned at creation, or -1 for NULL.
+IREE_API_EXPORT intptr_t iree_vm_context_id(const iree_vm_context_t* context) {
+  if (!context) {
+    return -1;
+  }
+  return context->context_id;
+}
+
+// Returns the flags the context was created with.
+IREE_API_EXPORT iree_vm_context_flags_t
+iree_vm_context_flags(const iree_vm_context_t* context) {
+  IREE_ASSERT_ARGUMENT(context);
+  return context->flags;
+}
+
+// Registers |modules| with the context in order: grows list storage if
+// needed, retains each module, allocates its state, resolves its imports,
+// and runs its optional exported __init function. On any failure the
+// modules touched by this call are rolled back and the context is left with
+// its original module set.
+IREE_API_EXPORT iree_status_t iree_vm_context_register_modules(
+    iree_vm_context_t* context, iree_vm_module_t** modules,
+    iree_host_size_t module_count) {
+  IREE_ASSERT_ARGUMENT(context);
+  // NOTE(review): with modules == NULL and module_count == 1 this check
+  // passes and the loop below dereferences modules[0]; presumably the
+  // condition was meant to be `module_count > 0` — confirm.
+  if (!modules && module_count > 1) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "modules/module_count mismatch");
+  }
+  for (iree_host_size_t i = 0; i < module_count; ++i) {
+    if (!modules[i]) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "modules[%zu] is null", i);
+    }
+  }
+
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Try growing both our storage lists first, if needed.
+  if (context->list.count + module_count > context->list.capacity) {
+    // Frozen (static) contexts cannot grow after creation.
+    if (context->is_frozen) {
+      IREE_TRACE_ZONE_END(z0);
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "context was allocated as static and cannot "
+                              "register modules after creation");
+    }
+    // Grow by at least module_count, doubling when that is larger.
+    iree_host_size_t new_capacity = context->list.capacity + module_count;
+    if (new_capacity < context->list.capacity * 2) {
+      // TODO(benvanik): tune list growth for module count >> 4.
+      new_capacity = context->list.capacity * 2;
+    }
+    iree_vm_module_t** new_module_list = NULL;
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_allocator_malloc(context->allocator,
+                                  sizeof(iree_vm_module_t*) * new_capacity,
+                                  (void**)&new_module_list));
+    // NOTE(review): if this second allocation fails, new_module_list above
+    // is leaked by the early return — confirm/fix upstream.
+    iree_vm_module_state_t** new_module_state_list = NULL;
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
+        iree_allocator_malloc(context->allocator,
+                              sizeof(iree_vm_module_state_t*) * new_capacity,
+                              (void**)&new_module_state_list));
+    memcpy(new_module_list, context->list.modules,
+           sizeof(iree_vm_module_t*) * context->list.count);
+    memcpy(new_module_state_list, context->list.module_states,
+           sizeof(iree_vm_module_state_t*) * context->list.count);
+    // The existing memory is only dynamically allocated if it has been
+    // grown.
+    if (context->list.capacity > 0) {
+      iree_allocator_free(context->allocator, context->list.modules);
+      iree_allocator_free(context->allocator, context->list.module_states);
+    }
+    context->list.modules = new_module_list;
+    context->list.module_states = new_module_state_list;
+    context->list.capacity = new_capacity;
+  }
+
+  // VM stack used to call into module __init methods.
+  IREE_VM_INLINE_STACK_INITIALIZE(
+      stack,
+      context->flags & IREE_VM_CONTEXT_FLAG_TRACE_EXECUTION
+          ? IREE_VM_INVOCATION_FLAG_TRACE_EXECUTION
+          : IREE_VM_INVOCATION_FLAG_NONE,
+      iree_vm_context_state_resolver(context), context->allocator);
+
+  // Retain all modules and allocate their state.
+  assert(context->list.capacity >= context->list.count + module_count);
+  iree_host_size_t original_count = context->list.count;
+  iree_status_t status = iree_ok_status();
+  // |i| is declared outside the loop so the failure cleanup below knows how
+  // far registration progressed.
+  iree_host_size_t i = 0;
+  for (i = 0; i < module_count; ++i) {
+    iree_vm_module_t* module = modules[i];
+    context->list.modules[original_count + i] = module;
+    context->list.module_states[original_count + i] = NULL;
+
+    iree_vm_module_retain(module);
+
+    // Allocate module state.
+    iree_vm_module_state_t* module_state = NULL;
+    status =
+        module->alloc_state(module->self, context->allocator, &module_state);
+    if (!iree_status_is_ok(status)) {
+      // Cleanup handled below.
+      break;
+    }
+    context->list.module_states[original_count + i] = module_state;
+
+    // Resolve imports for the modules.
+    status =
+        iree_vm_context_resolve_module_imports(context, module, module_state);
+    if (!iree_status_is_ok(status)) {
+      // Cleanup handled below.
+      break;
+    }
+
+    ++context->list.count;
+
+    // Run module __init functions, if present.
+    // As initialization functions may reference imports we need to perform
+    // all of these after we have resolved the imports above.
+    status = iree_vm_context_run_function(stack, module,
+                                          iree_make_cstring_view("__init"));
+    if (!iree_status_is_ok(status)) {
+      // Cleanup handled below.
+      break;
+    }
+  }
+
+  iree_vm_stack_deinitialize(stack);
+
+  // Cleanup for failure cases during module initialization; we need to
+  // ensure we release any modules we'd already initialized.
+  if (!iree_status_is_ok(status)) {
+    iree_vm_context_release_modules(context, original_count,
+                                    original_count + i);
+    context->list.count = original_count;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Freezes the context so no further modules can be registered.
+// Idempotent; always returns OK.
+IREE_API_EXPORT iree_status_t
+iree_vm_context_freeze(iree_vm_context_t* context) {
+  IREE_ASSERT_ARGUMENT(context);
+  context->is_frozen = 1;
+  return iree_ok_status();
+}
+
+// Returns a by-value state resolver whose self pointer is the context and
+// whose callback is iree_vm_context_query_module_state. The resolver is only
+// valid for as long as the context is live.
+IREE_API_EXPORT iree_vm_state_resolver_t
+iree_vm_context_state_resolver(const iree_vm_context_t* context) {
+  iree_vm_state_resolver_t state_resolver = {0};
+  state_resolver.self = (void*)context;
+  state_resolver.query_module_state = iree_vm_context_query_module_state;
+  return state_resolver;
+}
+
+// Public wrapper over the resolver callback: fetches the context-owned state
+// for |module|, or NOT_FOUND if the module is not registered.
+IREE_API_EXPORT iree_status_t iree_vm_context_resolve_module_state(
+    const iree_vm_context_t* context, iree_vm_module_t* module,
+    iree_vm_module_state_t** out_module_state) {
+  return iree_vm_context_query_module_state((void*)context, module,
+                                            out_module_state);
+}
+
+// Resolves a fully-qualified `module.function` name to an exported function.
+// Modules are searched from most recently registered back to first, letting
+// later modules override earlier ones. Returns INVALID_ARGUMENT if the name
+// has no `.` separator and NOT_FOUND if the named module is not registered.
+IREE_API_EXPORT iree_status_t iree_vm_context_resolve_function(
+    const iree_vm_context_t* context, iree_string_view_t full_name,
+    iree_vm_function_t* out_function) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_function);
+  memset(out_function, 0, sizeof(iree_vm_function_t));
+
+  // Split `module.function`; -1 indicates no separator was found.
+  iree_string_view_t module_name;
+  iree_string_view_t function_name;
+  if (iree_string_view_split(full_name, '.', &module_name, &function_name) ==
+      -1) {
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "import name not fully-qualified (module.func): '%.*s'",
+        (int)full_name.size, full_name.data);
+  }
+
+  // Reverse scan: newest registrations shadow older ones.
+  for (int i = (int)context->list.count - 1; i >= 0; --i) {
+    iree_vm_module_t* module = context->list.modules[i];
+    if (iree_string_view_equal(module_name, iree_vm_module_name(module))) {
+      iree_status_t status = iree_vm_module_lookup_function_by_name(
+          module, IREE_VM_FUNCTION_LINKAGE_EXPORT, function_name, out_function);
+      IREE_TRACE_ZONE_END(z0);
+      return status;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_make_status(IREE_STATUS_NOT_FOUND,
+                          "module '%.*s' required for import '%.*s' not "
+                          "registered with the context",
+                          (int)module_name.size, module_name.data,
+                          (int)full_name.size, full_name.data);
+}
+
+// Calls the '__notify(i32)' function in |module|, if present.
+// Returns OK when the module does not export __notify (the call is
+// optional); execution failures are annotated with a backtrace when enabled.
+static iree_status_t iree_vm_context_call_module_notify(
+    iree_vm_stack_t* stack, iree_vm_module_t* module,
+    iree_vm_module_state_t* module_state, iree_vm_signal_t signal) {
+  // Single i32 argument with the signal number.
+  uint32_t signal_arg = (uint32_t)signal;
+  iree_vm_function_call_t call;
+  memset(&call, 0, sizeof(call));
+  // The argument span aliases the stack-local signal_arg for the duration of
+  // the call.
+  call.arguments = iree_make_byte_span(&signal_arg, sizeof(signal_arg));
+
+  // Try to find the function. Modules are not required to export it.
+  iree_status_t status = iree_vm_module_lookup_function_by_name(
+      module, IREE_VM_FUNCTION_LINKAGE_EXPORT,
+      iree_make_cstring_view("__notify"), &call.function);
+  if (iree_status_is_not_found(status)) {
+    // Function doesn't exist; that's ok as this was an optional call.
+    return iree_status_ignore(status);
+  } else if (!iree_status_is_ok(status)) {
+    // Failed during trim.
+    return status;
+  }
+
+  // Call the resolved function.
+  iree_vm_execution_result_t result;
+  status = module->begin_call(module->self, stack, &call, &result);
+  if (!iree_status_is_ok(status)) {
+    status = IREE_VM_STACK_ANNOTATE_BACKTRACE_IF_ENABLED(stack, status);
+  }
+
+  // TODO(benvanik): ensure completed synchronously.
+
+  return status;
+}
+
+// Calls the module notify methods in registration order.
+// Stops at (and returns) the first failure.
+static iree_status_t iree_vm_context_notify_forward(iree_vm_stack_t* stack,
+                                                    iree_vm_context_t* context,
+                                                    iree_vm_signal_t signal) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < context->list.count; ++i) {
+    iree_vm_module_t* module = context->list.modules[i];
+    iree_vm_module_state_t* module_state = context->list.module_states[i];
+
+    // Call the module internal interface notify method.
+    // This handles the resources owned by the module implementation itself
+    // such as JITed binaries or other module infrastructure.
+    status = module->notify(module->self, module_state, signal);
+    if (!iree_status_is_ok(status)) break;
+
+    // Call the user-level notify method.
+    // This may now use the reallocated resources from the module internal
+    // implementation above.
+    status =
+        iree_vm_context_call_module_notify(stack, module, module_state, signal);
+    if (!iree_status_is_ok(status)) break;
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Calls the module notify methods in reverse registration order.
+// Note the ordering is inverted relative to notify_forward: the user-level
+// __notify runs before the module-internal notify. Stops at the first
+// failure.
+static iree_status_t iree_vm_context_notify_reverse(iree_vm_stack_t* stack,
+                                                    iree_vm_context_t* context,
+                                                    iree_vm_signal_t signal) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_status_t status = iree_ok_status();
+  for (int i = (int)context->list.count - 1; i >= 0; --i) {
+    iree_vm_module_t* module = context->list.modules[i];
+    iree_vm_module_state_t* module_state = context->list.module_states[i];
+
+    // Call the user-level notify method first.
+    // This allows users to drop any state that they can rematerialize and
+    // return the resources to pools/caches to be trimmed below.
+    status =
+        iree_vm_context_call_module_notify(stack, module, module_state, signal);
+    if (!iree_status_is_ok(status)) break;
+
+    // Call the module internal interface notify method.
+    // This handles the resources owned by the module implementation itself
+    // such as JITed binaries or other module infrastructure. Since we've
+    // already called the user-level function we likely have all of the
+    // resources that could be returned to pools there for this to reclaim.
+    status = module->notify(module->self, module_state, signal);
+    if (!iree_status_is_ok(status)) break;
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Broadcasts |signal| to all registered modules. RESUME (and any unknown
+// signal, via the default case) walks modules forward; SUSPEND/LOW_MEMORY
+// walk backward. Stops at the first module failure.
+IREE_API_EXPORT iree_status_t iree_vm_context_notify(iree_vm_context_t* context,
+                                                     iree_vm_signal_t signal) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)signal);
+
+  // VM stack used to call into module __notify methods.
+  IREE_VM_INLINE_STACK_INITIALIZE(
+      stack,
+      context->flags & IREE_VM_CONTEXT_FLAG_TRACE_EXECUTION
+          ? IREE_VM_INVOCATION_FLAG_TRACE_EXECUTION
+          : IREE_VM_INVOCATION_FLAG_NONE,
+      iree_vm_context_state_resolver(context), context->allocator);
+
+  // Resumes are walked forward while suspends are walked backward.
+  // This follows the expected construction/destruction pattern where for
+  // example on suspend one would walk user modules to release resources back
+  // to system module pools before the system modules then clean up the pools.
+  iree_status_t status = iree_ok_status();
+  switch (signal) {
+    default:
+    case IREE_VM_SIGNAL_RESUME:
+      status = iree_vm_context_notify_forward(stack, context, signal);
+      break;
+    case IREE_VM_SIGNAL_SUSPEND:
+    case IREE_VM_SIGNAL_LOW_MEMORY:
+      status = iree_vm_context_notify_reverse(stack, context, signal);
+      break;
+  }
+
+  iree_vm_stack_deinitialize(stack);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
diff --git a/runtime/src/iree/vm/context.h b/runtime/src/iree/vm/context.h
new file mode 100644
index 0000000..b58bca6
--- /dev/null
+++ b/runtime/src/iree/vm/context.h
@@ -0,0 +1,117 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_CONTEXT_H_
+#define IREE_VM_CONTEXT_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/instance.h"
+#include "iree/vm/module.h"
+#include "iree/vm/stack.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// An isolated execution context.
+// Effectively a sandbox where modules can be loaded and run with restricted
+// visibility and where they can maintain state.
+//
+// Modules have imports resolved automatically when registered by searching
+// existing modules registered within the context and load order is used for
+// resolution. Functions are resolved from the most recently registered module
+// back to the first, such that modules can override implementations of
+// functions in previously registered modules.
+//
+// Thread-compatible and must be externally synchronized.
+typedef struct iree_vm_context_t iree_vm_context_t;
+
+enum iree_vm_context_flag_bits_t {
+ IREE_VM_CONTEXT_FLAG_NONE = 0u,
+
+ // Enables tracing of execution to stderr (when available).
+ // See iree/base/config.h for the flags that control whether this
+ // functionality is available; specifically:
+ // -DIREE_VM_EXECUTION_TRACING_ENABLE=1
+ // All invocations made to this context - including initializers - will be
+ // traced. For fine-grained control use `iree_vm_invocation_flags_t`.
+ IREE_VM_CONTEXT_FLAG_TRACE_EXECUTION = 1u << 0,
+};
+typedef uint32_t iree_vm_context_flags_t;
+
+// Creates a new context that uses the given |instance| for device management.
+// |out_context| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_vm_context_create(
+ iree_vm_instance_t* instance, iree_vm_context_flags_t flags,
+ iree_allocator_t allocator, iree_vm_context_t** out_context);
+
+// Creates a new context with the given static set of modules.
+// This is equivalent to iree_vm_context_create+iree_vm_context_register_modules
+// but may be more efficient to allocate. Contexts created in this way cannot
+// have additional modules registered after creation.
+// |out_context| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_vm_context_create_with_modules(
+ iree_vm_instance_t* instance, iree_vm_context_flags_t flags,
+ iree_vm_module_t** modules, iree_host_size_t module_count,
+ iree_allocator_t allocator, iree_vm_context_t** out_context);
+
+// Retains the given |context| for the caller.
+IREE_API_EXPORT void iree_vm_context_retain(iree_vm_context_t* context);
+
+// Releases the given |context| from the caller.
+IREE_API_EXPORT void iree_vm_context_release(iree_vm_context_t* context);
+
+// Returns a process-unique ID for the |context|.
+IREE_API_EXPORT intptr_t iree_vm_context_id(const iree_vm_context_t* context);
+
+// Returns |context| flags.
+IREE_API_EXPORT iree_vm_context_flags_t
+iree_vm_context_flags(const iree_vm_context_t* context);
+
+// Registers a list of modules with the context and resolves imports in the
+// order provided.
+// The modules will be retained by the context until destruction.
+IREE_API_EXPORT iree_status_t iree_vm_context_register_modules(
+ iree_vm_context_t* context, iree_vm_module_t** modules,
+ iree_host_size_t module_count);
+
+// Freezes a context such that no more modules can be registered.
+// This can be used to ensure that context contents cannot be modified by other
+// code as the context is made available to other parts of the program.
+// No-op if already frozen.
+IREE_API_EXPORT iree_status_t
+iree_vm_context_freeze(iree_vm_context_t* context);
+
+// Returns a state resolver setup to use the |context| for resolving module
+// state.
+IREE_API_EXPORT iree_vm_state_resolver_t
+iree_vm_context_state_resolver(const iree_vm_context_t* context);
+
+// Sets |out_module_state| to the context-specific state for the given |module|.
+// The state is owned by the context and will only be live for as long as the
+// context is.
+IREE_API_EXPORT iree_status_t iree_vm_context_resolve_module_state(
+ const iree_vm_context_t* context, iree_vm_module_t* module,
+ iree_vm_module_state_t** out_module_state);
+
+// Sets |out_function| to an exported function with the fully-qualified name
+// of |full_name| or returns IREE_STATUS_NOT_FOUND. The function reference is
+// valid for the lifetime of |context|.
+IREE_API_EXPORT iree_status_t iree_vm_context_resolve_function(
+ const iree_vm_context_t* context, iree_string_view_t full_name,
+ iree_vm_function_t* out_function);
+
+// Notifies all modules in the context of a system signal.
+IREE_API_EXPORT iree_status_t iree_vm_context_notify(iree_vm_context_t* context,
+ iree_vm_signal_t signal);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_CONTEXT_H_
diff --git a/runtime/src/iree/vm/generated/.clang-format b/runtime/src/iree/vm/generated/.clang-format
new file mode 100644
index 0000000..8844258
--- /dev/null
+++ b/runtime/src/iree/vm/generated/.clang-format
@@ -0,0 +1,9 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Disable formatting for generated code.
+DisableFormat: true
+
diff --git a/runtime/src/iree/vm/generated/bytecode_op_table.h b/runtime/src/iree/vm/generated/bytecode_op_table.h
new file mode 100644
index 0000000..6e172d3
--- /dev/null
+++ b/runtime/src/iree/vm/generated/bytecode_op_table.h
@@ -0,0 +1,2076 @@
+/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\
+|* *|
+|* IREE VM Operation Tables *|
+|* *|
+|* Automatically generated file, do not edit! *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+typedef enum {
+ IREE_VM_OP_CORE_GlobalLoadI32 = 0x00,
+ IREE_VM_OP_CORE_GlobalStoreI32 = 0x01,
+ IREE_VM_OP_CORE_GlobalLoadIndirectI32 = 0x02,
+ IREE_VM_OP_CORE_GlobalStoreIndirectI32 = 0x03,
+ IREE_VM_OP_CORE_GlobalLoadRef = 0x04,
+ IREE_VM_OP_CORE_GlobalStoreRef = 0x05,
+ IREE_VM_OP_CORE_GlobalLoadIndirectRef = 0x06,
+ IREE_VM_OP_CORE_GlobalStoreIndirectRef = 0x07,
+ IREE_VM_OP_CORE_ConstI32Zero = 0x08,
+ IREE_VM_OP_CORE_ConstI32 = 0x09,
+ IREE_VM_OP_CORE_ConstRefZero = 0x0A,
+ IREE_VM_OP_CORE_ConstRefRodata = 0x0B,
+ IREE_VM_OP_CORE_RSV_0x0C,
+ IREE_VM_OP_CORE_RSV_0x0D,
+ IREE_VM_OP_CORE_RSV_0x0E,
+ IREE_VM_OP_CORE_RSV_0x0F,
+ IREE_VM_OP_CORE_ListAlloc = 0x10,
+ IREE_VM_OP_CORE_ListReserve = 0x11,
+ IREE_VM_OP_CORE_ListSize = 0x12,
+ IREE_VM_OP_CORE_ListResize = 0x13,
+ IREE_VM_OP_CORE_ListGetI32 = 0x14,
+ IREE_VM_OP_CORE_ListSetI32 = 0x15,
+ IREE_VM_OP_CORE_ListGetRef = 0x16,
+ IREE_VM_OP_CORE_ListSetRef = 0x17,
+ IREE_VM_OP_CORE_RSV_0x18,
+ IREE_VM_OP_CORE_RSV_0x19,
+ IREE_VM_OP_CORE_RSV_0x1A,
+ IREE_VM_OP_CORE_RSV_0x1B,
+ IREE_VM_OP_CORE_RSV_0x1C,
+ IREE_VM_OP_CORE_RSV_0x1D,
+ IREE_VM_OP_CORE_SelectI32 = 0x1E,
+ IREE_VM_OP_CORE_SelectRef = 0x1F,
+ IREE_VM_OP_CORE_SwitchI32 = 0x20,
+ IREE_VM_OP_CORE_SwitchRef = 0x21,
+ IREE_VM_OP_CORE_AddI32 = 0x22,
+ IREE_VM_OP_CORE_SubI32 = 0x23,
+ IREE_VM_OP_CORE_MulI32 = 0x24,
+ IREE_VM_OP_CORE_DivI32S = 0x25,
+ IREE_VM_OP_CORE_DivI32U = 0x26,
+ IREE_VM_OP_CORE_RemI32S = 0x27,
+ IREE_VM_OP_CORE_RemI32U = 0x28,
+ IREE_VM_OP_CORE_FMAI32 = 0x29,
+ IREE_VM_OP_CORE_RSV_0x2A,
+ IREE_VM_OP_CORE_RSV_0x2B,
+ IREE_VM_OP_CORE_RSV_0x2C,
+ IREE_VM_OP_CORE_RSV_0x2D,
+ IREE_VM_OP_CORE_RSV_0x2E,
+ IREE_VM_OP_CORE_RSV_0x2F,
+ IREE_VM_OP_CORE_NotI32 = 0x30,
+ IREE_VM_OP_CORE_AndI32 = 0x31,
+ IREE_VM_OP_CORE_OrI32 = 0x32,
+ IREE_VM_OP_CORE_XorI32 = 0x33,
+ IREE_VM_OP_CORE_ShlI32 = 0x34,
+ IREE_VM_OP_CORE_ShrI32S = 0x35,
+ IREE_VM_OP_CORE_ShrI32U = 0x36,
+ IREE_VM_OP_CORE_TruncI32I8 = 0x37,
+ IREE_VM_OP_CORE_TruncI32I16 = 0x38,
+ IREE_VM_OP_CORE_ExtI8I32S = 0x39,
+ IREE_VM_OP_CORE_ExtI8I32U = 0x3A,
+ IREE_VM_OP_CORE_ExtI16I32S = 0x3B,
+ IREE_VM_OP_CORE_ExtI16I32U = 0x3C,
+ IREE_VM_OP_CORE_RSV_0x3D,
+ IREE_VM_OP_CORE_RSV_0x3E,
+ IREE_VM_OP_CORE_RSV_0x3F,
+ IREE_VM_OP_CORE_CmpEQI32 = 0x40,
+ IREE_VM_OP_CORE_CmpNEI32 = 0x41,
+ IREE_VM_OP_CORE_CmpLTI32S = 0x42,
+ IREE_VM_OP_CORE_CmpLTI32U = 0x43,
+ IREE_VM_OP_CORE_RSV_0x44,
+ IREE_VM_OP_CORE_RSV_0x45,
+ IREE_VM_OP_CORE_RSV_0x46,
+ IREE_VM_OP_CORE_RSV_0x47,
+ IREE_VM_OP_CORE_RSV_0x48,
+ IREE_VM_OP_CORE_RSV_0x49,
+ IREE_VM_OP_CORE_CmpEQRef = 0x4A,
+ IREE_VM_OP_CORE_CmpNERef = 0x4B,
+ IREE_VM_OP_CORE_CmpNZRef = 0x4C,
+ IREE_VM_OP_CORE_CmpNZI32 = 0x4D,
+ IREE_VM_OP_CORE_RSV_0x4E,
+ IREE_VM_OP_CORE_RSV_0x4F,
+ IREE_VM_OP_CORE_Branch = 0x50,
+ IREE_VM_OP_CORE_CondBranch = 0x51,
+ IREE_VM_OP_CORE_Call = 0x52,
+ IREE_VM_OP_CORE_CallVariadic = 0x53,
+ IREE_VM_OP_CORE_Return = 0x54,
+ IREE_VM_OP_CORE_Fail = 0x55,
+ IREE_VM_OP_CORE_ImportResolved = 0x56,
+ IREE_VM_OP_CORE_RSV_0x57,
+ IREE_VM_OP_CORE_RSV_0x58,
+ IREE_VM_OP_CORE_RSV_0x59,
+ IREE_VM_OP_CORE_RSV_0x5A,
+ IREE_VM_OP_CORE_RSV_0x5B,
+ IREE_VM_OP_CORE_RSV_0x5C,
+ IREE_VM_OP_CORE_RSV_0x5D,
+ IREE_VM_OP_CORE_RSV_0x5E,
+ IREE_VM_OP_CORE_RSV_0x5F,
+ IREE_VM_OP_CORE_Yield = 0x60,
+ IREE_VM_OP_CORE_RSV_0x61,
+ IREE_VM_OP_CORE_RSV_0x62,
+ IREE_VM_OP_CORE_RSV_0x63,
+ IREE_VM_OP_CORE_RSV_0x64,
+ IREE_VM_OP_CORE_RSV_0x65,
+ IREE_VM_OP_CORE_RSV_0x66,
+ IREE_VM_OP_CORE_RSV_0x67,
+ IREE_VM_OP_CORE_RSV_0x68,
+ IREE_VM_OP_CORE_RSV_0x69,
+ IREE_VM_OP_CORE_RSV_0x6A,
+ IREE_VM_OP_CORE_RSV_0x6B,
+ IREE_VM_OP_CORE_RSV_0x6C,
+ IREE_VM_OP_CORE_RSV_0x6D,
+ IREE_VM_OP_CORE_RSV_0x6E,
+ IREE_VM_OP_CORE_RSV_0x6F,
+ IREE_VM_OP_CORE_RSV_0x70,
+ IREE_VM_OP_CORE_RSV_0x71,
+ IREE_VM_OP_CORE_RSV_0x72,
+ IREE_VM_OP_CORE_RSV_0x73,
+ IREE_VM_OP_CORE_RSV_0x74,
+ IREE_VM_OP_CORE_RSV_0x75,
+ IREE_VM_OP_CORE_RSV_0x76,
+ IREE_VM_OP_CORE_RSV_0x77,
+ IREE_VM_OP_CORE_RSV_0x78,
+ IREE_VM_OP_CORE_RSV_0x79,
+ IREE_VM_OP_CORE_RSV_0x7A,
+ IREE_VM_OP_CORE_RSV_0x7B,
+ IREE_VM_OP_CORE_Trace = 0x7C,
+ IREE_VM_OP_CORE_Print = 0x7D,
+ IREE_VM_OP_CORE_CondBreak = 0x7E,
+ IREE_VM_OP_CORE_Break = 0x7F,
+ IREE_VM_OP_CORE_RSV_0x80,
+ IREE_VM_OP_CORE_RSV_0x81,
+ IREE_VM_OP_CORE_RSV_0x82,
+ IREE_VM_OP_CORE_RSV_0x83,
+ IREE_VM_OP_CORE_RSV_0x84,
+ IREE_VM_OP_CORE_RSV_0x85,
+ IREE_VM_OP_CORE_RSV_0x86,
+ IREE_VM_OP_CORE_RSV_0x87,
+ IREE_VM_OP_CORE_RSV_0x88,
+ IREE_VM_OP_CORE_RSV_0x89,
+ IREE_VM_OP_CORE_RSV_0x8A,
+ IREE_VM_OP_CORE_RSV_0x8B,
+ IREE_VM_OP_CORE_RSV_0x8C,
+ IREE_VM_OP_CORE_RSV_0x8D,
+ IREE_VM_OP_CORE_RSV_0x8E,
+ IREE_VM_OP_CORE_RSV_0x8F,
+ IREE_VM_OP_CORE_RSV_0x90,
+ IREE_VM_OP_CORE_RSV_0x91,
+ IREE_VM_OP_CORE_RSV_0x92,
+ IREE_VM_OP_CORE_RSV_0x93,
+ IREE_VM_OP_CORE_RSV_0x94,
+ IREE_VM_OP_CORE_RSV_0x95,
+ IREE_VM_OP_CORE_RSV_0x96,
+ IREE_VM_OP_CORE_RSV_0x97,
+ IREE_VM_OP_CORE_RSV_0x98,
+ IREE_VM_OP_CORE_RSV_0x99,
+ IREE_VM_OP_CORE_RSV_0x9A,
+ IREE_VM_OP_CORE_RSV_0x9B,
+ IREE_VM_OP_CORE_RSV_0x9C,
+ IREE_VM_OP_CORE_RSV_0x9D,
+ IREE_VM_OP_CORE_RSV_0x9E,
+ IREE_VM_OP_CORE_RSV_0x9F,
+ IREE_VM_OP_CORE_RSV_0xA0,
+ IREE_VM_OP_CORE_RSV_0xA1,
+ IREE_VM_OP_CORE_RSV_0xA2,
+ IREE_VM_OP_CORE_RSV_0xA3,
+ IREE_VM_OP_CORE_RSV_0xA4,
+ IREE_VM_OP_CORE_RSV_0xA5,
+ IREE_VM_OP_CORE_RSV_0xA6,
+ IREE_VM_OP_CORE_RSV_0xA7,
+ IREE_VM_OP_CORE_RSV_0xA8,
+ IREE_VM_OP_CORE_RSV_0xA9,
+ IREE_VM_OP_CORE_RSV_0xAA,
+ IREE_VM_OP_CORE_RSV_0xAB,
+ IREE_VM_OP_CORE_RSV_0xAC,
+ IREE_VM_OP_CORE_RSV_0xAD,
+ IREE_VM_OP_CORE_RSV_0xAE,
+ IREE_VM_OP_CORE_RSV_0xAF,
+ IREE_VM_OP_CORE_BufferLoadI8U = 0xB0,
+ IREE_VM_OP_CORE_BufferLoadI16U = 0xB1,
+ IREE_VM_OP_CORE_RSV_0xB2,
+ IREE_VM_OP_CORE_BufferLoadI32 = 0xB3,
+ IREE_VM_OP_CORE_BufferLoadI8S = 0xB4,
+ IREE_VM_OP_CORE_BufferLoadI16S = 0xB5,
+ IREE_VM_OP_CORE_RSV_0xB6,
+ IREE_VM_OP_CORE_RSV_0xB7,
+ IREE_VM_OP_CORE_BufferStoreI8 = 0xB8,
+ IREE_VM_OP_CORE_BufferStoreI16 = 0xB9,
+ IREE_VM_OP_CORE_RSV_0xBA,
+ IREE_VM_OP_CORE_BufferStoreI32 = 0xBB,
+ IREE_VM_OP_CORE_RSV_0xBC,
+ IREE_VM_OP_CORE_RSV_0xBD,
+ IREE_VM_OP_CORE_RSV_0xBE,
+ IREE_VM_OP_CORE_RSV_0xBF,
+ IREE_VM_OP_CORE_BufferAlloc = 0xC0,
+ IREE_VM_OP_CORE_BufferClone = 0xC1,
+ IREE_VM_OP_CORE_BufferLength = 0xC2,
+ IREE_VM_OP_CORE_RSV_0xC3,
+ IREE_VM_OP_CORE_RSV_0xC4,
+ IREE_VM_OP_CORE_RSV_0xC5,
+ IREE_VM_OP_CORE_BufferCopy = 0xC6,
+ IREE_VM_OP_CORE_BufferCompare = 0xC7,
+ IREE_VM_OP_CORE_RSV_0xC8,
+ IREE_VM_OP_CORE_RSV_0xC9,
+ IREE_VM_OP_CORE_RSV_0xCA,
+ IREE_VM_OP_CORE_RSV_0xCB,
+ IREE_VM_OP_CORE_BufferFillI8 = 0xCC,
+ IREE_VM_OP_CORE_BufferFillI16 = 0xCD,
+ IREE_VM_OP_CORE_RSV_0xCE,
+ IREE_VM_OP_CORE_BufferFillI32 = 0xCF,
+ IREE_VM_OP_CORE_RSV_0xD0,
+ IREE_VM_OP_CORE_RSV_0xD1,
+ IREE_VM_OP_CORE_RSV_0xD2,
+ IREE_VM_OP_CORE_RSV_0xD3,
+ IREE_VM_OP_CORE_RSV_0xD4,
+ IREE_VM_OP_CORE_RSV_0xD5,
+ IREE_VM_OP_CORE_RSV_0xD6,
+ IREE_VM_OP_CORE_RSV_0xD7,
+ IREE_VM_OP_CORE_RSV_0xD8,
+ IREE_VM_OP_CORE_RSV_0xD9,
+ IREE_VM_OP_CORE_RSV_0xDA,
+ IREE_VM_OP_CORE_RSV_0xDB,
+ IREE_VM_OP_CORE_RSV_0xDC,
+ IREE_VM_OP_CORE_RSV_0xDD,
+ IREE_VM_OP_CORE_RSV_0xDE,
+ IREE_VM_OP_CORE_RSV_0xDF,
+ IREE_VM_OP_CORE_PrefixExtI64 = 0xE0,
+ IREE_VM_OP_CORE_PrefixExtF32 = 0xE1,
+ IREE_VM_OP_CORE_PrefixExtF64 = 0xE2,
+ IREE_VM_OP_CORE_RSV_0xE3,
+ IREE_VM_OP_CORE_RSV_0xE4,
+ IREE_VM_OP_CORE_RSV_0xE5,
+ IREE_VM_OP_CORE_RSV_0xE6,
+ IREE_VM_OP_CORE_RSV_0xE7,
+ IREE_VM_OP_CORE_RSV_0xE8,
+ IREE_VM_OP_CORE_RSV_0xE9,
+ IREE_VM_OP_CORE_RSV_0xEA,
+ IREE_VM_OP_CORE_RSV_0xEB,
+ IREE_VM_OP_CORE_RSV_0xEC,
+ IREE_VM_OP_CORE_RSV_0xED,
+ IREE_VM_OP_CORE_RSV_0xEE,
+ IREE_VM_OP_CORE_RSV_0xEF,
+ IREE_VM_OP_CORE_RSV_0xF0,
+ IREE_VM_OP_CORE_RSV_0xF1,
+ IREE_VM_OP_CORE_RSV_0xF2,
+ IREE_VM_OP_CORE_RSV_0xF3,
+ IREE_VM_OP_CORE_RSV_0xF4,
+ IREE_VM_OP_CORE_RSV_0xF5,
+ IREE_VM_OP_CORE_RSV_0xF6,
+ IREE_VM_OP_CORE_RSV_0xF7,
+ IREE_VM_OP_CORE_RSV_0xF8,
+ IREE_VM_OP_CORE_RSV_0xF9,
+ IREE_VM_OP_CORE_RSV_0xFA,
+ IREE_VM_OP_CORE_RSV_0xFB,
+ IREE_VM_OP_CORE_RSV_0xFC,
+ IREE_VM_OP_CORE_RSV_0xFD,
+ IREE_VM_OP_CORE_RSV_0xFE,
+ IREE_VM_OP_CORE_RSV_0xFF,
+} iree_vm_core_op_t;
+
+#define IREE_VM_OP_CORE_TABLE(OPC, RSV) \
+ OPC(0x00, GlobalLoadI32) \
+ OPC(0x01, GlobalStoreI32) \
+ OPC(0x02, GlobalLoadIndirectI32) \
+ OPC(0x03, GlobalStoreIndirectI32) \
+ OPC(0x04, GlobalLoadRef) \
+ OPC(0x05, GlobalStoreRef) \
+ OPC(0x06, GlobalLoadIndirectRef) \
+ OPC(0x07, GlobalStoreIndirectRef) \
+ OPC(0x08, ConstI32Zero) \
+ OPC(0x09, ConstI32) \
+ OPC(0x0A, ConstRefZero) \
+ OPC(0x0B, ConstRefRodata) \
+ RSV(0x0C) \
+ RSV(0x0D) \
+ RSV(0x0E) \
+ RSV(0x0F) \
+ OPC(0x10, ListAlloc) \
+ OPC(0x11, ListReserve) \
+ OPC(0x12, ListSize) \
+ OPC(0x13, ListResize) \
+ OPC(0x14, ListGetI32) \
+ OPC(0x15, ListSetI32) \
+ OPC(0x16, ListGetRef) \
+ OPC(0x17, ListSetRef) \
+ RSV(0x18) \
+ RSV(0x19) \
+ RSV(0x1A) \
+ RSV(0x1B) \
+ RSV(0x1C) \
+ RSV(0x1D) \
+ OPC(0x1E, SelectI32) \
+ OPC(0x1F, SelectRef) \
+ OPC(0x20, SwitchI32) \
+ OPC(0x21, SwitchRef) \
+ OPC(0x22, AddI32) \
+ OPC(0x23, SubI32) \
+ OPC(0x24, MulI32) \
+ OPC(0x25, DivI32S) \
+ OPC(0x26, DivI32U) \
+ OPC(0x27, RemI32S) \
+ OPC(0x28, RemI32U) \
+ OPC(0x29, FMAI32) \
+ RSV(0x2A) \
+ RSV(0x2B) \
+ RSV(0x2C) \
+ RSV(0x2D) \
+ RSV(0x2E) \
+ RSV(0x2F) \
+ OPC(0x30, NotI32) \
+ OPC(0x31, AndI32) \
+ OPC(0x32, OrI32) \
+ OPC(0x33, XorI32) \
+ OPC(0x34, ShlI32) \
+ OPC(0x35, ShrI32S) \
+ OPC(0x36, ShrI32U) \
+ OPC(0x37, TruncI32I8) \
+ OPC(0x38, TruncI32I16) \
+ OPC(0x39, ExtI8I32S) \
+ OPC(0x3A, ExtI8I32U) \
+ OPC(0x3B, ExtI16I32S) \
+ OPC(0x3C, ExtI16I32U) \
+ RSV(0x3D) \
+ RSV(0x3E) \
+ RSV(0x3F) \
+ OPC(0x40, CmpEQI32) \
+ OPC(0x41, CmpNEI32) \
+ OPC(0x42, CmpLTI32S) \
+ OPC(0x43, CmpLTI32U) \
+ RSV(0x44) \
+ RSV(0x45) \
+ RSV(0x46) \
+ RSV(0x47) \
+ RSV(0x48) \
+ RSV(0x49) \
+ OPC(0x4A, CmpEQRef) \
+ OPC(0x4B, CmpNERef) \
+ OPC(0x4C, CmpNZRef) \
+ OPC(0x4D, CmpNZI32) \
+ RSV(0x4E) \
+ RSV(0x4F) \
+ OPC(0x50, Branch) \
+ OPC(0x51, CondBranch) \
+ OPC(0x52, Call) \
+ OPC(0x53, CallVariadic) \
+ OPC(0x54, Return) \
+ OPC(0x55, Fail) \
+ OPC(0x56, ImportResolved) \
+ RSV(0x57) \
+ RSV(0x58) \
+ RSV(0x59) \
+ RSV(0x5A) \
+ RSV(0x5B) \
+ RSV(0x5C) \
+ RSV(0x5D) \
+ RSV(0x5E) \
+ RSV(0x5F) \
+ OPC(0x60, Yield) \
+ RSV(0x61) \
+ RSV(0x62) \
+ RSV(0x63) \
+ RSV(0x64) \
+ RSV(0x65) \
+ RSV(0x66) \
+ RSV(0x67) \
+ RSV(0x68) \
+ RSV(0x69) \
+ RSV(0x6A) \
+ RSV(0x6B) \
+ RSV(0x6C) \
+ RSV(0x6D) \
+ RSV(0x6E) \
+ RSV(0x6F) \
+ RSV(0x70) \
+ RSV(0x71) \
+ RSV(0x72) \
+ RSV(0x73) \
+ RSV(0x74) \
+ RSV(0x75) \
+ RSV(0x76) \
+ RSV(0x77) \
+ RSV(0x78) \
+ RSV(0x79) \
+ RSV(0x7A) \
+ RSV(0x7B) \
+ OPC(0x7C, Trace) \
+ OPC(0x7D, Print) \
+ OPC(0x7E, CondBreak) \
+ OPC(0x7F, Break) \
+ RSV(0x80) \
+ RSV(0x81) \
+ RSV(0x82) \
+ RSV(0x83) \
+ RSV(0x84) \
+ RSV(0x85) \
+ RSV(0x86) \
+ RSV(0x87) \
+ RSV(0x88) \
+ RSV(0x89) \
+ RSV(0x8A) \
+ RSV(0x8B) \
+ RSV(0x8C) \
+ RSV(0x8D) \
+ RSV(0x8E) \
+ RSV(0x8F) \
+ RSV(0x90) \
+ RSV(0x91) \
+ RSV(0x92) \
+ RSV(0x93) \
+ RSV(0x94) \
+ RSV(0x95) \
+ RSV(0x96) \
+ RSV(0x97) \
+ RSV(0x98) \
+ RSV(0x99) \
+ RSV(0x9A) \
+ RSV(0x9B) \
+ RSV(0x9C) \
+ RSV(0x9D) \
+ RSV(0x9E) \
+ RSV(0x9F) \
+ RSV(0xA0) \
+ RSV(0xA1) \
+ RSV(0xA2) \
+ RSV(0xA3) \
+ RSV(0xA4) \
+ RSV(0xA5) \
+ RSV(0xA6) \
+ RSV(0xA7) \
+ RSV(0xA8) \
+ RSV(0xA9) \
+ RSV(0xAA) \
+ RSV(0xAB) \
+ RSV(0xAC) \
+ RSV(0xAD) \
+ RSV(0xAE) \
+ RSV(0xAF) \
+ OPC(0xB0, BufferLoadI8U) \
+ OPC(0xB1, BufferLoadI16U) \
+ RSV(0xB2) \
+ OPC(0xB3, BufferLoadI32) \
+ OPC(0xB4, BufferLoadI8S) \
+ OPC(0xB5, BufferLoadI16S) \
+ RSV(0xB6) \
+ RSV(0xB7) \
+ OPC(0xB8, BufferStoreI8) \
+ OPC(0xB9, BufferStoreI16) \
+ RSV(0xBA) \
+ OPC(0xBB, BufferStoreI32) \
+ RSV(0xBC) \
+ RSV(0xBD) \
+ RSV(0xBE) \
+ RSV(0xBF) \
+ OPC(0xC0, BufferAlloc) \
+ OPC(0xC1, BufferClone) \
+ OPC(0xC2, BufferLength) \
+ RSV(0xC3) \
+ RSV(0xC4) \
+ RSV(0xC5) \
+ OPC(0xC6, BufferCopy) \
+ OPC(0xC7, BufferCompare) \
+ RSV(0xC8) \
+ RSV(0xC9) \
+ RSV(0xCA) \
+ RSV(0xCB) \
+ OPC(0xCC, BufferFillI8) \
+ OPC(0xCD, BufferFillI16) \
+ RSV(0xCE) \
+ OPC(0xCF, BufferFillI32) \
+ RSV(0xD0) \
+ RSV(0xD1) \
+ RSV(0xD2) \
+ RSV(0xD3) \
+ RSV(0xD4) \
+ RSV(0xD5) \
+ RSV(0xD6) \
+ RSV(0xD7) \
+ RSV(0xD8) \
+ RSV(0xD9) \
+ RSV(0xDA) \
+ RSV(0xDB) \
+ RSV(0xDC) \
+ RSV(0xDD) \
+ RSV(0xDE) \
+ RSV(0xDF) \
+ OPC(0xE0, PrefixExtI64) \
+ OPC(0xE1, PrefixExtF32) \
+ OPC(0xE2, PrefixExtF64) \
+ RSV(0xE3) \
+ RSV(0xE4) \
+ RSV(0xE5) \
+ RSV(0xE6) \
+ RSV(0xE7) \
+ RSV(0xE8) \
+ RSV(0xE9) \
+ RSV(0xEA) \
+ RSV(0xEB) \
+ RSV(0xEC) \
+ RSV(0xED) \
+ RSV(0xEE) \
+ RSV(0xEF) \
+ RSV(0xF0) \
+ RSV(0xF1) \
+ RSV(0xF2) \
+ RSV(0xF3) \
+ RSV(0xF4) \
+ RSV(0xF5) \
+ RSV(0xF6) \
+ RSV(0xF7) \
+ RSV(0xF8) \
+ RSV(0xF9) \
+ RSV(0xFA) \
+ RSV(0xFB) \
+ RSV(0xFC) \
+ RSV(0xFD) \
+ RSV(0xFE) \
+ RSV(0xFF)
+
+typedef enum {
+ IREE_VM_OP_EXT_F32_GlobalLoadF32 = 0x00,
+ IREE_VM_OP_EXT_F32_GlobalStoreF32 = 0x01,
+ IREE_VM_OP_EXT_F32_GlobalLoadIndirectF32 = 0x02,
+ IREE_VM_OP_EXT_F32_GlobalStoreIndirectF32 = 0x03,
+ IREE_VM_OP_EXT_F32_RSV_0x04,
+ IREE_VM_OP_EXT_F32_RSV_0x05,
+ IREE_VM_OP_EXT_F32_RSV_0x06,
+ IREE_VM_OP_EXT_F32_RSV_0x07,
+ IREE_VM_OP_EXT_F32_ConstF32Zero = 0x08,
+ IREE_VM_OP_EXT_F32_ConstF32 = 0x09,
+ IREE_VM_OP_EXT_F32_RSV_0x0A,
+ IREE_VM_OP_EXT_F32_RSV_0x0B,
+ IREE_VM_OP_EXT_F32_RSV_0x0C,
+ IREE_VM_OP_EXT_F32_RSV_0x0D,
+ IREE_VM_OP_EXT_F32_RSV_0x0E,
+ IREE_VM_OP_EXT_F32_RSV_0x0F,
+ IREE_VM_OP_EXT_F32_RSV_0x10,
+ IREE_VM_OP_EXT_F32_RSV_0x11,
+ IREE_VM_OP_EXT_F32_RSV_0x12,
+ IREE_VM_OP_EXT_F32_RSV_0x13,
+ IREE_VM_OP_EXT_F32_ListGetF32 = 0x14,
+ IREE_VM_OP_EXT_F32_ListSetF32 = 0x15,
+ IREE_VM_OP_EXT_F32_RSV_0x16,
+ IREE_VM_OP_EXT_F32_RSV_0x17,
+ IREE_VM_OP_EXT_F32_RSV_0x18,
+ IREE_VM_OP_EXT_F32_RSV_0x19,
+ IREE_VM_OP_EXT_F32_RSV_0x1A,
+ IREE_VM_OP_EXT_F32_RSV_0x1B,
+ IREE_VM_OP_EXT_F32_RSV_0x1C,
+ IREE_VM_OP_EXT_F32_RSV_0x1D,
+ IREE_VM_OP_EXT_F32_SelectF32 = 0x1E,
+ IREE_VM_OP_EXT_F32_RSV_0x1F,
+ IREE_VM_OP_EXT_F32_SwitchF32 = 0x20,
+ IREE_VM_OP_EXT_F32_RSV_0x21,
+ IREE_VM_OP_EXT_F32_AddF32 = 0x22,
+ IREE_VM_OP_EXT_F32_SubF32 = 0x23,
+ IREE_VM_OP_EXT_F32_MulF32 = 0x24,
+ IREE_VM_OP_EXT_F32_DivF32 = 0x25,
+ IREE_VM_OP_EXT_F32_RemF32 = 0x26,
+ IREE_VM_OP_EXT_F32_FMAF32 = 0x27,
+ IREE_VM_OP_EXT_F32_AbsF32 = 0x28,
+ IREE_VM_OP_EXT_F32_NegF32 = 0x29,
+ IREE_VM_OP_EXT_F32_CeilF32 = 0x2A,
+ IREE_VM_OP_EXT_F32_FloorF32 = 0x2B,
+ IREE_VM_OP_EXT_F32_RSV_0x2C,
+ IREE_VM_OP_EXT_F32_RSV_0x2D,
+ IREE_VM_OP_EXT_F32_RSV_0x2E,
+ IREE_VM_OP_EXT_F32_RSV_0x2F,
+ IREE_VM_OP_EXT_F32_CastSI32F32 = 0x30,
+ IREE_VM_OP_EXT_F32_CastUI32F32 = 0x31,
+ IREE_VM_OP_EXT_F32_CastF32SI32 = 0x32,
+ IREE_VM_OP_EXT_F32_CastF32UI32 = 0x33,
+ IREE_VM_OP_EXT_F32_BitcastI32F32 = 0x34,
+ IREE_VM_OP_EXT_F32_BitcastF32I32 = 0x35,
+ IREE_VM_OP_EXT_F32_RSV_0x36,
+ IREE_VM_OP_EXT_F32_RSV_0x37,
+ IREE_VM_OP_EXT_F32_RSV_0x38,
+ IREE_VM_OP_EXT_F32_RSV_0x39,
+ IREE_VM_OP_EXT_F32_RSV_0x3A,
+ IREE_VM_OP_EXT_F32_RSV_0x3B,
+ IREE_VM_OP_EXT_F32_RSV_0x3C,
+ IREE_VM_OP_EXT_F32_RSV_0x3D,
+ IREE_VM_OP_EXT_F32_RSV_0x3E,
+ IREE_VM_OP_EXT_F32_RSV_0x3F,
+ IREE_VM_OP_EXT_F32_AtanF32 = 0x40,
+ IREE_VM_OP_EXT_F32_Atan2F32 = 0x41,
+ IREE_VM_OP_EXT_F32_CosF32 = 0x42,
+ IREE_VM_OP_EXT_F32_SinF32 = 0x43,
+ IREE_VM_OP_EXT_F32_ExpF32 = 0x44,
+ IREE_VM_OP_EXT_F32_Exp2F32 = 0x45,
+ IREE_VM_OP_EXT_F32_ExpM1F32 = 0x46,
+ IREE_VM_OP_EXT_F32_LogF32 = 0x47,
+ IREE_VM_OP_EXT_F32_Log10F32 = 0x48,
+ IREE_VM_OP_EXT_F32_Log1pF32 = 0x49,
+ IREE_VM_OP_EXT_F32_Log2F32 = 0x4A,
+ IREE_VM_OP_EXT_F32_PowF32 = 0x4B,
+ IREE_VM_OP_EXT_F32_RsqrtF32 = 0x4C,
+ IREE_VM_OP_EXT_F32_SqrtF32 = 0x4D,
+ IREE_VM_OP_EXT_F32_TanhF32 = 0x4E,
+ IREE_VM_OP_EXT_F32_ErfF32 = 0x4F,
+ IREE_VM_OP_EXT_F32_RSV_0x50,
+ IREE_VM_OP_EXT_F32_RSV_0x51,
+ IREE_VM_OP_EXT_F32_RSV_0x52,
+ IREE_VM_OP_EXT_F32_RSV_0x53,
+ IREE_VM_OP_EXT_F32_RSV_0x54,
+ IREE_VM_OP_EXT_F32_RSV_0x55,
+ IREE_VM_OP_EXT_F32_RSV_0x56,
+ IREE_VM_OP_EXT_F32_RSV_0x57,
+ IREE_VM_OP_EXT_F32_RSV_0x58,
+ IREE_VM_OP_EXT_F32_RSV_0x59,
+ IREE_VM_OP_EXT_F32_RSV_0x5A,
+ IREE_VM_OP_EXT_F32_RSV_0x5B,
+ IREE_VM_OP_EXT_F32_RSV_0x5C,
+ IREE_VM_OP_EXT_F32_RSV_0x5D,
+ IREE_VM_OP_EXT_F32_RSV_0x5E,
+ IREE_VM_OP_EXT_F32_RSV_0x5F,
+ IREE_VM_OP_EXT_F32_CmpEQF32O = 0x60,
+ IREE_VM_OP_EXT_F32_CmpEQF32U = 0x61,
+ IREE_VM_OP_EXT_F32_CmpNEF32O = 0x62,
+ IREE_VM_OP_EXT_F32_CmpNEF32U = 0x63,
+ IREE_VM_OP_EXT_F32_CmpLTF32O = 0x64,
+ IREE_VM_OP_EXT_F32_CmpLTF32U = 0x65,
+ IREE_VM_OP_EXT_F32_CmpLTEF32O = 0x66,
+ IREE_VM_OP_EXT_F32_CmpLTEF32U = 0x67,
+ IREE_VM_OP_EXT_F32_RSV_0x68,
+ IREE_VM_OP_EXT_F32_RSV_0x69,
+ IREE_VM_OP_EXT_F32_RSV_0x6A,
+ IREE_VM_OP_EXT_F32_RSV_0x6B,
+ IREE_VM_OP_EXT_F32_RSV_0x6C,
+ IREE_VM_OP_EXT_F32_RSV_0x6D,
+ IREE_VM_OP_EXT_F32_RSV_0x6E,
+ IREE_VM_OP_EXT_F32_RSV_0x6F,
+ IREE_VM_OP_EXT_F32_CmpNaNF32 = 0x70,
+ IREE_VM_OP_EXT_F32_RSV_0x71,
+ IREE_VM_OP_EXT_F32_RSV_0x72,
+ IREE_VM_OP_EXT_F32_RSV_0x73,
+ IREE_VM_OP_EXT_F32_RSV_0x74,
+ IREE_VM_OP_EXT_F32_RSV_0x75,
+ IREE_VM_OP_EXT_F32_RSV_0x76,
+ IREE_VM_OP_EXT_F32_RSV_0x77,
+ IREE_VM_OP_EXT_F32_RSV_0x78,
+ IREE_VM_OP_EXT_F32_RSV_0x79,
+ IREE_VM_OP_EXT_F32_RSV_0x7A,
+ IREE_VM_OP_EXT_F32_RSV_0x7B,
+ IREE_VM_OP_EXT_F32_RSV_0x7C,
+ IREE_VM_OP_EXT_F32_RSV_0x7D,
+ IREE_VM_OP_EXT_F32_RSV_0x7E,
+ IREE_VM_OP_EXT_F32_RSV_0x7F,
+ IREE_VM_OP_EXT_F32_RSV_0x80,
+ IREE_VM_OP_EXT_F32_RSV_0x81,
+ IREE_VM_OP_EXT_F32_RSV_0x82,
+ IREE_VM_OP_EXT_F32_RSV_0x83,
+ IREE_VM_OP_EXT_F32_RSV_0x84,
+ IREE_VM_OP_EXT_F32_RSV_0x85,
+ IREE_VM_OP_EXT_F32_RSV_0x86,
+ IREE_VM_OP_EXT_F32_RSV_0x87,
+ IREE_VM_OP_EXT_F32_RSV_0x88,
+ IREE_VM_OP_EXT_F32_RSV_0x89,
+ IREE_VM_OP_EXT_F32_RSV_0x8A,
+ IREE_VM_OP_EXT_F32_RSV_0x8B,
+ IREE_VM_OP_EXT_F32_RSV_0x8C,
+ IREE_VM_OP_EXT_F32_RSV_0x8D,
+ IREE_VM_OP_EXT_F32_RSV_0x8E,
+ IREE_VM_OP_EXT_F32_RSV_0x8F,
+ IREE_VM_OP_EXT_F32_RSV_0x90,
+ IREE_VM_OP_EXT_F32_RSV_0x91,
+ IREE_VM_OP_EXT_F32_RSV_0x92,
+ IREE_VM_OP_EXT_F32_RSV_0x93,
+ IREE_VM_OP_EXT_F32_RSV_0x94,
+ IREE_VM_OP_EXT_F32_RSV_0x95,
+ IREE_VM_OP_EXT_F32_RSV_0x96,
+ IREE_VM_OP_EXT_F32_RSV_0x97,
+ IREE_VM_OP_EXT_F32_RSV_0x98,
+ IREE_VM_OP_EXT_F32_RSV_0x99,
+ IREE_VM_OP_EXT_F32_RSV_0x9A,
+ IREE_VM_OP_EXT_F32_RSV_0x9B,
+ IREE_VM_OP_EXT_F32_RSV_0x9C,
+ IREE_VM_OP_EXT_F32_RSV_0x9D,
+ IREE_VM_OP_EXT_F32_RSV_0x9E,
+ IREE_VM_OP_EXT_F32_RSV_0x9F,
+ IREE_VM_OP_EXT_F32_RSV_0xA0,
+ IREE_VM_OP_EXT_F32_RSV_0xA1,
+ IREE_VM_OP_EXT_F32_RSV_0xA2,
+ IREE_VM_OP_EXT_F32_RSV_0xA3,
+ IREE_VM_OP_EXT_F32_RSV_0xA4,
+ IREE_VM_OP_EXT_F32_RSV_0xA5,
+ IREE_VM_OP_EXT_F32_RSV_0xA6,
+ IREE_VM_OP_EXT_F32_RSV_0xA7,
+ IREE_VM_OP_EXT_F32_RSV_0xA8,
+ IREE_VM_OP_EXT_F32_RSV_0xA9,
+ IREE_VM_OP_EXT_F32_RSV_0xAA,
+ IREE_VM_OP_EXT_F32_RSV_0xAB,
+ IREE_VM_OP_EXT_F32_RSV_0xAC,
+ IREE_VM_OP_EXT_F32_RSV_0xAD,
+ IREE_VM_OP_EXT_F32_RSV_0xAE,
+ IREE_VM_OP_EXT_F32_RSV_0xAF,
+ IREE_VM_OP_EXT_F32_BufferLoadF32 = 0xB0,
+ IREE_VM_OP_EXT_F32_BufferStoreF32 = 0xB1,
+ IREE_VM_OP_EXT_F32_RSV_0xB2,
+ IREE_VM_OP_EXT_F32_RSV_0xB3,
+ IREE_VM_OP_EXT_F32_RSV_0xB4,
+ IREE_VM_OP_EXT_F32_RSV_0xB5,
+ IREE_VM_OP_EXT_F32_RSV_0xB6,
+ IREE_VM_OP_EXT_F32_RSV_0xB7,
+ IREE_VM_OP_EXT_F32_RSV_0xB8,
+ IREE_VM_OP_EXT_F32_RSV_0xB9,
+ IREE_VM_OP_EXT_F32_RSV_0xBA,
+ IREE_VM_OP_EXT_F32_RSV_0xBB,
+ IREE_VM_OP_EXT_F32_RSV_0xBC,
+ IREE_VM_OP_EXT_F32_RSV_0xBD,
+ IREE_VM_OP_EXT_F32_RSV_0xBE,
+ IREE_VM_OP_EXT_F32_RSV_0xBF,
+ IREE_VM_OP_EXT_F32_BufferFillF32 = 0xC0,
+ IREE_VM_OP_EXT_F32_RSV_0xC1,
+ IREE_VM_OP_EXT_F32_RSV_0xC2,
+ IREE_VM_OP_EXT_F32_RSV_0xC3,
+ IREE_VM_OP_EXT_F32_RSV_0xC4,
+ IREE_VM_OP_EXT_F32_RSV_0xC5,
+ IREE_VM_OP_EXT_F32_RSV_0xC6,
+ IREE_VM_OP_EXT_F32_RSV_0xC7,
+ IREE_VM_OP_EXT_F32_RSV_0xC8,
+ IREE_VM_OP_EXT_F32_RSV_0xC9,
+ IREE_VM_OP_EXT_F32_RSV_0xCA,
+ IREE_VM_OP_EXT_F32_RSV_0xCB,
+ IREE_VM_OP_EXT_F32_RSV_0xCC,
+ IREE_VM_OP_EXT_F32_RSV_0xCD,
+ IREE_VM_OP_EXT_F32_RSV_0xCE,
+ IREE_VM_OP_EXT_F32_RSV_0xCF,
+ IREE_VM_OP_EXT_F32_RSV_0xD0,
+ IREE_VM_OP_EXT_F32_RSV_0xD1,
+ IREE_VM_OP_EXT_F32_RSV_0xD2,
+ IREE_VM_OP_EXT_F32_RSV_0xD3,
+ IREE_VM_OP_EXT_F32_RSV_0xD4,
+ IREE_VM_OP_EXT_F32_RSV_0xD5,
+ IREE_VM_OP_EXT_F32_RSV_0xD6,
+ IREE_VM_OP_EXT_F32_RSV_0xD7,
+ IREE_VM_OP_EXT_F32_RSV_0xD8,
+ IREE_VM_OP_EXT_F32_RSV_0xD9,
+ IREE_VM_OP_EXT_F32_RSV_0xDA,
+ IREE_VM_OP_EXT_F32_RSV_0xDB,
+ IREE_VM_OP_EXT_F32_RSV_0xDC,
+ IREE_VM_OP_EXT_F32_RSV_0xDD,
+ IREE_VM_OP_EXT_F32_RSV_0xDE,
+ IREE_VM_OP_EXT_F32_RSV_0xDF,
+ IREE_VM_OP_EXT_F32_RSV_0xE0,
+ IREE_VM_OP_EXT_F32_RSV_0xE1,
+ IREE_VM_OP_EXT_F32_RSV_0xE2,
+ IREE_VM_OP_EXT_F32_RSV_0xE3,
+ IREE_VM_OP_EXT_F32_RSV_0xE4,
+ IREE_VM_OP_EXT_F32_RSV_0xE5,
+ IREE_VM_OP_EXT_F32_RSV_0xE6,
+ IREE_VM_OP_EXT_F32_RSV_0xE7,
+ IREE_VM_OP_EXT_F32_RSV_0xE8,
+ IREE_VM_OP_EXT_F32_RSV_0xE9,
+ IREE_VM_OP_EXT_F32_RSV_0xEA,
+ IREE_VM_OP_EXT_F32_RSV_0xEB,
+ IREE_VM_OP_EXT_F32_RSV_0xEC,
+ IREE_VM_OP_EXT_F32_RSV_0xED,
+ IREE_VM_OP_EXT_F32_RSV_0xEE,
+ IREE_VM_OP_EXT_F32_RSV_0xEF,
+ IREE_VM_OP_EXT_F32_RSV_0xF0,
+ IREE_VM_OP_EXT_F32_RSV_0xF1,
+ IREE_VM_OP_EXT_F32_RSV_0xF2,
+ IREE_VM_OP_EXT_F32_RSV_0xF3,
+ IREE_VM_OP_EXT_F32_RSV_0xF4,
+ IREE_VM_OP_EXT_F32_RSV_0xF5,
+ IREE_VM_OP_EXT_F32_RSV_0xF6,
+ IREE_VM_OP_EXT_F32_RSV_0xF7,
+ IREE_VM_OP_EXT_F32_RSV_0xF8,
+ IREE_VM_OP_EXT_F32_RSV_0xF9,
+ IREE_VM_OP_EXT_F32_RSV_0xFA,
+ IREE_VM_OP_EXT_F32_RSV_0xFB,
+ IREE_VM_OP_EXT_F32_RSV_0xFC,
+ IREE_VM_OP_EXT_F32_RSV_0xFD,
+ IREE_VM_OP_EXT_F32_RSV_0xFE,
+ IREE_VM_OP_EXT_F32_RSV_0xFF,
+} iree_vm_ext_f32_op_t;
+
+#define IREE_VM_OP_EXT_F32_TABLE(OPC, RSV) \
+ OPC(0x00, GlobalLoadF32) \
+ OPC(0x01, GlobalStoreF32) \
+ OPC(0x02, GlobalLoadIndirectF32) \
+ OPC(0x03, GlobalStoreIndirectF32) \
+ RSV(0x04) \
+ RSV(0x05) \
+ RSV(0x06) \
+ RSV(0x07) \
+ OPC(0x08, ConstF32Zero) \
+ OPC(0x09, ConstF32) \
+ RSV(0x0A) \
+ RSV(0x0B) \
+ RSV(0x0C) \
+ RSV(0x0D) \
+ RSV(0x0E) \
+ RSV(0x0F) \
+ RSV(0x10) \
+ RSV(0x11) \
+ RSV(0x12) \
+ RSV(0x13) \
+ OPC(0x14, ListGetF32) \
+ OPC(0x15, ListSetF32) \
+ RSV(0x16) \
+ RSV(0x17) \
+ RSV(0x18) \
+ RSV(0x19) \
+ RSV(0x1A) \
+ RSV(0x1B) \
+ RSV(0x1C) \
+ RSV(0x1D) \
+ OPC(0x1E, SelectF32) \
+ RSV(0x1F) \
+ OPC(0x20, SwitchF32) \
+ RSV(0x21) \
+ OPC(0x22, AddF32) \
+ OPC(0x23, SubF32) \
+ OPC(0x24, MulF32) \
+ OPC(0x25, DivF32) \
+ OPC(0x26, RemF32) \
+ OPC(0x27, FMAF32) \
+ OPC(0x28, AbsF32) \
+ OPC(0x29, NegF32) \
+ OPC(0x2A, CeilF32) \
+ OPC(0x2B, FloorF32) \
+ RSV(0x2C) \
+ RSV(0x2D) \
+ RSV(0x2E) \
+ RSV(0x2F) \
+ OPC(0x30, CastSI32F32) \
+ OPC(0x31, CastUI32F32) \
+ OPC(0x32, CastF32SI32) \
+ OPC(0x33, CastF32UI32) \
+ OPC(0x34, BitcastI32F32) \
+ OPC(0x35, BitcastF32I32) \
+ RSV(0x36) \
+ RSV(0x37) \
+ RSV(0x38) \
+ RSV(0x39) \
+ RSV(0x3A) \
+ RSV(0x3B) \
+ RSV(0x3C) \
+ RSV(0x3D) \
+ RSV(0x3E) \
+ RSV(0x3F) \
+ OPC(0x40, AtanF32) \
+ OPC(0x41, Atan2F32) \
+ OPC(0x42, CosF32) \
+ OPC(0x43, SinF32) \
+ OPC(0x44, ExpF32) \
+ OPC(0x45, Exp2F32) \
+ OPC(0x46, ExpM1F32) \
+ OPC(0x47, LogF32) \
+ OPC(0x48, Log10F32) \
+ OPC(0x49, Log1pF32) \
+ OPC(0x4A, Log2F32) \
+ OPC(0x4B, PowF32) \
+ OPC(0x4C, RsqrtF32) \
+ OPC(0x4D, SqrtF32) \
+ OPC(0x4E, TanhF32) \
+ OPC(0x4F, ErfF32) \
+ RSV(0x50) \
+ RSV(0x51) \
+ RSV(0x52) \
+ RSV(0x53) \
+ RSV(0x54) \
+ RSV(0x55) \
+ RSV(0x56) \
+ RSV(0x57) \
+ RSV(0x58) \
+ RSV(0x59) \
+ RSV(0x5A) \
+ RSV(0x5B) \
+ RSV(0x5C) \
+ RSV(0x5D) \
+ RSV(0x5E) \
+ RSV(0x5F) \
+ OPC(0x60, CmpEQF32O) \
+ OPC(0x61, CmpEQF32U) \
+ OPC(0x62, CmpNEF32O) \
+ OPC(0x63, CmpNEF32U) \
+ OPC(0x64, CmpLTF32O) \
+ OPC(0x65, CmpLTF32U) \
+ OPC(0x66, CmpLTEF32O) \
+ OPC(0x67, CmpLTEF32U) \
+ RSV(0x68) \
+ RSV(0x69) \
+ RSV(0x6A) \
+ RSV(0x6B) \
+ RSV(0x6C) \
+ RSV(0x6D) \
+ RSV(0x6E) \
+ RSV(0x6F) \
+ OPC(0x70, CmpNaNF32) \
+ RSV(0x71) \
+ RSV(0x72) \
+ RSV(0x73) \
+ RSV(0x74) \
+ RSV(0x75) \
+ RSV(0x76) \
+ RSV(0x77) \
+ RSV(0x78) \
+ RSV(0x79) \
+ RSV(0x7A) \
+ RSV(0x7B) \
+ RSV(0x7C) \
+ RSV(0x7D) \
+ RSV(0x7E) \
+ RSV(0x7F) \
+ RSV(0x80) \
+ RSV(0x81) \
+ RSV(0x82) \
+ RSV(0x83) \
+ RSV(0x84) \
+ RSV(0x85) \
+ RSV(0x86) \
+ RSV(0x87) \
+ RSV(0x88) \
+ RSV(0x89) \
+ RSV(0x8A) \
+ RSV(0x8B) \
+ RSV(0x8C) \
+ RSV(0x8D) \
+ RSV(0x8E) \
+ RSV(0x8F) \
+ RSV(0x90) \
+ RSV(0x91) \
+ RSV(0x92) \
+ RSV(0x93) \
+ RSV(0x94) \
+ RSV(0x95) \
+ RSV(0x96) \
+ RSV(0x97) \
+ RSV(0x98) \
+ RSV(0x99) \
+ RSV(0x9A) \
+ RSV(0x9B) \
+ RSV(0x9C) \
+ RSV(0x9D) \
+ RSV(0x9E) \
+ RSV(0x9F) \
+ RSV(0xA0) \
+ RSV(0xA1) \
+ RSV(0xA2) \
+ RSV(0xA3) \
+ RSV(0xA4) \
+ RSV(0xA5) \
+ RSV(0xA6) \
+ RSV(0xA7) \
+ RSV(0xA8) \
+ RSV(0xA9) \
+ RSV(0xAA) \
+ RSV(0xAB) \
+ RSV(0xAC) \
+ RSV(0xAD) \
+ RSV(0xAE) \
+ RSV(0xAF) \
+ OPC(0xB0, BufferLoadF32) \
+ OPC(0xB1, BufferStoreF32) \
+ RSV(0xB2) \
+ RSV(0xB3) \
+ RSV(0xB4) \
+ RSV(0xB5) \
+ RSV(0xB6) \
+ RSV(0xB7) \
+ RSV(0xB8) \
+ RSV(0xB9) \
+ RSV(0xBA) \
+ RSV(0xBB) \
+ RSV(0xBC) \
+ RSV(0xBD) \
+ RSV(0xBE) \
+ RSV(0xBF) \
+ OPC(0xC0, BufferFillF32) \
+ RSV(0xC1) \
+ RSV(0xC2) \
+ RSV(0xC3) \
+ RSV(0xC4) \
+ RSV(0xC5) \
+ RSV(0xC6) \
+ RSV(0xC7) \
+ RSV(0xC8) \
+ RSV(0xC9) \
+ RSV(0xCA) \
+ RSV(0xCB) \
+ RSV(0xCC) \
+ RSV(0xCD) \
+ RSV(0xCE) \
+ RSV(0xCF) \
+ RSV(0xD0) \
+ RSV(0xD1) \
+ RSV(0xD2) \
+ RSV(0xD3) \
+ RSV(0xD4) \
+ RSV(0xD5) \
+ RSV(0xD6) \
+ RSV(0xD7) \
+ RSV(0xD8) \
+ RSV(0xD9) \
+ RSV(0xDA) \
+ RSV(0xDB) \
+ RSV(0xDC) \
+ RSV(0xDD) \
+ RSV(0xDE) \
+ RSV(0xDF) \
+ RSV(0xE0) \
+ RSV(0xE1) \
+ RSV(0xE2) \
+ RSV(0xE3) \
+ RSV(0xE4) \
+ RSV(0xE5) \
+ RSV(0xE6) \
+ RSV(0xE7) \
+ RSV(0xE8) \
+ RSV(0xE9) \
+ RSV(0xEA) \
+ RSV(0xEB) \
+ RSV(0xEC) \
+ RSV(0xED) \
+ RSV(0xEE) \
+ RSV(0xEF) \
+ RSV(0xF0) \
+ RSV(0xF1) \
+ RSV(0xF2) \
+ RSV(0xF3) \
+ RSV(0xF4) \
+ RSV(0xF5) \
+ RSV(0xF6) \
+ RSV(0xF7) \
+ RSV(0xF8) \
+ RSV(0xF9) \
+ RSV(0xFA) \
+ RSV(0xFB) \
+ RSV(0xFC) \
+ RSV(0xFD) \
+ RSV(0xFE) \
+ RSV(0xFF)
+
+typedef enum {
+ IREE_VM_OP_EXT_F64_GlobalLoadF64 = 0x00,
+ IREE_VM_OP_EXT_F64_GlobalStoreF64 = 0x01,
+ IREE_VM_OP_EXT_F64_GlobalLoadIndirectF64 = 0x02,
+ IREE_VM_OP_EXT_F64_GlobalStoreIndirectF64 = 0x03,
+ IREE_VM_OP_EXT_F64_RSV_0x04,
+ IREE_VM_OP_EXT_F64_RSV_0x05,
+ IREE_VM_OP_EXT_F64_RSV_0x06,
+ IREE_VM_OP_EXT_F64_RSV_0x07,
+ IREE_VM_OP_EXT_F64_ConstF64Zero = 0x08,
+ IREE_VM_OP_EXT_F64_ConstF64 = 0x09,
+ IREE_VM_OP_EXT_F64_RSV_0x0A,
+ IREE_VM_OP_EXT_F64_RSV_0x0B,
+ IREE_VM_OP_EXT_F64_RSV_0x0C,
+ IREE_VM_OP_EXT_F64_RSV_0x0D,
+ IREE_VM_OP_EXT_F64_RSV_0x0E,
+ IREE_VM_OP_EXT_F64_RSV_0x0F,
+ IREE_VM_OP_EXT_F64_RSV_0x10,
+ IREE_VM_OP_EXT_F64_RSV_0x11,
+ IREE_VM_OP_EXT_F64_RSV_0x12,
+ IREE_VM_OP_EXT_F64_RSV_0x13,
+ IREE_VM_OP_EXT_F64_ListGetF64 = 0x14,
+ IREE_VM_OP_EXT_F64_ListSetF64 = 0x15,
+ IREE_VM_OP_EXT_F64_RSV_0x16,
+ IREE_VM_OP_EXT_F64_RSV_0x17,
+ IREE_VM_OP_EXT_F64_RSV_0x18,
+ IREE_VM_OP_EXT_F64_RSV_0x19,
+ IREE_VM_OP_EXT_F64_RSV_0x1A,
+ IREE_VM_OP_EXT_F64_RSV_0x1B,
+ IREE_VM_OP_EXT_F64_RSV_0x1C,
+ IREE_VM_OP_EXT_F64_RSV_0x1D,
+ IREE_VM_OP_EXT_F64_SelectF64 = 0x1E,
+ IREE_VM_OP_EXT_F64_RSV_0x1F,
+ IREE_VM_OP_EXT_F64_SwitchF64 = 0x20,
+ IREE_VM_OP_EXT_F64_RSV_0x21,
+ IREE_VM_OP_EXT_F64_AddF64 = 0x22,
+ IREE_VM_OP_EXT_F64_SubF64 = 0x23,
+ IREE_VM_OP_EXT_F64_MulF64 = 0x24,
+ IREE_VM_OP_EXT_F64_DivF64 = 0x25,
+ IREE_VM_OP_EXT_F64_RemF64 = 0x26,
+ IREE_VM_OP_EXT_F64_FMAF64 = 0x27,
+ IREE_VM_OP_EXT_F64_AbsF64 = 0x28,
+ IREE_VM_OP_EXT_F64_NegF64 = 0x29,
+ IREE_VM_OP_EXT_F64_CeilF64 = 0x2A,
+ IREE_VM_OP_EXT_F64_FloorF64 = 0x2B,
+ IREE_VM_OP_EXT_F64_TruncF64F32 = 0x2C,
+ IREE_VM_OP_EXT_F64_ExtF32F64 = 0x2D,
+ IREE_VM_OP_EXT_F64_RSV_0x2E,
+ IREE_VM_OP_EXT_F64_RSV_0x2F,
+ IREE_VM_OP_EXT_F64_CastSI32F64 = 0x30,
+ IREE_VM_OP_EXT_F64_CastUI32F64 = 0x31,
+ IREE_VM_OP_EXT_F64_CastF64SI32 = 0x32,
+ IREE_VM_OP_EXT_F64_CastF64UI32 = 0x33,
+ IREE_VM_OP_EXT_F64_CastSI64F64 = 0x34,
+ IREE_VM_OP_EXT_F64_CastUI64F64 = 0x35,
+ IREE_VM_OP_EXT_F64_CastF64SI64 = 0x36,
+ IREE_VM_OP_EXT_F64_CastF64UI64 = 0x37,
+ IREE_VM_OP_EXT_F64_BitcastI64F64 = 0x38,
+ IREE_VM_OP_EXT_F64_BitcastF64I64 = 0x39,
+ IREE_VM_OP_EXT_F64_RSV_0x3A,
+ IREE_VM_OP_EXT_F64_RSV_0x3B,
+ IREE_VM_OP_EXT_F64_RSV_0x3C,
+ IREE_VM_OP_EXT_F64_RSV_0x3D,
+ IREE_VM_OP_EXT_F64_RSV_0x3E,
+ IREE_VM_OP_EXT_F64_RSV_0x3F,
+ IREE_VM_OP_EXT_F64_AtanF64 = 0x40,
+ IREE_VM_OP_EXT_F64_Atan2F64 = 0x41,
+ IREE_VM_OP_EXT_F64_CosF64 = 0x42,
+ IREE_VM_OP_EXT_F64_SinF64 = 0x43,
+ IREE_VM_OP_EXT_F64_ExpF64 = 0x44,
+ IREE_VM_OP_EXT_F64_Exp2F64 = 0x45,
+ IREE_VM_OP_EXT_F64_ExpM1F64 = 0x46,
+ IREE_VM_OP_EXT_F64_LogF64 = 0x47,
+ IREE_VM_OP_EXT_F64_Log10F64 = 0x48,
+ IREE_VM_OP_EXT_F64_Log1pF64 = 0x49,
+ IREE_VM_OP_EXT_F64_Log2F64 = 0x4A,
+ IREE_VM_OP_EXT_F64_PowF64 = 0x4B,
+ IREE_VM_OP_EXT_F64_RsqrtF64 = 0x4C,
+ IREE_VM_OP_EXT_F64_SqrtF64 = 0x4D,
+ IREE_VM_OP_EXT_F64_TanhF64 = 0x4E,
+ IREE_VM_OP_EXT_F64_ErfF64 = 0x4F,
+ IREE_VM_OP_EXT_F64_RSV_0x50,
+ IREE_VM_OP_EXT_F64_RSV_0x51,
+ IREE_VM_OP_EXT_F64_RSV_0x52,
+ IREE_VM_OP_EXT_F64_RSV_0x53,
+ IREE_VM_OP_EXT_F64_RSV_0x54,
+ IREE_VM_OP_EXT_F64_RSV_0x55,
+ IREE_VM_OP_EXT_F64_RSV_0x56,
+ IREE_VM_OP_EXT_F64_RSV_0x57,
+ IREE_VM_OP_EXT_F64_RSV_0x58,
+ IREE_VM_OP_EXT_F64_RSV_0x59,
+ IREE_VM_OP_EXT_F64_RSV_0x5A,
+ IREE_VM_OP_EXT_F64_RSV_0x5B,
+ IREE_VM_OP_EXT_F64_RSV_0x5C,
+ IREE_VM_OP_EXT_F64_RSV_0x5D,
+ IREE_VM_OP_EXT_F64_RSV_0x5E,
+ IREE_VM_OP_EXT_F64_RSV_0x5F,
+ IREE_VM_OP_EXT_F64_CmpEQF64O = 0x60,
+ IREE_VM_OP_EXT_F64_CmpEQF64U = 0x61,
+ IREE_VM_OP_EXT_F64_CmpNEF64O = 0x62,
+ IREE_VM_OP_EXT_F64_CmpNEF64U = 0x63,
+ IREE_VM_OP_EXT_F64_CmpLTF64O = 0x64,
+ IREE_VM_OP_EXT_F64_CmpLTF64U = 0x65,
+ IREE_VM_OP_EXT_F64_CmpLTEF64O = 0x66,
+ IREE_VM_OP_EXT_F64_CmpLTEF64U = 0x67,
+ IREE_VM_OP_EXT_F64_RSV_0x68,
+ IREE_VM_OP_EXT_F64_RSV_0x69,
+ IREE_VM_OP_EXT_F64_RSV_0x6A,
+ IREE_VM_OP_EXT_F64_RSV_0x6B,
+ IREE_VM_OP_EXT_F64_RSV_0x6C,
+ IREE_VM_OP_EXT_F64_RSV_0x6D,
+ IREE_VM_OP_EXT_F64_RSV_0x6E,
+ IREE_VM_OP_EXT_F64_RSV_0x6F,
+ IREE_VM_OP_EXT_F64_CmpNaNF64 = 0x70,
+ IREE_VM_OP_EXT_F64_RSV_0x71,
+ IREE_VM_OP_EXT_F64_RSV_0x72,
+ IREE_VM_OP_EXT_F64_RSV_0x73,
+ IREE_VM_OP_EXT_F64_RSV_0x74,
+ IREE_VM_OP_EXT_F64_RSV_0x75,
+ IREE_VM_OP_EXT_F64_RSV_0x76,
+ IREE_VM_OP_EXT_F64_RSV_0x77,
+ IREE_VM_OP_EXT_F64_RSV_0x78,
+ IREE_VM_OP_EXT_F64_RSV_0x79,
+ IREE_VM_OP_EXT_F64_RSV_0x7A,
+ IREE_VM_OP_EXT_F64_RSV_0x7B,
+ IREE_VM_OP_EXT_F64_RSV_0x7C,
+ IREE_VM_OP_EXT_F64_RSV_0x7D,
+ IREE_VM_OP_EXT_F64_RSV_0x7E,
+ IREE_VM_OP_EXT_F64_RSV_0x7F,
+ IREE_VM_OP_EXT_F64_RSV_0x80,
+ IREE_VM_OP_EXT_F64_RSV_0x81,
+ IREE_VM_OP_EXT_F64_RSV_0x82,
+ IREE_VM_OP_EXT_F64_RSV_0x83,
+ IREE_VM_OP_EXT_F64_RSV_0x84,
+ IREE_VM_OP_EXT_F64_RSV_0x85,
+ IREE_VM_OP_EXT_F64_RSV_0x86,
+ IREE_VM_OP_EXT_F64_RSV_0x87,
+ IREE_VM_OP_EXT_F64_RSV_0x88,
+ IREE_VM_OP_EXT_F64_RSV_0x89,
+ IREE_VM_OP_EXT_F64_RSV_0x8A,
+ IREE_VM_OP_EXT_F64_RSV_0x8B,
+ IREE_VM_OP_EXT_F64_RSV_0x8C,
+ IREE_VM_OP_EXT_F64_RSV_0x8D,
+ IREE_VM_OP_EXT_F64_RSV_0x8E,
+ IREE_VM_OP_EXT_F64_RSV_0x8F,
+ IREE_VM_OP_EXT_F64_RSV_0x90,
+ IREE_VM_OP_EXT_F64_RSV_0x91,
+ IREE_VM_OP_EXT_F64_RSV_0x92,
+ IREE_VM_OP_EXT_F64_RSV_0x93,
+ IREE_VM_OP_EXT_F64_RSV_0x94,
+ IREE_VM_OP_EXT_F64_RSV_0x95,
+ IREE_VM_OP_EXT_F64_RSV_0x96,
+ IREE_VM_OP_EXT_F64_RSV_0x97,
+ IREE_VM_OP_EXT_F64_RSV_0x98,
+ IREE_VM_OP_EXT_F64_RSV_0x99,
+ IREE_VM_OP_EXT_F64_RSV_0x9A,
+ IREE_VM_OP_EXT_F64_RSV_0x9B,
+ IREE_VM_OP_EXT_F64_RSV_0x9C,
+ IREE_VM_OP_EXT_F64_RSV_0x9D,
+ IREE_VM_OP_EXT_F64_RSV_0x9E,
+ IREE_VM_OP_EXT_F64_RSV_0x9F,
+ IREE_VM_OP_EXT_F64_RSV_0xA0,
+ IREE_VM_OP_EXT_F64_RSV_0xA1,
+ IREE_VM_OP_EXT_F64_RSV_0xA2,
+ IREE_VM_OP_EXT_F64_RSV_0xA3,
+ IREE_VM_OP_EXT_F64_RSV_0xA4,
+ IREE_VM_OP_EXT_F64_RSV_0xA5,
+ IREE_VM_OP_EXT_F64_RSV_0xA6,
+ IREE_VM_OP_EXT_F64_RSV_0xA7,
+ IREE_VM_OP_EXT_F64_RSV_0xA8,
+ IREE_VM_OP_EXT_F64_RSV_0xA9,
+ IREE_VM_OP_EXT_F64_RSV_0xAA,
+ IREE_VM_OP_EXT_F64_RSV_0xAB,
+ IREE_VM_OP_EXT_F64_RSV_0xAC,
+ IREE_VM_OP_EXT_F64_RSV_0xAD,
+ IREE_VM_OP_EXT_F64_RSV_0xAE,
+ IREE_VM_OP_EXT_F64_RSV_0xAF,
+ IREE_VM_OP_EXT_F64_BufferLoadF64 = 0xB0,
+ IREE_VM_OP_EXT_F64_BufferStoreF64 = 0xB1,
+ IREE_VM_OP_EXT_F64_RSV_0xB2,
+ IREE_VM_OP_EXT_F64_RSV_0xB3,
+ IREE_VM_OP_EXT_F64_RSV_0xB4,
+ IREE_VM_OP_EXT_F64_RSV_0xB5,
+ IREE_VM_OP_EXT_F64_RSV_0xB6,
+ IREE_VM_OP_EXT_F64_RSV_0xB7,
+ IREE_VM_OP_EXT_F64_RSV_0xB8,
+ IREE_VM_OP_EXT_F64_RSV_0xB9,
+ IREE_VM_OP_EXT_F64_RSV_0xBA,
+ IREE_VM_OP_EXT_F64_RSV_0xBB,
+ IREE_VM_OP_EXT_F64_RSV_0xBC,
+ IREE_VM_OP_EXT_F64_RSV_0xBD,
+ IREE_VM_OP_EXT_F64_RSV_0xBE,
+ IREE_VM_OP_EXT_F64_RSV_0xBF,
+ IREE_VM_OP_EXT_F64_BufferFillF64 = 0xC0,
+ IREE_VM_OP_EXT_F64_RSV_0xC1,
+ IREE_VM_OP_EXT_F64_RSV_0xC2,
+ IREE_VM_OP_EXT_F64_RSV_0xC3,
+ IREE_VM_OP_EXT_F64_RSV_0xC4,
+ IREE_VM_OP_EXT_F64_RSV_0xC5,
+ IREE_VM_OP_EXT_F64_RSV_0xC6,
+ IREE_VM_OP_EXT_F64_RSV_0xC7,
+ IREE_VM_OP_EXT_F64_RSV_0xC8,
+ IREE_VM_OP_EXT_F64_RSV_0xC9,
+ IREE_VM_OP_EXT_F64_RSV_0xCA,
+ IREE_VM_OP_EXT_F64_RSV_0xCB,
+ IREE_VM_OP_EXT_F64_RSV_0xCC,
+ IREE_VM_OP_EXT_F64_RSV_0xCD,
+ IREE_VM_OP_EXT_F64_RSV_0xCE,
+ IREE_VM_OP_EXT_F64_RSV_0xCF,
+ IREE_VM_OP_EXT_F64_RSV_0xD0,
+ IREE_VM_OP_EXT_F64_RSV_0xD1,
+ IREE_VM_OP_EXT_F64_RSV_0xD2,
+ IREE_VM_OP_EXT_F64_RSV_0xD3,
+ IREE_VM_OP_EXT_F64_RSV_0xD4,
+ IREE_VM_OP_EXT_F64_RSV_0xD5,
+ IREE_VM_OP_EXT_F64_RSV_0xD6,
+ IREE_VM_OP_EXT_F64_RSV_0xD7,
+ IREE_VM_OP_EXT_F64_RSV_0xD8,
+ IREE_VM_OP_EXT_F64_RSV_0xD9,
+ IREE_VM_OP_EXT_F64_RSV_0xDA,
+ IREE_VM_OP_EXT_F64_RSV_0xDB,
+ IREE_VM_OP_EXT_F64_RSV_0xDC,
+ IREE_VM_OP_EXT_F64_RSV_0xDD,
+ IREE_VM_OP_EXT_F64_RSV_0xDE,
+ IREE_VM_OP_EXT_F64_RSV_0xDF,
+ IREE_VM_OP_EXT_F64_RSV_0xE0,
+ IREE_VM_OP_EXT_F64_RSV_0xE1,
+ IREE_VM_OP_EXT_F64_RSV_0xE2,
+ IREE_VM_OP_EXT_F64_RSV_0xE3,
+ IREE_VM_OP_EXT_F64_RSV_0xE4,
+ IREE_VM_OP_EXT_F64_RSV_0xE5,
+ IREE_VM_OP_EXT_F64_RSV_0xE6,
+ IREE_VM_OP_EXT_F64_RSV_0xE7,
+ IREE_VM_OP_EXT_F64_RSV_0xE8,
+ IREE_VM_OP_EXT_F64_RSV_0xE9,
+ IREE_VM_OP_EXT_F64_RSV_0xEA,
+ IREE_VM_OP_EXT_F64_RSV_0xEB,
+ IREE_VM_OP_EXT_F64_RSV_0xEC,
+ IREE_VM_OP_EXT_F64_RSV_0xED,
+ IREE_VM_OP_EXT_F64_RSV_0xEE,
+ IREE_VM_OP_EXT_F64_RSV_0xEF,
+ IREE_VM_OP_EXT_F64_RSV_0xF0,
+ IREE_VM_OP_EXT_F64_RSV_0xF1,
+ IREE_VM_OP_EXT_F64_RSV_0xF2,
+ IREE_VM_OP_EXT_F64_RSV_0xF3,
+ IREE_VM_OP_EXT_F64_RSV_0xF4,
+ IREE_VM_OP_EXT_F64_RSV_0xF5,
+ IREE_VM_OP_EXT_F64_RSV_0xF6,
+ IREE_VM_OP_EXT_F64_RSV_0xF7,
+ IREE_VM_OP_EXT_F64_RSV_0xF8,
+ IREE_VM_OP_EXT_F64_RSV_0xF9,
+ IREE_VM_OP_EXT_F64_RSV_0xFA,
+ IREE_VM_OP_EXT_F64_RSV_0xFB,
+ IREE_VM_OP_EXT_F64_RSV_0xFC,
+ IREE_VM_OP_EXT_F64_RSV_0xFD,
+ IREE_VM_OP_EXT_F64_RSV_0xFE,
+ IREE_VM_OP_EXT_F64_RSV_0xFF,
+} iree_vm_ext_f64_op_t;
+
+// X-macro table for the f64 extension opcode set: expands OPC(ordinal, name)
+// for each assigned opcode and RSV(ordinal) for each reserved slot, covering
+// the full 0x00-0xFF encoding space. Callers supply the two macros to stamp
+// out dispatch tables/disassembler entries that stay in sync with the
+// iree_vm_ext_f64_op_t enum ordinals.
+#define IREE_VM_OP_EXT_F64_TABLE(OPC, RSV) \
+ OPC(0x00, GlobalLoadF64) \
+ OPC(0x01, GlobalStoreF64) \
+ OPC(0x02, GlobalLoadIndirectF64) \
+ OPC(0x03, GlobalStoreIndirectF64) \
+ RSV(0x04) \
+ RSV(0x05) \
+ RSV(0x06) \
+ RSV(0x07) \
+ OPC(0x08, ConstF64Zero) \
+ OPC(0x09, ConstF64) \
+ RSV(0x0A) \
+ RSV(0x0B) \
+ RSV(0x0C) \
+ RSV(0x0D) \
+ RSV(0x0E) \
+ RSV(0x0F) \
+ RSV(0x10) \
+ RSV(0x11) \
+ RSV(0x12) \
+ RSV(0x13) \
+ OPC(0x14, ListGetF64) \
+ OPC(0x15, ListSetF64) \
+ RSV(0x16) \
+ RSV(0x17) \
+ RSV(0x18) \
+ RSV(0x19) \
+ RSV(0x1A) \
+ RSV(0x1B) \
+ RSV(0x1C) \
+ RSV(0x1D) \
+ OPC(0x1E, SelectF64) \
+ RSV(0x1F) \
+ OPC(0x20, SwitchF64) \
+ RSV(0x21) \
+ OPC(0x22, AddF64) \
+ OPC(0x23, SubF64) \
+ OPC(0x24, MulF64) \
+ OPC(0x25, DivF64) \
+ OPC(0x26, RemF64) \
+ OPC(0x27, FMAF64) \
+ OPC(0x28, AbsF64) \
+ OPC(0x29, NegF64) \
+ OPC(0x2A, CeilF64) \
+ OPC(0x2B, FloorF64) \
+ OPC(0x2C, TruncF64F32) \
+ OPC(0x2D, ExtF32F64) \
+ RSV(0x2E) \
+ RSV(0x2F) \
+ OPC(0x30, CastSI32F64) \
+ OPC(0x31, CastUI32F64) \
+ OPC(0x32, CastF64SI32) \
+ OPC(0x33, CastF64UI32) \
+ OPC(0x34, CastSI64F64) \
+ OPC(0x35, CastUI64F64) \
+ OPC(0x36, CastF64SI64) \
+ OPC(0x37, CastF64UI64) \
+ OPC(0x38, BitcastI64F64) \
+ OPC(0x39, BitcastF64I64) \
+ RSV(0x3A) \
+ RSV(0x3B) \
+ RSV(0x3C) \
+ RSV(0x3D) \
+ RSV(0x3E) \
+ RSV(0x3F) \
+ OPC(0x40, AtanF64) \
+ OPC(0x41, Atan2F64) \
+ OPC(0x42, CosF64) \
+ OPC(0x43, SinF64) \
+ OPC(0x44, ExpF64) \
+ OPC(0x45, Exp2F64) \
+ OPC(0x46, ExpM1F64) \
+ OPC(0x47, LogF64) \
+ OPC(0x48, Log10F64) \
+ OPC(0x49, Log1pF64) \
+ OPC(0x4A, Log2F64) \
+ OPC(0x4B, PowF64) \
+ OPC(0x4C, RsqrtF64) \
+ OPC(0x4D, SqrtF64) \
+ OPC(0x4E, TanhF64) \
+ OPC(0x4F, ErfF64) \
+ RSV(0x50) \
+ RSV(0x51) \
+ RSV(0x52) \
+ RSV(0x53) \
+ RSV(0x54) \
+ RSV(0x55) \
+ RSV(0x56) \
+ RSV(0x57) \
+ RSV(0x58) \
+ RSV(0x59) \
+ RSV(0x5A) \
+ RSV(0x5B) \
+ RSV(0x5C) \
+ RSV(0x5D) \
+ RSV(0x5E) \
+ RSV(0x5F) \
+ OPC(0x60, CmpEQF64O) \
+ OPC(0x61, CmpEQF64U) \
+ OPC(0x62, CmpNEF64O) \
+ OPC(0x63, CmpNEF64U) \
+ OPC(0x64, CmpLTF64O) \
+ OPC(0x65, CmpLTF64U) \
+ OPC(0x66, CmpLTEF64O) \
+ OPC(0x67, CmpLTEF64U) \
+ RSV(0x68) \
+ RSV(0x69) \
+ RSV(0x6A) \
+ RSV(0x6B) \
+ RSV(0x6C) \
+ RSV(0x6D) \
+ RSV(0x6E) \
+ RSV(0x6F) \
+ OPC(0x70, CmpNaNF64) \
+ RSV(0x71) \
+ RSV(0x72) \
+ RSV(0x73) \
+ RSV(0x74) \
+ RSV(0x75) \
+ RSV(0x76) \
+ RSV(0x77) \
+ RSV(0x78) \
+ RSV(0x79) \
+ RSV(0x7A) \
+ RSV(0x7B) \
+ RSV(0x7C) \
+ RSV(0x7D) \
+ RSV(0x7E) \
+ RSV(0x7F) \
+ RSV(0x80) \
+ RSV(0x81) \
+ RSV(0x82) \
+ RSV(0x83) \
+ RSV(0x84) \
+ RSV(0x85) \
+ RSV(0x86) \
+ RSV(0x87) \
+ RSV(0x88) \
+ RSV(0x89) \
+ RSV(0x8A) \
+ RSV(0x8B) \
+ RSV(0x8C) \
+ RSV(0x8D) \
+ RSV(0x8E) \
+ RSV(0x8F) \
+ RSV(0x90) \
+ RSV(0x91) \
+ RSV(0x92) \
+ RSV(0x93) \
+ RSV(0x94) \
+ RSV(0x95) \
+ RSV(0x96) \
+ RSV(0x97) \
+ RSV(0x98) \
+ RSV(0x99) \
+ RSV(0x9A) \
+ RSV(0x9B) \
+ RSV(0x9C) \
+ RSV(0x9D) \
+ RSV(0x9E) \
+ RSV(0x9F) \
+ RSV(0xA0) \
+ RSV(0xA1) \
+ RSV(0xA2) \
+ RSV(0xA3) \
+ RSV(0xA4) \
+ RSV(0xA5) \
+ RSV(0xA6) \
+ RSV(0xA7) \
+ RSV(0xA8) \
+ RSV(0xA9) \
+ RSV(0xAA) \
+ RSV(0xAB) \
+ RSV(0xAC) \
+ RSV(0xAD) \
+ RSV(0xAE) \
+ RSV(0xAF) \
+ OPC(0xB0, BufferLoadF64) \
+ OPC(0xB1, BufferStoreF64) \
+ RSV(0xB2) \
+ RSV(0xB3) \
+ RSV(0xB4) \
+ RSV(0xB5) \
+ RSV(0xB6) \
+ RSV(0xB7) \
+ RSV(0xB8) \
+ RSV(0xB9) \
+ RSV(0xBA) \
+ RSV(0xBB) \
+ RSV(0xBC) \
+ RSV(0xBD) \
+ RSV(0xBE) \
+ RSV(0xBF) \
+ OPC(0xC0, BufferFillF64) \
+ RSV(0xC1) \
+ RSV(0xC2) \
+ RSV(0xC3) \
+ RSV(0xC4) \
+ RSV(0xC5) \
+ RSV(0xC6) \
+ RSV(0xC7) \
+ RSV(0xC8) \
+ RSV(0xC9) \
+ RSV(0xCA) \
+ RSV(0xCB) \
+ RSV(0xCC) \
+ RSV(0xCD) \
+ RSV(0xCE) \
+ RSV(0xCF) \
+ RSV(0xD0) \
+ RSV(0xD1) \
+ RSV(0xD2) \
+ RSV(0xD3) \
+ RSV(0xD4) \
+ RSV(0xD5) \
+ RSV(0xD6) \
+ RSV(0xD7) \
+ RSV(0xD8) \
+ RSV(0xD9) \
+ RSV(0xDA) \
+ RSV(0xDB) \
+ RSV(0xDC) \
+ RSV(0xDD) \
+ RSV(0xDE) \
+ RSV(0xDF) \
+ RSV(0xE0) \
+ RSV(0xE1) \
+ RSV(0xE2) \
+ RSV(0xE3) \
+ RSV(0xE4) \
+ RSV(0xE5) \
+ RSV(0xE6) \
+ RSV(0xE7) \
+ RSV(0xE8) \
+ RSV(0xE9) \
+ RSV(0xEA) \
+ RSV(0xEB) \
+ RSV(0xEC) \
+ RSV(0xED) \
+ RSV(0xEE) \
+ RSV(0xEF) \
+ RSV(0xF0) \
+ RSV(0xF1) \
+ RSV(0xF2) \
+ RSV(0xF3) \
+ RSV(0xF4) \
+ RSV(0xF5) \
+ RSV(0xF6) \
+ RSV(0xF7) \
+ RSV(0xF8) \
+ RSV(0xF9) \
+ RSV(0xFA) \
+ RSV(0xFB) \
+ RSV(0xFC) \
+ RSV(0xFD) \
+ RSV(0xFE) \
+ RSV(0xFF)
+
+// Opcode ordinals for the i64 extension bytecode set. Assigned opcodes carry
+// explicit `= 0xNN` initializers; the RSV_0xNN members fill every unassigned
+// slot so the enum stays dense over the full 0x00-0xFF byte range and the
+// ordinals match the IREE_VM_OP_EXT_I64_TABLE X-macro in this file.
+typedef enum {
+ IREE_VM_OP_EXT_I64_GlobalLoadI64 = 0x00,
+ IREE_VM_OP_EXT_I64_GlobalStoreI64 = 0x01,
+ IREE_VM_OP_EXT_I64_GlobalLoadIndirectI64 = 0x02,
+ IREE_VM_OP_EXT_I64_GlobalStoreIndirectI64 = 0x03,
+ IREE_VM_OP_EXT_I64_RSV_0x04,
+ IREE_VM_OP_EXT_I64_RSV_0x05,
+ IREE_VM_OP_EXT_I64_RSV_0x06,
+ IREE_VM_OP_EXT_I64_RSV_0x07,
+ IREE_VM_OP_EXT_I64_ConstI64Zero = 0x08,
+ IREE_VM_OP_EXT_I64_ConstI64 = 0x09,
+ IREE_VM_OP_EXT_I64_RSV_0x0A,
+ IREE_VM_OP_EXT_I64_RSV_0x0B,
+ IREE_VM_OP_EXT_I64_RSV_0x0C,
+ IREE_VM_OP_EXT_I64_RSV_0x0D,
+ IREE_VM_OP_EXT_I64_RSV_0x0E,
+ IREE_VM_OP_EXT_I64_RSV_0x0F,
+ IREE_VM_OP_EXT_I64_RSV_0x10,
+ IREE_VM_OP_EXT_I64_RSV_0x11,
+ IREE_VM_OP_EXT_I64_RSV_0x12,
+ IREE_VM_OP_EXT_I64_RSV_0x13,
+ IREE_VM_OP_EXT_I64_ListGetI64 = 0x14,
+ IREE_VM_OP_EXT_I64_ListSetI64 = 0x15,
+ IREE_VM_OP_EXT_I64_RSV_0x16,
+ IREE_VM_OP_EXT_I64_RSV_0x17,
+ IREE_VM_OP_EXT_I64_RSV_0x18,
+ IREE_VM_OP_EXT_I64_RSV_0x19,
+ IREE_VM_OP_EXT_I64_RSV_0x1A,
+ IREE_VM_OP_EXT_I64_RSV_0x1B,
+ IREE_VM_OP_EXT_I64_RSV_0x1C,
+ IREE_VM_OP_EXT_I64_RSV_0x1D,
+ IREE_VM_OP_EXT_I64_SelectI64 = 0x1E,
+ IREE_VM_OP_EXT_I64_RSV_0x1F,
+ IREE_VM_OP_EXT_I64_SwitchI64 = 0x20,
+ IREE_VM_OP_EXT_I64_RSV_0x21,
+ IREE_VM_OP_EXT_I64_AddI64 = 0x22,
+ IREE_VM_OP_EXT_I64_SubI64 = 0x23,
+ IREE_VM_OP_EXT_I64_MulI64 = 0x24,
+ IREE_VM_OP_EXT_I64_DivI64S = 0x25,
+ IREE_VM_OP_EXT_I64_DivI64U = 0x26,
+ IREE_VM_OP_EXT_I64_RemI64S = 0x27,
+ IREE_VM_OP_EXT_I64_RemI64U = 0x28,
+ IREE_VM_OP_EXT_I64_FMAI64 = 0x29,
+ IREE_VM_OP_EXT_I64_RSV_0x2A,
+ IREE_VM_OP_EXT_I64_RSV_0x2B,
+ IREE_VM_OP_EXT_I64_RSV_0x2C,
+ IREE_VM_OP_EXT_I64_RSV_0x2D,
+ IREE_VM_OP_EXT_I64_RSV_0x2E,
+ IREE_VM_OP_EXT_I64_RSV_0x2F,
+ IREE_VM_OP_EXT_I64_NotI64 = 0x30,
+ IREE_VM_OP_EXT_I64_AndI64 = 0x31,
+ IREE_VM_OP_EXT_I64_OrI64 = 0x32,
+ IREE_VM_OP_EXT_I64_XorI64 = 0x33,
+ IREE_VM_OP_EXT_I64_ShlI64 = 0x34,
+ IREE_VM_OP_EXT_I64_ShrI64S = 0x35,
+ IREE_VM_OP_EXT_I64_ShrI64U = 0x36,
+ IREE_VM_OP_EXT_I64_TruncI64I32 = 0x37,
+ IREE_VM_OP_EXT_I64_ExtI32I64S = 0x38,
+ IREE_VM_OP_EXT_I64_ExtI32I64U = 0x39,
+ IREE_VM_OP_EXT_I64_RSV_0x3A,
+ IREE_VM_OP_EXT_I64_RSV_0x3B,
+ IREE_VM_OP_EXT_I64_RSV_0x3C,
+ IREE_VM_OP_EXT_I64_RSV_0x3D,
+ IREE_VM_OP_EXT_I64_RSV_0x3E,
+ IREE_VM_OP_EXT_I64_RSV_0x3F,
+ IREE_VM_OP_EXT_I64_CmpEQI64 = 0x40,
+ IREE_VM_OP_EXT_I64_CmpNEI64 = 0x41,
+ IREE_VM_OP_EXT_I64_CmpLTI64S = 0x42,
+ IREE_VM_OP_EXT_I64_CmpLTI64U = 0x43,
+ IREE_VM_OP_EXT_I64_RSV_0x44,
+ IREE_VM_OP_EXT_I64_RSV_0x45,
+ IREE_VM_OP_EXT_I64_RSV_0x46,
+ IREE_VM_OP_EXT_I64_RSV_0x47,
+ IREE_VM_OP_EXT_I64_RSV_0x48,
+ IREE_VM_OP_EXT_I64_RSV_0x49,
+ IREE_VM_OP_EXT_I64_RSV_0x4A,
+ IREE_VM_OP_EXT_I64_RSV_0x4B,
+ IREE_VM_OP_EXT_I64_RSV_0x4C,
+ IREE_VM_OP_EXT_I64_CmpNZI64 = 0x4D,
+ IREE_VM_OP_EXT_I64_RSV_0x4E,
+ IREE_VM_OP_EXT_I64_RSV_0x4F,
+ IREE_VM_OP_EXT_I64_RSV_0x50,
+ IREE_VM_OP_EXT_I64_RSV_0x51,
+ IREE_VM_OP_EXT_I64_RSV_0x52,
+ IREE_VM_OP_EXT_I64_RSV_0x53,
+ IREE_VM_OP_EXT_I64_RSV_0x54,
+ IREE_VM_OP_EXT_I64_RSV_0x55,
+ IREE_VM_OP_EXT_I64_RSV_0x56,
+ IREE_VM_OP_EXT_I64_RSV_0x57,
+ IREE_VM_OP_EXT_I64_RSV_0x58,
+ IREE_VM_OP_EXT_I64_RSV_0x59,
+ IREE_VM_OP_EXT_I64_RSV_0x5A,
+ IREE_VM_OP_EXT_I64_RSV_0x5B,
+ IREE_VM_OP_EXT_I64_RSV_0x5C,
+ IREE_VM_OP_EXT_I64_RSV_0x5D,
+ IREE_VM_OP_EXT_I64_RSV_0x5E,
+ IREE_VM_OP_EXT_I64_RSV_0x5F,
+ IREE_VM_OP_EXT_I64_RSV_0x60,
+ IREE_VM_OP_EXT_I64_RSV_0x61,
+ IREE_VM_OP_EXT_I64_RSV_0x62,
+ IREE_VM_OP_EXT_I64_RSV_0x63,
+ IREE_VM_OP_EXT_I64_RSV_0x64,
+ IREE_VM_OP_EXT_I64_RSV_0x65,
+ IREE_VM_OP_EXT_I64_RSV_0x66,
+ IREE_VM_OP_EXT_I64_RSV_0x67,
+ IREE_VM_OP_EXT_I64_RSV_0x68,
+ IREE_VM_OP_EXT_I64_RSV_0x69,
+ IREE_VM_OP_EXT_I64_RSV_0x6A,
+ IREE_VM_OP_EXT_I64_RSV_0x6B,
+ IREE_VM_OP_EXT_I64_RSV_0x6C,
+ IREE_VM_OP_EXT_I64_RSV_0x6D,
+ IREE_VM_OP_EXT_I64_RSV_0x6E,
+ IREE_VM_OP_EXT_I64_RSV_0x6F,
+ IREE_VM_OP_EXT_I64_RSV_0x70,
+ IREE_VM_OP_EXT_I64_RSV_0x71,
+ IREE_VM_OP_EXT_I64_RSV_0x72,
+ IREE_VM_OP_EXT_I64_RSV_0x73,
+ IREE_VM_OP_EXT_I64_RSV_0x74,
+ IREE_VM_OP_EXT_I64_RSV_0x75,
+ IREE_VM_OP_EXT_I64_RSV_0x76,
+ IREE_VM_OP_EXT_I64_RSV_0x77,
+ IREE_VM_OP_EXT_I64_RSV_0x78,
+ IREE_VM_OP_EXT_I64_RSV_0x79,
+ IREE_VM_OP_EXT_I64_RSV_0x7A,
+ IREE_VM_OP_EXT_I64_RSV_0x7B,
+ IREE_VM_OP_EXT_I64_RSV_0x7C,
+ IREE_VM_OP_EXT_I64_RSV_0x7D,
+ IREE_VM_OP_EXT_I64_RSV_0x7E,
+ IREE_VM_OP_EXT_I64_RSV_0x7F,
+ IREE_VM_OP_EXT_I64_RSV_0x80,
+ IREE_VM_OP_EXT_I64_RSV_0x81,
+ IREE_VM_OP_EXT_I64_RSV_0x82,
+ IREE_VM_OP_EXT_I64_RSV_0x83,
+ IREE_VM_OP_EXT_I64_RSV_0x84,
+ IREE_VM_OP_EXT_I64_RSV_0x85,
+ IREE_VM_OP_EXT_I64_RSV_0x86,
+ IREE_VM_OP_EXT_I64_RSV_0x87,
+ IREE_VM_OP_EXT_I64_RSV_0x88,
+ IREE_VM_OP_EXT_I64_RSV_0x89,
+ IREE_VM_OP_EXT_I64_RSV_0x8A,
+ IREE_VM_OP_EXT_I64_RSV_0x8B,
+ IREE_VM_OP_EXT_I64_RSV_0x8C,
+ IREE_VM_OP_EXT_I64_RSV_0x8D,
+ IREE_VM_OP_EXT_I64_RSV_0x8E,
+ IREE_VM_OP_EXT_I64_RSV_0x8F,
+ IREE_VM_OP_EXT_I64_RSV_0x90,
+ IREE_VM_OP_EXT_I64_RSV_0x91,
+ IREE_VM_OP_EXT_I64_RSV_0x92,
+ IREE_VM_OP_EXT_I64_RSV_0x93,
+ IREE_VM_OP_EXT_I64_RSV_0x94,
+ IREE_VM_OP_EXT_I64_RSV_0x95,
+ IREE_VM_OP_EXT_I64_RSV_0x96,
+ IREE_VM_OP_EXT_I64_RSV_0x97,
+ IREE_VM_OP_EXT_I64_RSV_0x98,
+ IREE_VM_OP_EXT_I64_RSV_0x99,
+ IREE_VM_OP_EXT_I64_RSV_0x9A,
+ IREE_VM_OP_EXT_I64_RSV_0x9B,
+ IREE_VM_OP_EXT_I64_RSV_0x9C,
+ IREE_VM_OP_EXT_I64_RSV_0x9D,
+ IREE_VM_OP_EXT_I64_RSV_0x9E,
+ IREE_VM_OP_EXT_I64_RSV_0x9F,
+ IREE_VM_OP_EXT_I64_RSV_0xA0,
+ IREE_VM_OP_EXT_I64_RSV_0xA1,
+ IREE_VM_OP_EXT_I64_RSV_0xA2,
+ IREE_VM_OP_EXT_I64_RSV_0xA3,
+ IREE_VM_OP_EXT_I64_RSV_0xA4,
+ IREE_VM_OP_EXT_I64_RSV_0xA5,
+ IREE_VM_OP_EXT_I64_RSV_0xA6,
+ IREE_VM_OP_EXT_I64_RSV_0xA7,
+ IREE_VM_OP_EXT_I64_RSV_0xA8,
+ IREE_VM_OP_EXT_I64_RSV_0xA9,
+ IREE_VM_OP_EXT_I64_RSV_0xAA,
+ IREE_VM_OP_EXT_I64_RSV_0xAB,
+ IREE_VM_OP_EXT_I64_RSV_0xAC,
+ IREE_VM_OP_EXT_I64_RSV_0xAD,
+ IREE_VM_OP_EXT_I64_RSV_0xAE,
+ IREE_VM_OP_EXT_I64_RSV_0xAF,
+ IREE_VM_OP_EXT_I64_BufferLoadI64 = 0xB0,
+ IREE_VM_OP_EXT_I64_BufferStoreI64 = 0xB1,
+ IREE_VM_OP_EXT_I64_RSV_0xB2,
+ IREE_VM_OP_EXT_I64_RSV_0xB3,
+ IREE_VM_OP_EXT_I64_RSV_0xB4,
+ IREE_VM_OP_EXT_I64_RSV_0xB5,
+ IREE_VM_OP_EXT_I64_RSV_0xB6,
+ IREE_VM_OP_EXT_I64_RSV_0xB7,
+ IREE_VM_OP_EXT_I64_RSV_0xB8,
+ IREE_VM_OP_EXT_I64_RSV_0xB9,
+ IREE_VM_OP_EXT_I64_RSV_0xBA,
+ IREE_VM_OP_EXT_I64_RSV_0xBB,
+ IREE_VM_OP_EXT_I64_RSV_0xBC,
+ IREE_VM_OP_EXT_I64_RSV_0xBD,
+ IREE_VM_OP_EXT_I64_RSV_0xBE,
+ IREE_VM_OP_EXT_I64_RSV_0xBF,
+ IREE_VM_OP_EXT_I64_BufferFillI64 = 0xC0,
+ IREE_VM_OP_EXT_I64_RSV_0xC1,
+ IREE_VM_OP_EXT_I64_RSV_0xC2,
+ IREE_VM_OP_EXT_I64_RSV_0xC3,
+ IREE_VM_OP_EXT_I64_RSV_0xC4,
+ IREE_VM_OP_EXT_I64_RSV_0xC5,
+ IREE_VM_OP_EXT_I64_RSV_0xC6,
+ IREE_VM_OP_EXT_I64_RSV_0xC7,
+ IREE_VM_OP_EXT_I64_RSV_0xC8,
+ IREE_VM_OP_EXT_I64_RSV_0xC9,
+ IREE_VM_OP_EXT_I64_RSV_0xCA,
+ IREE_VM_OP_EXT_I64_RSV_0xCB,
+ IREE_VM_OP_EXT_I64_RSV_0xCC,
+ IREE_VM_OP_EXT_I64_RSV_0xCD,
+ IREE_VM_OP_EXT_I64_RSV_0xCE,
+ IREE_VM_OP_EXT_I64_RSV_0xCF,
+ IREE_VM_OP_EXT_I64_RSV_0xD0,
+ IREE_VM_OP_EXT_I64_RSV_0xD1,
+ IREE_VM_OP_EXT_I64_RSV_0xD2,
+ IREE_VM_OP_EXT_I64_RSV_0xD3,
+ IREE_VM_OP_EXT_I64_RSV_0xD4,
+ IREE_VM_OP_EXT_I64_RSV_0xD5,
+ IREE_VM_OP_EXT_I64_RSV_0xD6,
+ IREE_VM_OP_EXT_I64_RSV_0xD7,
+ IREE_VM_OP_EXT_I64_RSV_0xD8,
+ IREE_VM_OP_EXT_I64_RSV_0xD9,
+ IREE_VM_OP_EXT_I64_RSV_0xDA,
+ IREE_VM_OP_EXT_I64_RSV_0xDB,
+ IREE_VM_OP_EXT_I64_RSV_0xDC,
+ IREE_VM_OP_EXT_I64_RSV_0xDD,
+ IREE_VM_OP_EXT_I64_RSV_0xDE,
+ IREE_VM_OP_EXT_I64_RSV_0xDF,
+ IREE_VM_OP_EXT_I64_RSV_0xE0,
+ IREE_VM_OP_EXT_I64_RSV_0xE1,
+ IREE_VM_OP_EXT_I64_RSV_0xE2,
+ IREE_VM_OP_EXT_I64_RSV_0xE3,
+ IREE_VM_OP_EXT_I64_RSV_0xE4,
+ IREE_VM_OP_EXT_I64_RSV_0xE5,
+ IREE_VM_OP_EXT_I64_RSV_0xE6,
+ IREE_VM_OP_EXT_I64_RSV_0xE7,
+ IREE_VM_OP_EXT_I64_RSV_0xE8,
+ IREE_VM_OP_EXT_I64_RSV_0xE9,
+ IREE_VM_OP_EXT_I64_RSV_0xEA,
+ IREE_VM_OP_EXT_I64_RSV_0xEB,
+ IREE_VM_OP_EXT_I64_RSV_0xEC,
+ IREE_VM_OP_EXT_I64_RSV_0xED,
+ IREE_VM_OP_EXT_I64_RSV_0xEE,
+ IREE_VM_OP_EXT_I64_RSV_0xEF,
+ IREE_VM_OP_EXT_I64_RSV_0xF0,
+ IREE_VM_OP_EXT_I64_RSV_0xF1,
+ IREE_VM_OP_EXT_I64_RSV_0xF2,
+ IREE_VM_OP_EXT_I64_RSV_0xF3,
+ IREE_VM_OP_EXT_I64_RSV_0xF4,
+ IREE_VM_OP_EXT_I64_RSV_0xF5,
+ IREE_VM_OP_EXT_I64_RSV_0xF6,
+ IREE_VM_OP_EXT_I64_RSV_0xF7,
+ IREE_VM_OP_EXT_I64_RSV_0xF8,
+ IREE_VM_OP_EXT_I64_RSV_0xF9,
+ IREE_VM_OP_EXT_I64_RSV_0xFA,
+ IREE_VM_OP_EXT_I64_RSV_0xFB,
+ IREE_VM_OP_EXT_I64_RSV_0xFC,
+ IREE_VM_OP_EXT_I64_RSV_0xFD,
+ IREE_VM_OP_EXT_I64_RSV_0xFE,
+ IREE_VM_OP_EXT_I64_RSV_0xFF,
+} iree_vm_ext_i64_op_t;
+
+// X-macro table for the i64 extension opcode set: expands OPC(ordinal, name)
+// for each assigned opcode and RSV(ordinal) for each reserved slot, covering
+// the full 0x00-0xFF encoding space. Ordinals here must match the
+// iree_vm_ext_i64_op_t enum above.
+#define IREE_VM_OP_EXT_I64_TABLE(OPC, RSV) \
+ OPC(0x00, GlobalLoadI64) \
+ OPC(0x01, GlobalStoreI64) \
+ OPC(0x02, GlobalLoadIndirectI64) \
+ OPC(0x03, GlobalStoreIndirectI64) \
+ RSV(0x04) \
+ RSV(0x05) \
+ RSV(0x06) \
+ RSV(0x07) \
+ OPC(0x08, ConstI64Zero) \
+ OPC(0x09, ConstI64) \
+ RSV(0x0A) \
+ RSV(0x0B) \
+ RSV(0x0C) \
+ RSV(0x0D) \
+ RSV(0x0E) \
+ RSV(0x0F) \
+ RSV(0x10) \
+ RSV(0x11) \
+ RSV(0x12) \
+ RSV(0x13) \
+ OPC(0x14, ListGetI64) \
+ OPC(0x15, ListSetI64) \
+ RSV(0x16) \
+ RSV(0x17) \
+ RSV(0x18) \
+ RSV(0x19) \
+ RSV(0x1A) \
+ RSV(0x1B) \
+ RSV(0x1C) \
+ RSV(0x1D) \
+ OPC(0x1E, SelectI64) \
+ RSV(0x1F) \
+ OPC(0x20, SwitchI64) \
+ RSV(0x21) \
+ OPC(0x22, AddI64) \
+ OPC(0x23, SubI64) \
+ OPC(0x24, MulI64) \
+ OPC(0x25, DivI64S) \
+ OPC(0x26, DivI64U) \
+ OPC(0x27, RemI64S) \
+ OPC(0x28, RemI64U) \
+ OPC(0x29, FMAI64) \
+ RSV(0x2A) \
+ RSV(0x2B) \
+ RSV(0x2C) \
+ RSV(0x2D) \
+ RSV(0x2E) \
+ RSV(0x2F) \
+ OPC(0x30, NotI64) \
+ OPC(0x31, AndI64) \
+ OPC(0x32, OrI64) \
+ OPC(0x33, XorI64) \
+ OPC(0x34, ShlI64) \
+ OPC(0x35, ShrI64S) \
+ OPC(0x36, ShrI64U) \
+ OPC(0x37, TruncI64I32) \
+ OPC(0x38, ExtI32I64S) \
+ OPC(0x39, ExtI32I64U) \
+ RSV(0x3A) \
+ RSV(0x3B) \
+ RSV(0x3C) \
+ RSV(0x3D) \
+ RSV(0x3E) \
+ RSV(0x3F) \
+ OPC(0x40, CmpEQI64) \
+ OPC(0x41, CmpNEI64) \
+ OPC(0x42, CmpLTI64S) \
+ OPC(0x43, CmpLTI64U) \
+ RSV(0x44) \
+ RSV(0x45) \
+ RSV(0x46) \
+ RSV(0x47) \
+ RSV(0x48) \
+ RSV(0x49) \
+ RSV(0x4A) \
+ RSV(0x4B) \
+ RSV(0x4C) \
+ OPC(0x4D, CmpNZI64) \
+ RSV(0x4E) \
+ RSV(0x4F) \
+ RSV(0x50) \
+ RSV(0x51) \
+ RSV(0x52) \
+ RSV(0x53) \
+ RSV(0x54) \
+ RSV(0x55) \
+ RSV(0x56) \
+ RSV(0x57) \
+ RSV(0x58) \
+ RSV(0x59) \
+ RSV(0x5A) \
+ RSV(0x5B) \
+ RSV(0x5C) \
+ RSV(0x5D) \
+ RSV(0x5E) \
+ RSV(0x5F) \
+ RSV(0x60) \
+ RSV(0x61) \
+ RSV(0x62) \
+ RSV(0x63) \
+ RSV(0x64) \
+ RSV(0x65) \
+ RSV(0x66) \
+ RSV(0x67) \
+ RSV(0x68) \
+ RSV(0x69) \
+ RSV(0x6A) \
+ RSV(0x6B) \
+ RSV(0x6C) \
+ RSV(0x6D) \
+ RSV(0x6E) \
+ RSV(0x6F) \
+ RSV(0x70) \
+ RSV(0x71) \
+ RSV(0x72) \
+ RSV(0x73) \
+ RSV(0x74) \
+ RSV(0x75) \
+ RSV(0x76) \
+ RSV(0x77) \
+ RSV(0x78) \
+ RSV(0x79) \
+ RSV(0x7A) \
+ RSV(0x7B) \
+ RSV(0x7C) \
+ RSV(0x7D) \
+ RSV(0x7E) \
+ RSV(0x7F) \
+ RSV(0x80) \
+ RSV(0x81) \
+ RSV(0x82) \
+ RSV(0x83) \
+ RSV(0x84) \
+ RSV(0x85) \
+ RSV(0x86) \
+ RSV(0x87) \
+ RSV(0x88) \
+ RSV(0x89) \
+ RSV(0x8A) \
+ RSV(0x8B) \
+ RSV(0x8C) \
+ RSV(0x8D) \
+ RSV(0x8E) \
+ RSV(0x8F) \
+ RSV(0x90) \
+ RSV(0x91) \
+ RSV(0x92) \
+ RSV(0x93) \
+ RSV(0x94) \
+ RSV(0x95) \
+ RSV(0x96) \
+ RSV(0x97) \
+ RSV(0x98) \
+ RSV(0x99) \
+ RSV(0x9A) \
+ RSV(0x9B) \
+ RSV(0x9C) \
+ RSV(0x9D) \
+ RSV(0x9E) \
+ RSV(0x9F) \
+ RSV(0xA0) \
+ RSV(0xA1) \
+ RSV(0xA2) \
+ RSV(0xA3) \
+ RSV(0xA4) \
+ RSV(0xA5) \
+ RSV(0xA6) \
+ RSV(0xA7) \
+ RSV(0xA8) \
+ RSV(0xA9) \
+ RSV(0xAA) \
+ RSV(0xAB) \
+ RSV(0xAC) \
+ RSV(0xAD) \
+ RSV(0xAE) \
+ RSV(0xAF) \
+ OPC(0xB0, BufferLoadI64) \
+ OPC(0xB1, BufferStoreI64) \
+ RSV(0xB2) \
+ RSV(0xB3) \
+ RSV(0xB4) \
+ RSV(0xB5) \
+ RSV(0xB6) \
+ RSV(0xB7) \
+ RSV(0xB8) \
+ RSV(0xB9) \
+ RSV(0xBA) \
+ RSV(0xBB) \
+ RSV(0xBC) \
+ RSV(0xBD) \
+ RSV(0xBE) \
+ RSV(0xBF) \
+ OPC(0xC0, BufferFillI64) \
+ RSV(0xC1) \
+ RSV(0xC2) \
+ RSV(0xC3) \
+ RSV(0xC4) \
+ RSV(0xC5) \
+ RSV(0xC6) \
+ RSV(0xC7) \
+ RSV(0xC8) \
+ RSV(0xC9) \
+ RSV(0xCA) \
+ RSV(0xCB) \
+ RSV(0xCC) \
+ RSV(0xCD) \
+ RSV(0xCE) \
+ RSV(0xCF) \
+ RSV(0xD0) \
+ RSV(0xD1) \
+ RSV(0xD2) \
+ RSV(0xD3) \
+ RSV(0xD4) \
+ RSV(0xD5) \
+ RSV(0xD6) \
+ RSV(0xD7) \
+ RSV(0xD8) \
+ RSV(0xD9) \
+ RSV(0xDA) \
+ RSV(0xDB) \
+ RSV(0xDC) \
+ RSV(0xDD) \
+ RSV(0xDE) \
+ RSV(0xDF) \
+ RSV(0xE0) \
+ RSV(0xE1) \
+ RSV(0xE2) \
+ RSV(0xE3) \
+ RSV(0xE4) \
+ RSV(0xE5) \
+ RSV(0xE6) \
+ RSV(0xE7) \
+ RSV(0xE8) \
+ RSV(0xE9) \
+ RSV(0xEA) \
+ RSV(0xEB) \
+ RSV(0xEC) \
+ RSV(0xED) \
+ RSV(0xEE) \
+ RSV(0xEF) \
+ RSV(0xF0) \
+ RSV(0xF1) \
+ RSV(0xF2) \
+ RSV(0xF3) \
+ RSV(0xF4) \
+ RSV(0xF5) \
+ RSV(0xF6) \
+ RSV(0xF7) \
+ RSV(0xF8) \
+ RSV(0xF9) \
+ RSV(0xFA) \
+ RSV(0xFB) \
+ RSV(0xFC) \
+ RSV(0xFD) \
+ RSV(0xFE) \
+ RSV(0xFF)
+
diff --git a/runtime/src/iree/vm/instance.c b/runtime/src/iree/vm/instance.c
new file mode 100644
index 0000000..9d1f4c6
--- /dev/null
+++ b/runtime/src/iree/vm/instance.c
@@ -0,0 +1,57 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/instance.h"
+
+#include <stddef.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/tracing.h"
+#include "iree/vm/builtin_types.h"
+
+// Private definition of the opaque iree_vm_instance_t handle.
+// The instance is heap-allocated from |allocator| in iree_vm_instance_create
+// and freed with that same allocator when the last reference is released.
+struct iree_vm_instance_t {
+ iree_atomic_ref_count_t ref_count;  // manipulated only via iree_atomic_ref_count_* helpers
+ iree_allocator_t allocator;  // allocator used for both the malloc and the final free
+};
+
+// Creates a new instance with a single reference owned by the caller.
+// On failure |*out_instance| is left NULL and the error status is returned.
+IREE_API_EXPORT iree_status_t iree_vm_instance_create(
+ iree_allocator_t allocator, iree_vm_instance_t** out_instance) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_ASSERT_ARGUMENT(out_instance);
+ *out_instance = NULL;
+
+ // Register the VM builtin ref types before any allocation so a failure here
+ // leaves nothing to clean up. NOTE(review): presumably idempotent across
+ // multiple instances - confirm against iree_vm_register_builtin_types.
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(z0, iree_vm_register_builtin_types());
+
+ iree_vm_instance_t* instance = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
+ iree_allocator_malloc(allocator, sizeof(*instance), (void**)&instance));
+ // Remember the allocator so iree_vm_instance_destroy can free with it.
+ instance->allocator = allocator;
+ iree_atomic_ref_count_init(&instance->ref_count);
+
+ *out_instance = instance;
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+// Frees |instance| with the allocator it was created from. Called only from
+// iree_vm_instance_release once the reference count reaches zero.
+static void iree_vm_instance_destroy(iree_vm_instance_t* instance) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_ASSERT_ARGUMENT(instance);
+ iree_allocator_free(instance->allocator, instance);
+ IREE_TRACE_ZONE_END(z0);
+}
+
+// Adds a reference on behalf of the caller; NULL is tolerated as a no-op so
+// callers can retain unconditionally.
+IREE_API_EXPORT void iree_vm_instance_retain(iree_vm_instance_t* instance) {
+ if (instance) {
+ iree_atomic_ref_count_inc(&instance->ref_count);
+ }
+}
+
+// Drops a reference; destroys the instance when the last reference is
+// released. NULL is tolerated as a no-op.
+// NOTE(review): the `== 1` test implies iree_atomic_ref_count_dec returns the
+// count value *before* the decrement - confirm against the atomics helpers.
+IREE_API_EXPORT void iree_vm_instance_release(iree_vm_instance_t* instance) {
+ if (instance && iree_atomic_ref_count_dec(&instance->ref_count) == 1) {
+ iree_vm_instance_destroy(instance);
+ }
+}
diff --git a/runtime/src/iree/vm/instance.h b/runtime/src/iree/vm/instance.h
new file mode 100644
index 0000000..e54e7bb
--- /dev/null
+++ b/runtime/src/iree/vm/instance.h
@@ -0,0 +1,46 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_INSTANCE_H_
+#define IREE_VM_INSTANCE_H_
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Shared runtime instance responsible for routing iree_vm_context_ events,
+// enumerating and creating hardware device interfaces, and managing device
+// resource pools.
+//
+// A single runtime instance can service multiple contexts and hosting
+// applications should try to reuse instances as much as possible. This ensures
+// that resource allocation across contexts is handled and extraneous device
+// interaction is avoided. For devices that may have exclusive access
+// restrictions it is mandatory to share instances, so plan accordingly.
+//
+// Thread-safe.
+typedef struct iree_vm_instance_t iree_vm_instance_t;
+
+// Creates a new instance. This should be shared with all contexts in an
+// application to ensure that resources are tracked properly and threads are
+// managed correctly.
+// |out_instance| must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_vm_instance_create(
+ iree_allocator_t allocator, iree_vm_instance_t** out_instance);
+
+// Retains the given |instance| for the caller.
+IREE_API_EXPORT void iree_vm_instance_retain(iree_vm_instance_t* instance);
+
+// Releases the given |instance| from the caller.
+IREE_API_EXPORT void iree_vm_instance_release(iree_vm_instance_t* instance);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_INSTANCE_H_
diff --git a/runtime/src/iree/vm/invocation.c b/runtime/src/iree/vm/invocation.c
new file mode 100644
index 0000000..58e385c
--- /dev/null
+++ b/runtime/src/iree/vm/invocation.c
@@ -0,0 +1,226 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/invocation.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/vm/ref.h"
+#include "iree/vm/stack.h"
+#include "iree/vm/value.h"
+
+// Marshals caller arguments from the variant list to the ABI convention.
+// |cconv_arguments| is the argument fragment of the calling convention string
+// (one character per argument; a leading 'v' means no arguments). Values are
+// packed densely into |arguments| in declaration order; refs are *retained*
+// into the buffer so the callee receives owned references.
+static iree_status_t iree_vm_invoke_marshal_inputs(
+ iree_string_view_t cconv_arguments, iree_vm_list_t* inputs,
+ iree_byte_span_t arguments) {
+ // We are 1:1 right now with no variadic args, so do a quick verification on
+ // the input list.
+ iree_host_size_t expected_input_count =
+ cconv_arguments.size > 0
+ ? (cconv_arguments.data[0] == 'v' ? 0 : cconv_arguments.size)
+ : 0;
+ if (IREE_UNLIKELY(!inputs)) {
+ // A NULL list is allowed only when the function takes no arguments.
+ if (IREE_UNLIKELY(expected_input_count > 0)) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "no input provided to a function that has inputs");
+ }
+ return iree_ok_status();
+ } else if (IREE_UNLIKELY(expected_input_count != iree_vm_list_size(inputs))) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "input list and function mismatch; expected %zu "
+ "arguments but passed %zu",
+ expected_input_count, iree_vm_list_size(inputs));
+ }
+
+ // Write cursor into the packed argument buffer; advanced by the size of
+ // each marshaled value.
+ uint8_t* p = arguments.data;
+ for (iree_host_size_t cconv_i = 0, arg_i = 0; cconv_i < cconv_arguments.size;
+ ++cconv_i, ++arg_i) {
+ switch (cconv_arguments.data[cconv_i]) {
+ case IREE_VM_CCONV_TYPE_VOID:
+ break;
+ case IREE_VM_CCONV_TYPE_I32: {
+ iree_vm_value_t value;
+ IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
+ inputs, arg_i, IREE_VM_VALUE_TYPE_I32, &value));
+ memcpy(p, &value.i32, sizeof(int32_t));
+ p += sizeof(int32_t);
+ } break;
+ case IREE_VM_CCONV_TYPE_I64: {
+ iree_vm_value_t value;
+ IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
+ inputs, arg_i, IREE_VM_VALUE_TYPE_I64, &value));
+ memcpy(p, &value.i64, sizeof(int64_t));
+ p += sizeof(int64_t);
+ } break;
+ case IREE_VM_CCONV_TYPE_F32: {
+ iree_vm_value_t value;
+ IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
+ inputs, arg_i, IREE_VM_VALUE_TYPE_F32, &value));
+ memcpy(p, &value.f32, sizeof(float));
+ p += sizeof(float);
+ } break;
+ case IREE_VM_CCONV_TYPE_F64: {
+ iree_vm_value_t value;
+ IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
+ inputs, arg_i, IREE_VM_VALUE_TYPE_F64, &value));
+ memcpy(p, &value.f64, sizeof(double));
+ p += sizeof(double);
+ } break;
+ case IREE_VM_CCONV_TYPE_REF: {
+ // TODO(benvanik): see if we can't remove this retain by instead relying
+ // on the caller still owning the list.
+ IREE_RETURN_IF_ERROR(
+ iree_vm_list_get_ref_retain(inputs, arg_i, (iree_vm_ref_t*)p));
+ p += sizeof(iree_vm_ref_t);
+ } break;
+ }
+ }
+ return iree_ok_status();
+}
+
+// Marshals callee results from the ABI convention to the variant list.
+// |cconv_results| mirrors |cconv_arguments| above: one character per result,
+// with a leading 'v' meaning no results. |outputs| is cleared and resized to
+// the expected count before being populated; refs are *moved* out of the
+// packed |results| buffer into the list.
+static iree_status_t iree_vm_invoke_marshal_outputs(
+ iree_string_view_t cconv_results, iree_byte_span_t results,
+ iree_vm_list_t* outputs) {
+ iree_host_size_t expected_output_count =
+ cconv_results.size > 0
+ ? (cconv_results.data[0] == 'v' ? 0 : cconv_results.size)
+ : 0;
+ if (IREE_UNLIKELY(!outputs)) {
+ // A NULL list is allowed only when the function produces no results.
+ if (IREE_UNLIKELY(expected_output_count > 0)) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "no output provided to a function that has outputs");
+ }
+ return iree_ok_status();
+ }
+
+ // Resize the output list to hold all results (and kill anything that may
+ // have been in there).
+ IREE_RETURN_IF_ERROR(iree_vm_list_resize(outputs, 0));
+ IREE_RETURN_IF_ERROR(iree_vm_list_resize(outputs, expected_output_count));
+
+ // Read cursor over the packed result buffer; advanced per marshaled value.
+ // NOTE(review): if a set fails mid-loop, refs remaining in |results| are not
+ // released here - confirm the caller's cleanup expectations.
+ uint8_t* p = results.data;
+ for (iree_host_size_t cconv_i = 0, arg_i = 0; cconv_i < cconv_results.size;
+ ++cconv_i, ++arg_i) {
+ switch (cconv_results.data[cconv_i]) {
+ case IREE_VM_CCONV_TYPE_VOID:
+ break;
+ case IREE_VM_CCONV_TYPE_I32: {
+ iree_vm_value_t value = iree_vm_value_make_i32(*(int32_t*)p);
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_value(outputs, arg_i, &value));
+ p += sizeof(int32_t);
+ } break;
+ case IREE_VM_CCONV_TYPE_I64: {
+ iree_vm_value_t value = iree_vm_value_make_i64(*(int64_t*)p);
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_value(outputs, arg_i, &value));
+ p += sizeof(int64_t);
+ } break;
+ case IREE_VM_CCONV_TYPE_F32: {
+ iree_vm_value_t value = iree_vm_value_make_f32(*(float*)p);
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_value(outputs, arg_i, &value));
+ p += sizeof(float);
+ } break;
+ case IREE_VM_CCONV_TYPE_F64: {
+ iree_vm_value_t value = iree_vm_value_make_f64(*(double*)p);
+ IREE_RETURN_IF_ERROR(iree_vm_list_set_value(outputs, arg_i, &value));
+ p += sizeof(double);
+ } break;
+ case IREE_VM_CCONV_TYPE_REF: {
+ // Move (not retain): ownership transfers from the buffer to the list.
+ IREE_RETURN_IF_ERROR(
+ iree_vm_list_set_ref_move(outputs, arg_i, (iree_vm_ref_t*)p));
+ p += sizeof(iree_vm_ref_t);
+ } break;
+ }
+ }
+ return iree_ok_status();
+}
+
+// TODO(benvanik): implement this as an iree_vm_invocation_t sequence.
+// Synchronously invokes |function| on an already-initialized |stack|:
+// marshals |inputs| into a stack-allocated (alloca) argument buffer per the
+// function's calling convention, runs the call to completion via the module's
+// begin_call, and then marshals the packed results into |outputs|.
+// |policy| is currently unused.
+static iree_status_t iree_vm_invoke_within(
+ iree_vm_context_t* context, iree_vm_stack_t* stack,
+ iree_vm_function_t function, const iree_vm_invocation_policy_t* policy,
+ iree_vm_list_t* inputs, iree_vm_list_t* outputs) {
+ IREE_ASSERT_ARGUMENT(context);
+ IREE_ASSERT_ARGUMENT(stack);
+
+ // Split the function's calling convention string into the argument and
+ // result fragments used for sizing/marshaling below.
+ iree_vm_function_signature_t signature =
+ iree_vm_function_signature(&function);
+ iree_string_view_t cconv_arguments = iree_string_view_empty();
+ iree_string_view_t cconv_results = iree_string_view_empty();
+ IREE_RETURN_IF_ERROR(iree_vm_function_call_get_cconv_fragments(
+ &signature, &cconv_arguments, &cconv_results));
+
+ // Marshal the input arguments into the VM ABI and preallocate the result
+ // buffer.
+ // NOTE: today we don't support variadic arguments through this interface.
+ // The alloca storage lives for the remainder of this function, spanning the
+ // entire call.
+ iree_byte_span_t arguments = iree_make_byte_span(NULL, 0);
+ IREE_RETURN_IF_ERROR(iree_vm_function_call_compute_cconv_fragment_size(
+ cconv_arguments, /*segment_size_list=*/NULL, &arguments.data_length));
+ arguments.data = iree_alloca(arguments.data_length);
+ memset(arguments.data, 0, arguments.data_length);
+ IREE_RETURN_IF_ERROR(
+ iree_vm_invoke_marshal_inputs(cconv_arguments, inputs, arguments));
+
+ // Allocate the result output that will be populated by the callee.
+ iree_byte_span_t results = iree_make_byte_span(NULL, 0);
+ IREE_RETURN_IF_ERROR(iree_vm_function_call_compute_cconv_fragment_size(
+ cconv_results, /*segment_size_list=*/NULL, &results.data_length));
+ results.data = iree_alloca(results.data_length);
+ memset(results.data, 0, results.data_length);
+
+ // Perform execution. Note that for synchronous execution we expect this to
+ // complete without yielding.
+ iree_vm_function_call_t call;
+ memset(&call, 0, sizeof(call));
+ call.function = function;
+ call.arguments = arguments;
+ call.results = results;
+ iree_vm_execution_result_t result;
+ iree_status_t status =
+ function.module->begin_call(function.module->self, stack, &call, &result);
+ if (!iree_status_is_ok(status)) {
+ // Failed call: release any refs still held in the argument/result buffers
+ // before propagating the error.
+ iree_vm_function_call_release(&call, &signature);
+ return status;
+ }
+
+ // Read back the outputs from the result buffer.
+ IREE_RETURN_IF_ERROR(
+ iree_vm_invoke_marshal_outputs(cconv_results, results, outputs));
+
+ return iree_ok_status();
+}
+
+// Public synchronous invoke entry point: sets up an inline (host-stack) VM
+// stack, delegates to iree_vm_invoke_within, and tears the stack down before
+// returning. See invocation.h for the contract on |inputs|/|outputs|.
+IREE_API_EXPORT iree_status_t iree_vm_invoke(
+ iree_vm_context_t* context, iree_vm_function_t function,
+ iree_vm_invocation_flags_t flags, const iree_vm_invocation_policy_t* policy,
+ iree_vm_list_t* inputs, iree_vm_list_t* outputs,
+ iree_allocator_t allocator) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ // Force tracing if specified on the context.
+ if (iree_vm_context_flags(context) & IREE_VM_CONTEXT_FLAG_TRACE_EXECUTION) {
+ flags |= IREE_VM_INVOCATION_FLAG_TRACE_EXECUTION;
+ }
+
+ // Allocate a VM stack on the host stack and initialize it.
+ IREE_VM_INLINE_STACK_INITIALIZE(
+ stack, flags, iree_vm_context_state_resolver(context), allocator);
+ iree_status_t status =
+ iree_vm_invoke_within(context, stack, function, policy, inputs, outputs);
+ if (!iree_status_is_ok(status)) {
+ // Attach a backtrace annotation to failures when the build enables it.
+ status = IREE_VM_STACK_ANNOTATE_BACKTRACE_IF_ENABLED(stack, status);
+ }
+ // The stack must be deinitialized on every path before the frame unwinds.
+ iree_vm_stack_deinitialize(stack);
+
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
diff --git a/runtime/src/iree/vm/invocation.h b/runtime/src/iree/vm/invocation.h
new file mode 100644
index 0000000..9de07b2
--- /dev/null
+++ b/runtime/src/iree/vm/invocation.h
@@ -0,0 +1,93 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// See iree/base/api.h for documentation on the API conventions used.
+
+#ifndef IREE_VM_INVOCATION_H_
+#define IREE_VM_INVOCATION_H_
+
+#include "iree/base/api.h"
+#include "iree/vm/context.h"
+#include "iree/vm/list.h"
+#include "iree/vm/module.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Opaque handle to an asynchronous invocation; see iree_vm_invocation_create.
+typedef struct iree_vm_invocation_t iree_vm_invocation_t;
+// Opaque policy controlling how an invocation is scheduled relative to others.
+typedef struct iree_vm_invocation_policy_t iree_vm_invocation_policy_t;
+
+// Synchronously invokes a function in the VM.
+//
+// |flags| control invocation behavior (e.g. execution tracing).
+//
+// |policy| is used to schedule the invocation relative to other pending or
+// in-flight invocations. It may be omitted to leave the behavior up to the
+// implementation.
+//
+// |inputs| is used to pass values and objects into the target function and must
+// match the signature defined by the compiled function. List ownership remains
+// with the caller.
+//
+// |outputs| is populated after the function completes execution with the
+// output values and objects of the function. List ownership remains with the
+// caller.
+//
+// |allocator| is used for any transient allocations required during execution.
+IREE_API_EXPORT iree_status_t iree_vm_invoke(
+    iree_vm_context_t* context, iree_vm_function_t function,
+    iree_vm_invocation_flags_t flags, const iree_vm_invocation_policy_t* policy,
+    iree_vm_list_t* inputs, iree_vm_list_t* outputs,
+    iree_allocator_t allocator);
+
+// TODO(benvanik): document and implement.
+IREE_API_EXPORT iree_status_t iree_vm_invocation_create(
+    iree_vm_context_t* context, iree_vm_function_t function,
+    iree_vm_invocation_flags_t flags, const iree_vm_invocation_policy_t* policy,
+    const iree_vm_list_t* inputs, iree_allocator_t allocator,
+    iree_vm_invocation_t** out_invocation);
+
+// Retains the given |invocation| for the caller.
+IREE_API_EXPORT iree_status_t
+iree_vm_invocation_retain(iree_vm_invocation_t* invocation);
+
+// Releases the given |invocation| from the caller.
+IREE_API_EXPORT iree_status_t
+iree_vm_invocation_release(iree_vm_invocation_t* invocation);
+
+// Queries the completion status of the invocation.
+// Returns one of the following:
+//   IREE_STATUS_OK: the invocation completed successfully.
+//   IREE_STATUS_UNAVAILABLE: the invocation has not yet completed.
+//   IREE_STATUS_CANCELLED: the invocation was cancelled internally.
+//   IREE_STATUS_ABORTED: the invocation was aborted.
+//   IREE_STATUS_*: an error occurred during invocation.
+IREE_API_EXPORT iree_status_t
+iree_vm_invocation_query_status(iree_vm_invocation_t* invocation);
+
+// Returns a reference to the output of the invocation.
+// The returned structure is valid for the lifetime of the invocation and
+// callers must retain any refs they want to outlive the invocation once
+// released.
+//
+// Returns NULL if the invocation did not complete successfully.
+IREE_API_EXPORT const iree_vm_list_t* iree_vm_invocation_output(
+    iree_vm_invocation_t* invocation);
+
+// Blocks the caller until the invocation completes (successfully or otherwise).
+//
+// Returns IREE_STATUS_DEADLINE_EXCEEDED if |deadline| elapses before the
+// invocation completes and otherwise returns iree_vm_invocation_query_status.
+IREE_API_EXPORT iree_status_t iree_vm_invocation_await(
+    iree_vm_invocation_t* invocation, iree_time_t deadline);
+
+// Attempts to abort the invocation if it is in-flight.
+// A no-op if the invocation has already completed.
+IREE_API_EXPORT iree_status_t
+iree_vm_invocation_abort(iree_vm_invocation_t* invocation);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_VM_INVOCATION_H_
diff --git a/runtime/src/iree/vm/list.c b/runtime/src/iree/vm/list.c
new file mode 100644
index 0000000..e9a47b1
--- /dev/null
+++ b/runtime/src/iree/vm/list.c
@@ -0,0 +1,707 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/list.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/tracing.h"
+
+// Returns the size in bytes of a value of |type| (0 for NONE/unused slots).
+static uint8_t iree_vm_value_type_size(iree_vm_value_type_t type) {
+  // One entry per iree_vm_value_type_t ordinal; the mask keeps the index in
+  // range for any out-of-bounds enum value (mapping it to a 0-sized slot).
+  static const uint8_t kSizes[8] = {
+      0,  // IREE_VM_VALUE_TYPE_NONE
+      1,  // IREE_VM_VALUE_TYPE_I8
+      2,  // IREE_VM_VALUE_TYPE_I16
+      4,  // IREE_VM_VALUE_TYPE_I32
+      8,  // IREE_VM_VALUE_TYPE_I64
+      4,  // IREE_VM_VALUE_TYPE_F32
+      8,  // IREE_VM_VALUE_TYPE_F64
+      0,  // unused
+  };
+  return kSizes[type & 0x7];
+}
+
+// Defines how the iree_vm_list_t storage is allocated and what elements are
+// interpreted as.
+typedef enum iree_vm_list_storage_mode_e {
+  // Each element is a primitive value and stored as a dense array.
+  IREE_VM_LIST_STORAGE_MODE_VALUE = 0,
+  // Each element is an iree_vm_ref_t of some type.
+  IREE_VM_LIST_STORAGE_MODE_REF,
+  // Each element is a variant of any type (possibly all different).
+  IREE_VM_LIST_STORAGE_MODE_VARIANT,
+} iree_vm_list_storage_mode_t;
+
+// A list able to hold either flat primitive elements or ref values.
+struct iree_vm_list_t {
+  // Intrusive ref-counted object header; must be first so the ref machinery
+  // can locate the counter via offsetof (see iree_vm_list_register_types).
+  iree_vm_ref_object_t ref_object;
+  // Allocator used for the list struct and its storage. Zero-initialized for
+  // lists created with iree_vm_list_initialize (caller owns the memory).
+  iree_allocator_t allocator;
+
+  // Current capacity of the list storage, in elements.
+  iree_host_size_t capacity;
+  // Current count of elements in the list.
+  iree_host_size_t count;
+
+  // Element type stored within the list.
+  iree_vm_type_def_t element_type;
+  // Size of each element in the storage in bytes.
+  iree_host_size_t element_size;
+
+  // Storage mode defining how the storage array is managed.
+  iree_vm_list_storage_mode_t storage_mode;
+  // A flat dense array of elements in the type defined by storage_mode.
+  // For certain storage modes, such as IREE_VM_STORAGE_MODE_REF, special
+  // lifetime management and cleanup logic is required.
+  void* storage;
+};
+
+// Lazily-populated descriptor for the vm.list ref type; filled in by
+// iree_vm_list_register_types.
+static iree_vm_ref_type_descriptor_t iree_vm_list_descriptor = {0};
+
+// Generates the iree_vm_list_* ref adapter functions (retain/release/etc).
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_vm_list, iree_vm_list_t);
+
+// Resets (zeroes) |length| elements of |list| starting at |offset|, first
+// releasing any refs held by those elements so references are not leaked.
+// Assumes [offset, offset+length) is within the list's storage.
+static void iree_vm_list_reset_range(iree_vm_list_t* list,
+                                     iree_host_size_t offset,
+                                     iree_host_size_t length) {
+  switch (list->storage_mode) {
+    case IREE_VM_LIST_STORAGE_MODE_VALUE: {
+      // Primitive values hold no resources; a bulk memset suffices.
+      void* base_ptr =
+          (void*)((uintptr_t)list->storage + offset * list->element_size);
+      memset(base_ptr, 0, length * list->element_size);
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_REF: {
+      // iree_vm_ref_release also resets the ref slot to zero.
+      iree_vm_ref_t* ref_storage = (iree_vm_ref_t*)list->storage;
+      for (iree_host_size_t i = offset; i < offset + length; ++i) {
+        iree_vm_ref_release(&ref_storage[i]);
+      }
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_VARIANT: {
+      iree_vm_variant_t* variant_storage = (iree_vm_variant_t*)list->storage;
+      for (iree_host_size_t i = offset; i < offset + length; ++i) {
+        if (iree_vm_type_def_is_ref(&variant_storage[i].type)) {
+          // Release the ref then clear the type tag; the ref slot itself is
+          // reset by iree_vm_ref_release.
+          iree_vm_ref_release(&variant_storage[i].ref);
+          memset(&variant_storage[i].type, 0, sizeof(variant_storage[i].type));
+        } else {
+          memset(&variant_storage[i], 0, sizeof(variant_storage[i]));
+        }
+      }
+      break;
+    }
+  }
+}
+
+// Computes the total bytes needed to hold a list header plus |capacity|
+// elements of |element_type| (variant storage when NULL/opaque); must stay in
+// sync with the layout used by iree_vm_list_initialize.
+IREE_API_EXPORT iree_host_size_t iree_vm_list_storage_size(
+    const iree_vm_type_def_t* element_type, iree_host_size_t capacity) {
+  // Variants are the default (and largest) element representation; a concrete
+  // element type may shrink each slot to a bare value or a ref.
+  iree_host_size_t element_size = sizeof(iree_vm_variant_t);
+  if (element_type) {
+    if (iree_vm_type_def_is_value(element_type)) {
+      element_size = iree_vm_value_type_size(element_type->value_type);
+    } else if (iree_vm_type_def_is_ref(element_type)) {
+      element_size = sizeof(iree_vm_ref_t);
+    }
+  }
+  // Header struct and element array are each padded to 8-byte alignment.
+  iree_host_size_t header_size = iree_host_align(sizeof(iree_vm_list_t), 8);
+  iree_host_size_t contents_size = iree_host_align(capacity * element_size, 8);
+  return header_size + contents_size;
+}
+
+// Initializes a list in caller-provided |storage|; see the declaration in
+// list.h. The storage must be at least iree_vm_list_storage_size bytes.
+IREE_API_EXPORT iree_status_t iree_vm_list_initialize(
+    iree_byte_span_t storage, const iree_vm_type_def_t* element_type,
+    iree_host_size_t capacity, iree_vm_list_t** out_list) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Pick the storage mode (and per-element size) from the element type: a
+  // concrete value type packs densely, a concrete ref type stores refs, and
+  // no/opaque type falls back to variants that can hold either.
+  iree_vm_list_storage_mode_t storage_mode = IREE_VM_LIST_STORAGE_MODE_VARIANT;
+  iree_host_size_t element_size = sizeof(iree_vm_variant_t);
+  if (element_type) {
+    if (iree_vm_type_def_is_value(element_type)) {
+      storage_mode = IREE_VM_LIST_STORAGE_MODE_VALUE;
+      element_size = iree_vm_value_type_size(element_type->value_type);
+    } else if (iree_vm_type_def_is_ref(element_type)) {
+      storage_mode = IREE_VM_LIST_STORAGE_MODE_REF;
+      element_size = sizeof(iree_vm_ref_t);
+    } else {
+      storage_mode = IREE_VM_LIST_STORAGE_MODE_VARIANT;
+      element_size = sizeof(iree_vm_variant_t);
+    }
+  }
+
+  // Layout: [list header][element array], each 8-byte aligned; must match
+  // iree_vm_list_storage_size.
+  iree_host_size_t storage_offset = iree_host_align(sizeof(iree_vm_list_t), 8);
+  iree_host_size_t required_storage_size =
+      storage_offset + iree_host_align(capacity * element_size, 8);
+  if (storage.data_length < required_storage_size) {
+    // Fix: the trace zone must be ended on every return path; the original
+    // code leaked the zone on this early error return.
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "storage buffer underflow: provided=%zu < required=%zu",
+        storage.data_length, required_storage_size);
+  }
+  memset(storage.data, 0, required_storage_size);
+
+  iree_vm_list_t* list = (iree_vm_list_t*)storage.data;
+  iree_atomic_ref_count_init(&list->ref_object.counter);
+  // NOTE: allocator is left zero-initialized - statically-initialized lists
+  // have their lifetime controlled by the caller and cannot grow.
+  if (element_type) {
+    list->element_type = *element_type;
+  }
+  list->element_size = element_size;
+  list->storage_mode = storage_mode;
+  list->capacity = capacity;
+  list->storage = storage.data + storage_offset;
+
+  *out_list = list;
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Deinitializes a list previously set up with iree_vm_list_initialize,
+// releasing any refs held by its elements. The caller still owns the storage.
+IREE_API_EXPORT void iree_vm_list_deinitialize(iree_vm_list_t* list) {
+  IREE_ASSERT_ARGUMENT(list);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Guard against outstanding retains: the list memory is about to go away.
+  iree_atomic_ref_count_abort_if_uses(&list->ref_object.counter);
+  iree_vm_list_reset_range(list, 0, list->count);
+  list->count = 0;
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Allocates and initializes a new heap list with room for |initial_capacity|
+// elements of |element_type| (variants when NULL/opaque). On success the
+// caller owns a reference to |out_list|.
+IREE_API_EXPORT iree_status_t iree_vm_list_create(
+    const iree_vm_type_def_t* element_type, iree_host_size_t initial_capacity,
+    iree_allocator_t allocator, iree_vm_list_t** out_list) {
+  IREE_ASSERT_ARGUMENT(out_list);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Allocate and zero the list header; element storage is reserved below.
+  iree_vm_list_t* list = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, sizeof(*list), (void**)&list));
+  memset(list, 0, sizeof(*list));
+  iree_atomic_ref_count_init(&list->ref_object.counter);
+  list->allocator = allocator;
+  if (element_type) {
+    list->element_type = *element_type;
+  }
+
+  // Choose how elements are stored: dense primitives for a concrete value
+  // type, refs for a concrete ref type, and variants otherwise.
+  if (element_type && iree_vm_type_def_is_value(&list->element_type)) {
+    list->storage_mode = IREE_VM_LIST_STORAGE_MODE_VALUE;
+    list->element_size = iree_vm_value_type_size(element_type->value_type);
+  } else if (iree_vm_type_def_is_ref(&list->element_type)) {
+    list->storage_mode = IREE_VM_LIST_STORAGE_MODE_REF;
+    list->element_size = sizeof(iree_vm_ref_t);
+  } else {
+    list->storage_mode = IREE_VM_LIST_STORAGE_MODE_VARIANT;
+    list->element_size = sizeof(iree_vm_variant_t);
+  }
+
+  iree_status_t status = iree_vm_list_reserve(list, initial_capacity);
+  if (iree_status_is_ok(status)) {
+    *out_list = list;
+  } else {
+    iree_allocator_free(allocator, list);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Ref-count destroy callback: releases element refs and frees both the
+// storage and the list itself via the allocator captured at creation.
+static void iree_vm_list_destroy(void* ptr) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_vm_list_t* list = (iree_vm_list_t*)ptr;
+  iree_vm_list_reset_range(list, 0, list->count);
+  iree_allocator_free(list->allocator, list->storage);
+  iree_allocator_free(list->allocator, list);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Retains a reference to |list| for the caller.
+IREE_API_EXPORT void iree_vm_list_retain(iree_vm_list_t* list) {
+  iree_vm_ref_object_retain(list, &iree_vm_list_descriptor);
+}
+
+// Releases the caller's reference to |list|, destroying it on the last one.
+IREE_API_EXPORT void iree_vm_list_release(iree_vm_list_t* list) {
+  iree_vm_ref_object_release(list, &iree_vm_list_descriptor);
+}
+
+// Returns the declared element type of |list| in |out_element_type|
+// (zero-valued for untyped/variant lists).
+IREE_API_EXPORT iree_status_t iree_vm_list_element_type(
+    const iree_vm_list_t* list, iree_vm_type_def_t* out_element_type) {
+  *out_element_type = list->element_type;
+  return iree_ok_status();
+}
+
+// Returns the current capacity of |list| in elements.
+IREE_API_EXPORT iree_host_size_t
+iree_vm_list_capacity(const iree_vm_list_t* list) {
+  return list->capacity;
+}
+
+// Grows the storage of |list| to hold at least |minimum_capacity| elements.
+// No-op if the capacity is already sufficient; existing elements are kept and
+// the newly-added tail is zeroed.
+// NOTE(review): a list created via iree_vm_list_initialize has a
+// zero-initialized allocator; growing such a list would hit realloc with that
+// null allocator - presumably callers never resize static lists; confirm.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_reserve(iree_vm_list_t* list, iree_host_size_t minimum_capacity) {
+  if (list->capacity >= minimum_capacity) {
+    return iree_ok_status();
+  }
+  iree_host_size_t old_capacity = list->capacity;
+  // Round the capacity up to a multiple of 64 elements to amortize growth.
+  iree_host_size_t new_capacity = iree_host_align(minimum_capacity, 64);
+  IREE_RETURN_IF_ERROR(iree_allocator_realloc(
+      list->allocator, new_capacity * list->element_size, &list->storage));
+  // Zero the newly-added region so value/variant slots read as empty and ref
+  // slots are safely releasable.
+  memset((void*)((uintptr_t)list->storage + old_capacity * list->element_size),
+         0, (new_capacity - old_capacity) * list->element_size);
+  list->capacity = new_capacity;
+  return iree_ok_status();
+}
+
+// Returns the number of elements currently in |list|.
+IREE_API_EXPORT iree_host_size_t iree_vm_list_size(const iree_vm_list_t* list) {
+  return list->count;
+}
+
+// Resizes |list| to |new_size| elements: truncation releases/zeroes the
+// dropped tail; growth zero-extends (reserving more capacity if needed, with
+// at-least-2x growth to amortize repeated pushes).
+IREE_API_EXPORT iree_status_t iree_vm_list_resize(iree_vm_list_t* list,
+                                                  iree_host_size_t new_size) {
+  if (new_size == list->count) {
+    return iree_ok_status();
+  } else if (new_size < list->count) {
+    // Truncating.
+    iree_vm_list_reset_range(list, new_size, list->count - new_size);
+    list->count = new_size;
+  } else if (new_size > list->capacity) {
+    // Extending beyond capacity.
+    IREE_RETURN_IF_ERROR(iree_vm_list_reserve(
+        list, iree_max(list->capacity * 2, iree_host_align(new_size, 64))));
+  }
+  list->count = new_size;
+  return iree_ok_status();
+}
+
+// Converts |source_value| to |target_value_type| into |out_value|.
+// Only integer<->integer widening/narrowing conversions are performed (with
+// sign extension on widening, truncation on narrowing); any unsupported pair
+// (including float types) leaves |out_value| as a zero of the target type.
+static void iree_vm_list_convert_value_type(
+    const iree_vm_value_t* source_value, iree_vm_value_type_t target_value_type,
+    iree_vm_value_t* out_value) {
+  if (target_value_type == source_value->type) {
+    // Same type: straight copy, no conversion needed.
+    memcpy(out_value, source_value, sizeof(*out_value));
+    return;
+  }
+  // Pre-zero the widest member so unsupported conversions yield zero.
+  out_value->type = target_value_type;
+  out_value->i64 = 0;
+  switch (source_value->type) {
+    default:
+      return;
+    case IREE_VM_VALUE_TYPE_I8:
+      switch (target_value_type) {
+        case IREE_VM_VALUE_TYPE_I16:
+          out_value->i16 = (int16_t)source_value->i8;
+          return;
+        case IREE_VM_VALUE_TYPE_I32:
+          out_value->i32 = (int32_t)source_value->i8;
+          return;
+        case IREE_VM_VALUE_TYPE_I64:
+          out_value->i64 = (int64_t)source_value->i8;
+          return;
+        default:
+          return;
+      }
+    case IREE_VM_VALUE_TYPE_I16:
+      switch (target_value_type) {
+        case IREE_VM_VALUE_TYPE_I8:
+          out_value->i8 = (int8_t)source_value->i16;
+          return;
+        case IREE_VM_VALUE_TYPE_I32:
+          out_value->i32 = (int32_t)source_value->i16;
+          return;
+        case IREE_VM_VALUE_TYPE_I64:
+          out_value->i64 = (int64_t)source_value->i16;
+          return;
+        default:
+          return;
+      }
+    case IREE_VM_VALUE_TYPE_I32:
+      switch (target_value_type) {
+        case IREE_VM_VALUE_TYPE_I8:
+          out_value->i8 = (int8_t)source_value->i32;
+          return;
+        case IREE_VM_VALUE_TYPE_I16:
+          out_value->i16 = (int16_t)source_value->i32;
+          return;
+        case IREE_VM_VALUE_TYPE_I64:
+          out_value->i64 = (int64_t)source_value->i32;
+          return;
+        default:
+          return;
+      }
+    case IREE_VM_VALUE_TYPE_I64:
+      switch (target_value_type) {
+        case IREE_VM_VALUE_TYPE_I8:
+          out_value->i8 = (int8_t)source_value->i64;
+          return;
+        case IREE_VM_VALUE_TYPE_I16:
+          out_value->i16 = (int16_t)source_value->i64;
+          return;
+        case IREE_VM_VALUE_TYPE_I32:
+          out_value->i32 = (int32_t)source_value->i64;
+          return;
+        default:
+          return;
+      }
+  }
+}
+
+// Reads the value element at |i| into |out_value| with the element's own type.
+// Fails for ref-storage lists and for variant elements that hold refs.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_get_value(const iree_vm_list_t* list, iree_host_size_t i,
+                       iree_vm_value_t* out_value) {
+  if (i >= list->count) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "index %zu out of bounds (%zu)", i, list->count);
+  }
+  uintptr_t element_ptr = (uintptr_t)list->storage + i * list->element_size;
+  memset(out_value, 0, sizeof(*out_value));
+  switch (list->storage_mode) {
+    case IREE_VM_LIST_STORAGE_MODE_VALUE: {
+      out_value->type = list->element_type.value_type;
+      // TODO(benvanik): #ifdef on LITTLE/BIG_ENDIAN and just memcpy.
+      switch (list->element_size) {
+        case 1:
+          out_value->i8 = *(int8_t*)element_ptr;
+          break;
+        case 2:
+          out_value->i16 = *(int16_t*)element_ptr;
+          break;
+        case 4:
+          out_value->i32 = *(int32_t*)element_ptr;
+          break;
+        case 8:
+          out_value->i64 = *(int64_t*)element_ptr;
+          break;
+      }
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_VARIANT: {
+      iree_vm_variant_t* variant = (iree_vm_variant_t*)element_ptr;
+      if (!iree_vm_type_def_is_value(&variant->type)) {
+        return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                "variant at index %zu is not a value type", i);
+      }
+      out_value->type = variant->type.value_type;
+      memcpy(out_value->value_storage, variant->value_storage,
+             sizeof(out_value->value_storage));
+      break;
+    }
+    default:
+      // Ref-storage lists cannot yield plain values.
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION);
+  }
+  return iree_ok_status();
+}
+
+// Reads the value element at |i| and converts it to |value_type| (integer
+// widen/narrow only; see iree_vm_list_convert_value_type) into |out_value|.
+IREE_API_EXPORT iree_status_t iree_vm_list_get_value_as(
+    const iree_vm_list_t* list, iree_host_size_t i,
+    iree_vm_value_type_t value_type, iree_vm_value_t* out_value) {
+  if (i >= list->count) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "index %zu out of bounds (%zu)", i, list->count);
+  }
+  uintptr_t element_ptr = (uintptr_t)list->storage + i * list->element_size;
+  iree_vm_value_t value;
+  value.i64 = 0;
+  switch (list->storage_mode) {
+    case IREE_VM_LIST_STORAGE_MODE_VALUE: {
+      value.type = list->element_type.value_type;
+      // TODO(benvanik): #ifdef on LITTLE/BIG_ENDIAN and just memcpy.
+      switch (list->element_size) {
+        case 1:
+          value.i8 = *(int8_t*)element_ptr;
+          break;
+        case 2:
+          value.i16 = *(int16_t*)element_ptr;
+          break;
+        case 4:
+          value.i32 = *(int32_t*)element_ptr;
+          break;
+        case 8:
+          value.i64 = *(int64_t*)element_ptr;
+          break;
+      }
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_VARIANT: {
+      iree_vm_variant_t* variant = (iree_vm_variant_t*)element_ptr;
+      if (!iree_vm_type_def_is_value(&variant->type)) {
+        return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                "variant at index %zu is not a value type", i);
+      }
+      value.type = variant->type.value_type;
+      memcpy(value.value_storage, variant->value_storage,
+             sizeof(value.value_storage));
+      break;
+    }
+    default:
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "list does not store values");
+  }
+  iree_vm_list_convert_value_type(&value, value_type, out_value);
+  return iree_ok_status();
+}
+
+// Stores |value| into element |i|, converting it to the list's element type
+// (value-storage lists) or storing it with its own type (variant lists).
+// Fails for ref-storage lists.
+IREE_API_EXPORT iree_status_t iree_vm_list_set_value(
+    iree_vm_list_t* list, iree_host_size_t i, const iree_vm_value_t* value) {
+  if (i >= list->count) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "index %zu out of bounds (%zu)", i, list->count);
+  }
+  // Determine which type the stored value must be converted to.
+  iree_vm_value_type_t target_type;
+  switch (list->storage_mode) {
+    case IREE_VM_LIST_STORAGE_MODE_VALUE: {
+      target_type = list->element_type.value_type;
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_VARIANT: {
+      target_type = value->type;
+      break;
+    }
+    default:
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "list cannot store values");
+  }
+  iree_vm_value_t converted_value;
+  iree_vm_list_convert_value_type(value, target_type, &converted_value);
+  uintptr_t element_ptr = (uintptr_t)list->storage + i * list->element_size;
+  switch (list->storage_mode) {
+    case IREE_VM_LIST_STORAGE_MODE_VALUE: {
+      // TODO(benvanik): #ifdef on LITTLE/BIG_ENDIAN and just memcpy.
+      switch (list->element_size) {
+        case 1:
+          *(int8_t*)element_ptr = converted_value.i8;
+          break;
+        case 2:
+          *(int16_t*)element_ptr = converted_value.i16;
+          break;
+        case 4:
+          *(int32_t*)element_ptr = converted_value.i32;
+          break;
+        case 8:
+          *(int64_t*)element_ptr = converted_value.i64;
+          break;
+      }
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_VARIANT: {
+      iree_vm_variant_t* variant = (iree_vm_variant_t*)element_ptr;
+      // Release any ref previously stored in this slot before overwriting.
+      if (variant->type.ref_type) {
+        iree_vm_ref_release(&variant->ref);
+      }
+      variant->type.value_type = target_type;
+      variant->type.ref_type = IREE_VM_REF_TYPE_NULL;
+      memcpy(variant->value_storage, converted_value.value_storage,
+             sizeof(variant->value_storage));
+      break;
+    }
+    default:
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "list cannot store values");
+  }
+  return iree_ok_status();
+}
+
+// Appends |value| to the end of |list|, growing it by one element.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_push_value(iree_vm_list_t* list, const iree_vm_value_t* value) {
+  iree_host_size_t i = iree_vm_list_size(list);
+  IREE_RETURN_IF_ERROR(iree_vm_list_resize(list, i + 1));
+  return iree_vm_list_set_value(list, i, value);
+}
+
+// Returns the raw object pointer of the ref at |i| if it checks against
+// |type_descriptor|, or NULL on any failure (bad index, wrong storage mode,
+// or type mismatch). The returned pointer is unretained.
+IREE_API_EXPORT void* iree_vm_list_get_ref_deref(
+    const iree_vm_list_t* list, iree_host_size_t i,
+    const iree_vm_ref_type_descriptor_t* type_descriptor) {
+  iree_vm_ref_t value = {0};
+  iree_status_t status = iree_vm_list_get_ref_assign(list, i, &value);
+  if (!iree_status_is_ok(iree_status_consume_code(status))) {
+    return NULL;
+  }
+  status = iree_vm_ref_check(value, type_descriptor->type);
+  if (!iree_status_is_ok(iree_status_consume_code(status))) {
+    return NULL;
+  }
+  return value.ptr;
+}
+
+// Gets a ref type |list| element at |i| and stores it into |out_value|.
+// If |is_retain|=true then the reference count is incremented and otherwise
+// the ref type is assigned directly (as with iree_vm_ref_assign).
+static iree_status_t iree_vm_list_get_ref_assign_or_retain(
+    const iree_vm_list_t* list, iree_host_size_t i, bool is_retain,
+    iree_vm_ref_t* out_value) {
+  if (i >= list->count) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "index %zu out of bounds (%zu)", i, list->count);
+  }
+  uintptr_t element_ptr = (uintptr_t)list->storage + i * list->element_size;
+  switch (list->storage_mode) {
+    case IREE_VM_LIST_STORAGE_MODE_REF: {
+      iree_vm_ref_t* element_ref = (iree_vm_ref_t*)element_ptr;
+      is_retain ? iree_vm_ref_retain(element_ref, out_value)
+                : iree_vm_ref_assign(element_ref, out_value);
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_VARIANT: {
+      iree_vm_variant_t* variant = (iree_vm_variant_t*)element_ptr;
+      if (!iree_vm_type_def_is_ref(&variant->type)) {
+        // Variant holds a primitive value, not a ref.
+        return iree_make_status(IREE_STATUS_FAILED_PRECONDITION);
+      }
+      is_retain ? iree_vm_ref_retain(&variant->ref, out_value)
+                : iree_vm_ref_assign(&variant->ref, out_value);
+      break;
+    }
+    default:
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "list does not store refs");
+  }
+  return iree_ok_status();
+}
+
+// Gets the ref at |i| without retaining; |out_value| borrows the list's ref.
+IREE_API_EXPORT iree_status_t iree_vm_list_get_ref_assign(
+    const iree_vm_list_t* list, iree_host_size_t i, iree_vm_ref_t* out_value) {
+  return iree_vm_list_get_ref_assign_or_retain(list, i, /*is_retain=*/false,
+                                               out_value);
+}
+
+// Gets the ref at |i| with a retain; the caller owns a new reference.
+IREE_API_EXPORT iree_status_t iree_vm_list_get_ref_retain(
+    const iree_vm_list_t* list, iree_host_size_t i, iree_vm_ref_t* out_value) {
+  return iree_vm_list_get_ref_assign_or_retain(list, i, /*is_retain=*/true,
+                                               out_value);
+}
+
+// Stores |value| into element |i| as a ref, either retaining it or moving it
+// (|is_move|=true leaves |value| reset). Ref-storage lists type-check the ref
+// against the list's declared element type; variant lists accept any ref.
+static iree_status_t iree_vm_list_set_ref(iree_vm_list_t* list,
+                                          iree_host_size_t i, bool is_move,
+                                          iree_vm_ref_t* value) {
+  if (i >= list->count) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "index %zu out of bounds (%zu)", i, list->count);
+  }
+  uintptr_t element_ptr = (uintptr_t)list->storage + i * list->element_size;
+  switch (list->storage_mode) {
+    case IREE_VM_LIST_STORAGE_MODE_REF: {
+      iree_vm_ref_t* element_ref = (iree_vm_ref_t*)element_ptr;
+      IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
+          is_move, value, list->element_type.ref_type, element_ref));
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_VARIANT: {
+      iree_vm_variant_t* variant = (iree_vm_variant_t*)element_ptr;
+      // If the slot held a primitive value, clear the stale bytes so the ref
+      // union member starts from a clean state.
+      if (variant->type.value_type) {
+        memset(&variant->ref, 0, sizeof(variant->ref));
+      }
+      variant->type.value_type = IREE_VM_VALUE_TYPE_NONE;
+      variant->type.ref_type = value->type;
+      iree_vm_ref_retain_or_move(is_move, value, &variant->ref);
+      break;
+    }
+    default:
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "list cannot store refs");
+  }
+  return iree_ok_status();
+}
+
+// Stores a retained copy of |value| into element |i|.
+IREE_API_EXPORT iree_status_t iree_vm_list_set_ref_retain(
+    iree_vm_list_t* list, iree_host_size_t i, const iree_vm_ref_t* value) {
+  return iree_vm_list_set_ref(list, i, /*is_move=*/false,
+                              (iree_vm_ref_t*)value);
+}
+
+// Appends a retained copy of |value| to the end of |list|.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_push_ref_retain(iree_vm_list_t* list, const iree_vm_ref_t* value) {
+  iree_host_size_t i = iree_vm_list_size(list);
+  IREE_RETURN_IF_ERROR(iree_vm_list_resize(list, i + 1));
+  return iree_vm_list_set_ref_retain(list, i, value);
+}
+
+// Moves |value| into element |i|; |value| is reset on success.
+IREE_API_EXPORT iree_status_t iree_vm_list_set_ref_move(iree_vm_list_t* list,
+                                                        iree_host_size_t i,
+                                                        iree_vm_ref_t* value) {
+  return iree_vm_list_set_ref(list, i, /*is_move=*/true, value);
+}
+
+// Appends |value| to the end of |list| by move; |value| is reset on success.
+IREE_API_EXPORT iree_status_t iree_vm_list_push_ref_move(iree_vm_list_t* list,
+                                                         iree_vm_ref_t* value) {
+  iree_host_size_t i = iree_vm_list_size(list);
+  IREE_RETURN_IF_ERROR(iree_vm_list_resize(list, i + 1));
+  return iree_vm_list_set_ref_move(list, i, value);
+}
+
+// Removes the first element of |list|, moving its ref into |out_value|.
+// Ownership transfers to the caller: the slot is assigned (not retained) and
+// then overwritten by the memmove below, so the list's reference is gone.
+// Fails for empty lists and (via get_ref_assign) for non-ref front elements.
+IREE_API_EXPORT iree_status_t iree_vm_list_pop_front_ref_move(
+    iree_vm_list_t* list, iree_vm_ref_t* out_value) {
+  iree_host_size_t list_size = iree_vm_list_size(list);
+  if (list_size == 0) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "cannot pop from an empty list");
+  }
+  IREE_RETURN_IF_ERROR(iree_vm_list_get_ref_assign(list, 0, out_value));
+  // Shift the remaining elements down one slot and zero the vacated tail so
+  // no stale ref bytes remain in storage.
+  memmove(list->storage, (uint8_t*)list->storage + list->element_size,
+          (list_size - 1) * list->element_size);
+  --list->count;
+  memset((uint8_t*)list->storage + list->count * list->element_size, 0,
+         list->element_size);
+  return iree_ok_status();
+}
+
+// Reads element |i| into the variant |out_value| regardless of storage mode.
+// NOTE(review): ref elements are retained for REF-storage lists but only
+// assigned (borrowed) for VARIANT-storage lists - confirm callers expect this
+// asymmetric ownership.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_get_variant(const iree_vm_list_t* list, iree_host_size_t i,
+                         iree_vm_variant_t* out_value) {
+  if (i >= list->count) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "index %zu out of bounds (%zu)", i, list->count);
+  }
+  uintptr_t element_ptr = (uintptr_t)list->storage + i * list->element_size;
+  switch (list->storage_mode) {
+    case IREE_VM_LIST_STORAGE_MODE_VALUE: {
+      out_value->type = list->element_type;
+      memcpy(out_value->value_storage, (void*)element_ptr, list->element_size);
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_REF: {
+      iree_vm_ref_t* element_ref = (iree_vm_ref_t*)element_ptr;
+      out_value->type.ref_type = element_ref->type;
+      out_value->type.value_type = IREE_VM_VALUE_TYPE_NONE;
+      iree_vm_ref_retain(element_ref, &out_value->ref);
+      break;
+    }
+    case IREE_VM_LIST_STORAGE_MODE_VARIANT: {
+      iree_vm_variant_t* variant = (iree_vm_variant_t*)element_ptr;
+      out_value->type = variant->type;
+      if (iree_vm_type_def_is_ref(&variant->type)) {
+        iree_vm_ref_assign(&variant->ref, &out_value->ref);
+      } else {
+        memcpy(out_value->value_storage, variant->value_storage,
+               sizeof(variant->value_storage));
+      }
+      break;
+    }
+    default:
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION);
+  }
+  return iree_ok_status();
+}
+
+// TODO(benvanik): implement variant assignment (values and refs).
+IREE_API_EXPORT iree_status_t iree_vm_list_set_variant(
+    iree_vm_list_t* list, iree_host_size_t i, const iree_vm_variant_t* value) {
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "iree_vm_list_set_variant unimplemented");
+}
+
+// Appends |value| to the end of |list|, growing it by one element.
+IREE_API_EXPORT iree_status_t iree_vm_list_push_variant(
+    iree_vm_list_t* list, const iree_vm_variant_t* value) {
+  iree_host_size_t i = iree_vm_list_size(list);
+  IREE_RETURN_IF_ERROR(iree_vm_list_resize(list, i + 1));
+  iree_status_t status = iree_vm_list_set_variant(list, i, value);
+  if (!iree_status_is_ok(status)) {
+    // Fix: roll back the resize so a failed set (currently always, as
+    // set_variant is unimplemented) does not leave a spurious zeroed element
+    // appended to the list. Truncating resize cannot fail.
+    iree_status_ignore(iree_vm_list_resize(list, i));
+  }
+  return status;
+}
+
+// Registers the vm.list ref type with the global type registry. Idempotent.
+// NOTE(review): the registered-check is not synchronized - presumably this is
+// called during single-threaded startup; confirm.
+iree_status_t iree_vm_list_register_types(void) {
+  if (iree_vm_list_descriptor.type != IREE_VM_REF_TYPE_NULL) {
+    // Already registered.
+    return iree_ok_status();
+  }
+
+  iree_vm_list_descriptor.destroy = iree_vm_list_destroy;
+  // offsetof_counter lets the generic ref machinery find the refcount inside
+  // iree_vm_list_t without knowing the struct layout.
+  iree_vm_list_descriptor.offsetof_counter =
+      offsetof(iree_vm_list_t, ref_object.counter);
+  iree_vm_list_descriptor.type_name = iree_make_cstring_view("vm.list");
+  return iree_vm_ref_register_type(&iree_vm_list_descriptor);
+}
diff --git a/runtime/src/iree/vm/list.h b/runtime/src/iree/vm/list.h
new file mode 100644
index 0000000..bded73b
--- /dev/null
+++ b/runtime/src/iree/vm/list.h
@@ -0,0 +1,193 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_LIST_H_
+#define IREE_VM_LIST_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/ref.h"
+#include "iree/vm/type_def.h"
+#include "iree/vm/value.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A growable list that can hold primitive value types or ref objects or a mix.
+// This maps to the `!vm.list<...>` type in the VM IR and is designed to enable
+// flexible interop between hosting applications using the VM C API to invoke IR
+// and custom modules that need to pass arbitrary list-like data across the VM
+// ABI. It is not designed for efficiency: if you are performing large amounts
+// of work on the list type you should instead be representing that using the
+// HAL types so that you can get acceleration.
+//
+// This type has the same performance characteristics as std::vector; pushes may
+// grow the capacity of the list and to ensure minimal wastage it is always
+// better to reserve the exact desired element count first.
+typedef struct iree_vm_list_t iree_vm_list_t;
+
+// Returns the size in bytes required to store a list with the given element
+// type and capacity. This storage size can be used to stack allocate or reserve
+// memory that is then used by iree_vm_list_initialize to avoid dynamic
+// allocations.
+IREE_API_EXPORT iree_host_size_t iree_vm_list_storage_size(
+ const iree_vm_type_def_t* element_type, iree_host_size_t capacity);
+
+// Initializes a statically-allocated list in the |storage| memory.
+// The storage capacity must be large enough to hold the list internals and
+// its contents which may vary across compilers/platforms/etc; use
+// iree_vm_list_storage_size to query the required capacity.
+//
+// Statically-allocated lists have their lifetime controlled by the caller and
+// must be deinitialized with iree_vm_list_deinitialize only when there are no
+// more users of the list.
+IREE_API_EXPORT iree_status_t iree_vm_list_initialize(
+ iree_byte_span_t storage, const iree_vm_type_def_t* element_type,
+ iree_host_size_t capacity, iree_vm_list_t** out_list);
+
+// Deinitializes a statically-allocated |list| previously initialized with
+// iree_vm_list_initialize. Aborts if there are still references remaining.
+IREE_API_EXPORT void iree_vm_list_deinitialize(iree_vm_list_t* list);
+
+// Creates a growable list containing the given |element_type|, which may either
+// be a primitive iree_vm_value_type_t value (like i32) or a ref type. When
+// storing ref types the list may either store a specific iree_vm_ref_type_t
+// and ensure that all elements set match the type or IREE_VM_REF_TYPE_ANY to
+// indicate that any ref type is allowed.
+//
+// |element_type| can be set to iree_vm_type_def_make_variant_type (or null) to
+// indicate that the list stores variants (each element can differ in type).
+IREE_API_EXPORT iree_status_t iree_vm_list_create(
+ const iree_vm_type_def_t* element_type, iree_host_size_t initial_capacity,
+ iree_allocator_t allocator, iree_vm_list_t** out_list);
+
+// Retains the given |list| for the caller.
+IREE_API_EXPORT void iree_vm_list_retain(iree_vm_list_t* list);
+
+// Releases the given |list| from the caller.
+IREE_API_EXPORT void iree_vm_list_release(iree_vm_list_t* list);
+
+// Returns the element type stored in the list.
+IREE_API_EXPORT iree_status_t iree_vm_list_element_type(
+ const iree_vm_list_t* list, iree_vm_type_def_t* out_element_type);
+
+// Returns the capacity of the list in elements.
+IREE_API_EXPORT iree_host_size_t
+iree_vm_list_capacity(const iree_vm_list_t* list);
+
+// Reserves storage for at least minimum_capacity elements. If the list already
+// has at least the specified capacity the operation is ignored.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_reserve(iree_vm_list_t* list, iree_host_size_t minimum_capacity);
+
+// Returns the current size of the list in elements.
+IREE_API_EXPORT iree_host_size_t iree_vm_list_size(const iree_vm_list_t* list);
+
+// Resizes the list to contain new_size elements. This will either truncate
+// the list if the existing size is greater than new_size or extend the list
+// with the default list value of 0 if storing primitives, null if refs, or
+// empty if variants.
+IREE_API_EXPORT iree_status_t iree_vm_list_resize(iree_vm_list_t* list,
+ iree_host_size_t new_size);
+
+// Returns the value of the element at the given index.
+// Note that the value type may vary from element to element in variant lists
+// and callers should check the |out_value| type.
+IREE_API_EXPORT iree_status_t iree_vm_list_get_value(
+ const iree_vm_list_t* list, iree_host_size_t i, iree_vm_value_t* out_value);
+
+// Returns the value of the element at the given index. If the specified
+// |value_type| differs from the list storage type the value will be converted
+// using the value type semantics (such as sign/zero extend, etc).
+IREE_API_EXPORT iree_status_t iree_vm_list_get_value_as(
+ const iree_vm_list_t* list, iree_host_size_t i,
+ iree_vm_value_type_t value_type, iree_vm_value_t* out_value);
+
+// Sets the value of the element at the given index. If the specified |value|
+// type differs from the list storage type the value will be converted using the
+// value type semantics (such as sign/zero extend, etc).
+IREE_API_EXPORT iree_status_t iree_vm_list_set_value(
+ iree_vm_list_t* list, iree_host_size_t i, const iree_vm_value_t* value);
+
+// Pushes the value of the element to the end of the list.
+// If the specified |value| type differs from the list storage type the value
+// will be converted using the value type semantics (such as sign/zero extend,
+// etc).
+IREE_API_EXPORT iree_status_t
+iree_vm_list_push_value(iree_vm_list_t* list, const iree_vm_value_t* value);
+
+// Returns a dereferenced pointer to the given type if the element at the given
+// index matches the type. Returns NULL on error.
+IREE_API_EXPORT void* iree_vm_list_get_ref_deref(
+ const iree_vm_list_t* list, iree_host_size_t i,
+ const iree_vm_ref_type_descriptor_t* type_descriptor);
+
+// Returns the ref value of the element at the given index.
+// The ref will not be retained and must be retained by the caller to extend
+// its lifetime.
+IREE_API_EXPORT iree_status_t iree_vm_list_get_ref_assign(
+ const iree_vm_list_t* list, iree_host_size_t i, iree_vm_ref_t* out_value);
+
+// Returns the ref value of the element at the given index.
+// The ref will be retained and must be released by the caller.
+IREE_API_EXPORT iree_status_t iree_vm_list_get_ref_retain(
+ const iree_vm_list_t* list, iree_host_size_t i, iree_vm_ref_t* out_value);
+
+// Sets the ref value of the element at the given index, retaining a reference
+// in the list until the element is cleared or the list is disposed.
+IREE_API_EXPORT iree_status_t iree_vm_list_set_ref_retain(
+ iree_vm_list_t* list, iree_host_size_t i, const iree_vm_ref_t* value);
+
+// Pushes the ref value of the element to the end of the list, retaining a
+// reference in the list until the element is cleared or the list is disposed.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_push_ref_retain(iree_vm_list_t* list, const iree_vm_ref_t* value);
+
+// Sets the ref value of the element at the given index, moving ownership of the
+// |value| reference to the list.
+IREE_API_EXPORT iree_status_t iree_vm_list_set_ref_move(iree_vm_list_t* list,
+ iree_host_size_t i,
+ iree_vm_ref_t* value);
+
+// Pushes the ref value of the element to the end of the list, moving ownership
+// of the |value| reference to the list.
+IREE_API_EXPORT iree_status_t iree_vm_list_push_ref_move(iree_vm_list_t* list,
+ iree_vm_ref_t* value);
+
+// Pops the front ref value from the list and transfers ownership to the caller.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_pop_front_ref_move(iree_vm_list_t* list, iree_vm_ref_t* out_value);
+
+// Returns the value of the element at the given index. If the element contains
+// a ref it will *not* be retained and the caller must retain it to extend its
+// lifetime.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_get_variant(const iree_vm_list_t* list, iree_host_size_t i,
+ iree_vm_variant_t* out_value);
+
+// Sets the value of the element at the given index. If the specified |value|
+// type differs from the list storage type the value will be converted using the
+// value type semantics (such as sign/zero extend, etc). If the variant is a ref
+// then it will be retained.
+IREE_API_EXPORT iree_status_t iree_vm_list_set_variant(
+ iree_vm_list_t* list, iree_host_size_t i, const iree_vm_variant_t* value);
+
+// Pushes the value of the element to the end of the list. If the specified
+// |value| type differs from the list storage type the value will be converted
+// using the value type semantics (such as sign/zero extend, etc). If the
+// variant is a ref then it will be retained.
+IREE_API_EXPORT iree_status_t
+iree_vm_list_push_variant(iree_vm_list_t* list, const iree_vm_variant_t* value);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_vm_list, iree_vm_list_t);
+
+#endif // IREE_VM_LIST_H_
diff --git a/runtime/src/iree/vm/list_test.cc b/runtime/src/iree/vm/list_test.cc
new file mode 100644
index 0000000..d1b43be
--- /dev/null
+++ b/runtime/src/iree/vm/list_test.cc
@@ -0,0 +1,460 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/list.h"
+
+#include <cstdint>
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+#include "iree/vm/builtin_types.h"
+#include "iree/vm/ref_cc.h"
+
+class A : public iree::vm::RefObject<A> {
+ public:
+ float data() const { return data_; }
+ void set_data(float value) { data_ = value; }
+
+ private:
+ float data_ = 1.0f;
+};
+static iree_vm_ref_type_descriptor_t test_a_descriptor = {0};
+IREE_VM_DECLARE_TYPE_ADAPTERS(test_a, A);
+IREE_VM_DEFINE_TYPE_ADAPTERS(test_a, A);
+
+class B : public iree::vm::RefObject<B> {
+ public:
+ int data() const { return data_; }
+ void set_data(int value) { data_ = value; }
+
+ private:
+ int data_ = 2;
+};
+static iree_vm_ref_type_descriptor_t test_b_descriptor = {0};
+IREE_VM_DECLARE_TYPE_ADAPTERS(test_b, B);
+IREE_VM_DEFINE_TYPE_ADAPTERS(test_b, B);
+
+namespace {
+
+using ::iree::Status;
+using ::iree::testing::status::StatusIs;
+
+template <typename T>
+static void RegisterRefType(iree_vm_ref_type_descriptor_t* descriptor,
+ const char* type_name) {
+ if (descriptor->type == IREE_VM_REF_TYPE_NULL) {
+ descriptor->type_name = iree_make_cstring_view(type_name);
+ descriptor->offsetof_counter = T::offsetof_counter();
+ descriptor->destroy = T::DirectDestroy;
+ IREE_CHECK_OK(iree_vm_ref_register_type(descriptor));
+ }
+}
+
+static void RegisterRefTypes() {
+ RegisterRefType<A>(&test_a_descriptor, "AType");
+ RegisterRefType<B>(&test_b_descriptor, "BType");
+}
+
+template <typename T, typename V>
+static iree_vm_ref_t MakeRef(V value) {
+ iree_vm_ref_t ref = {0};
+ auto* obj = new T();
+ obj->set_data(value);
+ IREE_CHECK_OK(iree_vm_ref_wrap_assign(
+ obj, iree::vm::ref_type_descriptor<T>::get()->type, &ref));
+ return ref;
+}
+
+class VMListTest : public ::testing::Test {
+ protected:
+ static void SetUpTestSuite() {
+ IREE_CHECK_OK(iree_vm_register_builtin_types());
+ RegisterRefTypes();
+ }
+};
+
+// Tests simple primitive value list usage, mainly just for demonstration.
+// Stores only i32 element types, equivalent to `!vm.list<i32>`.
+TEST_F(VMListTest, UsageI32) {
+ iree_vm_type_def_t element_type =
+ iree_vm_type_def_make_value_type(IREE_VM_VALUE_TYPE_I32);
+ iree_host_size_t initial_capacity = 123;
+ iree_vm_list_t* list = nullptr;
+ IREE_ASSERT_OK(iree_vm_list_create(&element_type, initial_capacity,
+ iree_allocator_system(), &list));
+
+ iree_vm_type_def_t queried_element_type;
+ IREE_ASSERT_OK(iree_vm_list_element_type(list, &queried_element_type));
+ EXPECT_TRUE(iree_vm_type_def_is_value(&queried_element_type));
+ EXPECT_EQ(0,
+ memcmp(&element_type, &queried_element_type, sizeof(element_type)));
+ EXPECT_LE(initial_capacity, iree_vm_list_capacity(list));
+ EXPECT_EQ(0, iree_vm_list_size(list));
+
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 5));
+ EXPECT_EQ(5, iree_vm_list_size(list));
+
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_value_t value = iree_vm_value_make_i32((int32_t)i);
+ IREE_ASSERT_OK(iree_vm_list_set_value(list, i, &value));
+ }
+
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_value_t value;
+ IREE_ASSERT_OK(
+ iree_vm_list_get_value_as(list, i, IREE_VM_VALUE_TYPE_I32, &value));
+ EXPECT_EQ(IREE_VM_VALUE_TYPE_I32, value.type);
+ EXPECT_EQ(i, value.i32);
+ }
+
+ iree_vm_list_release(list);
+}
+
+// Tests simple ref object list usage, mainly just for demonstration.
+// Stores ref object type A elements only, equivalent to `!vm.list<!vm.ref<A>>`.
+TEST_F(VMListTest, UsageRef) {
+ iree_vm_type_def_t element_type =
+ iree_vm_type_def_make_ref_type(test_a_type_id());
+ iree_host_size_t initial_capacity = 123;
+ iree_vm_list_t* list = nullptr;
+ IREE_ASSERT_OK(iree_vm_list_create(&element_type, initial_capacity,
+ iree_allocator_system(), &list));
+
+ iree_vm_type_def_t queried_element_type;
+ IREE_ASSERT_OK(iree_vm_list_element_type(list, &queried_element_type));
+ EXPECT_TRUE(iree_vm_type_def_is_ref(&queried_element_type));
+ EXPECT_EQ(0,
+ memcmp(&element_type, &queried_element_type, sizeof(element_type)));
+ EXPECT_LE(initial_capacity, iree_vm_list_capacity(list));
+ EXPECT_EQ(0, iree_vm_list_size(list));
+
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 5));
+ EXPECT_EQ(5, iree_vm_list_size(list));
+
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_ref_t ref_a = MakeRef<A>((float)i);
+ IREE_ASSERT_OK(iree_vm_list_set_ref_move(list, i, &ref_a));
+ }
+
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_get_ref_retain(list, i, &ref_a));
+ EXPECT_TRUE(test_a_isa(ref_a));
+ auto* a = test_a_deref(ref_a);
+ EXPECT_EQ(i, a->data());
+ iree_vm_ref_release(&ref_a);
+ }
+
+ iree_vm_list_release(list);
+}
+
+// Tests simple variant list usage, mainly just for demonstration.
+// Stores any heterogeneous element type, equivalent to `!vm.list<?>`.
+TEST_F(VMListTest, UsageVariant) {
+ iree_vm_type_def_t element_type = iree_vm_type_def_make_variant_type();
+ iree_host_size_t initial_capacity = 123;
+ iree_vm_list_t* list = nullptr;
+ IREE_ASSERT_OK(iree_vm_list_create(&element_type, initial_capacity,
+ iree_allocator_system(), &list));
+
+ iree_vm_type_def_t queried_element_type;
+ IREE_ASSERT_OK(iree_vm_list_element_type(list, &queried_element_type));
+ EXPECT_TRUE(iree_vm_type_def_is_variant(&queried_element_type));
+ EXPECT_LE(initial_capacity, iree_vm_list_capacity(list));
+ EXPECT_EQ(0, iree_vm_list_size(list));
+
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 10));
+ EXPECT_EQ(10, iree_vm_list_size(list));
+
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_value_t value = iree_vm_value_make_i32((int32_t)i);
+ IREE_ASSERT_OK(iree_vm_list_set_value(list, i, &value));
+ }
+ for (iree_host_size_t i = 5; i < 10; ++i) {
+ iree_vm_ref_t ref_a = MakeRef<A>(static_cast<float>(i));
+ IREE_ASSERT_OK(iree_vm_list_set_ref_move(list, i, &ref_a));
+ }
+
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_value_t value;
+ IREE_ASSERT_OK(
+ iree_vm_list_get_value_as(list, i, IREE_VM_VALUE_TYPE_I32, &value));
+ EXPECT_EQ(IREE_VM_VALUE_TYPE_I32, value.type);
+ EXPECT_EQ(i, value.i32);
+ }
+ for (iree_host_size_t i = 5; i < 10; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_get_ref_retain(list, i, &ref_a));
+ EXPECT_TRUE(test_a_isa(ref_a));
+ auto* a = test_a_deref(ref_a);
+ EXPECT_EQ(i, a->data());
+ iree_vm_ref_release(&ref_a);
+ }
+
+ iree_vm_list_release(list);
+}
+
+// Tests capacity reservation.
+TEST_F(VMListTest, Reserve) {
+ // Allocate with 0 initial capacity (which may get rounded up).
+ iree_vm_type_def_t element_type = iree_vm_type_def_make_variant_type();
+ iree_host_size_t initial_capacity = 0;
+ iree_vm_list_t* list = nullptr;
+ IREE_ASSERT_OK(iree_vm_list_create(&element_type, initial_capacity,
+ iree_allocator_system(), &list));
+ EXPECT_LE(initial_capacity, iree_vm_list_capacity(list));
+ EXPECT_EQ(0, iree_vm_list_size(list));
+
+ // Reserve some capacity, which may allocate.
+ IREE_ASSERT_OK(iree_vm_list_reserve(list, 100));
+ iree_host_size_t current_capacity = iree_vm_list_capacity(list);
+ EXPECT_LE(100, current_capacity);
+
+ // Resize to add items, which should not change capacity.
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 1));
+ EXPECT_EQ(1, iree_vm_list_size(list));
+ EXPECT_EQ(current_capacity, iree_vm_list_capacity(list));
+
+ // Reserving <= the current capacity should be a no-op.
+ IREE_ASSERT_OK(iree_vm_list_reserve(list, current_capacity));
+ EXPECT_EQ(current_capacity, iree_vm_list_capacity(list));
+
+ iree_vm_list_release(list);
+}
+
+// Tests the behavior of resize for truncation and extension on primitives.
+TEST_F(VMListTest, ResizeI32) {
+ iree_vm_type_def_t element_type =
+ iree_vm_type_def_make_value_type(IREE_VM_VALUE_TYPE_I32);
+ iree_host_size_t initial_capacity = 4;
+ iree_vm_list_t* list = nullptr;
+ IREE_ASSERT_OK(iree_vm_list_create(&element_type, initial_capacity,
+ iree_allocator_system(), &list));
+ EXPECT_LE(initial_capacity, iree_vm_list_capacity(list));
+ EXPECT_EQ(0, iree_vm_list_size(list));
+
+ // Extend and zero-initialize.
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 5));
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_value_t value;
+ IREE_ASSERT_OK(
+ iree_vm_list_get_value_as(list, i, IREE_VM_VALUE_TYPE_I32, &value));
+ EXPECT_EQ(0, value.i32);
+ }
+
+ // Overwrite with [0, 5).
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_value_t value = iree_vm_value_make_i32((int32_t)i);
+ IREE_ASSERT_OK(iree_vm_list_set_value(list, i, &value));
+ }
+
+ // Truncate to [0, 2) and then extend again.
+ // This ensures that we test the primitive clearing path during cleanup:
+ // [int, int, int, int, int]
+ // |___________| <- truncation region
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 2));
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 5));
+
+  // Ensure that elements 2+ were zeroed by the re-extension while 0 and 1
+  // are still valid as before.
+ for (iree_host_size_t i = 0; i < 2; ++i) {
+ iree_vm_value_t value;
+ IREE_ASSERT_OK(
+ iree_vm_list_get_value_as(list, i, IREE_VM_VALUE_TYPE_I32, &value));
+ EXPECT_EQ(i, value.i32);
+ }
+ for (iree_host_size_t i = 2; i < 5; ++i) {
+ iree_vm_value_t value;
+ IREE_ASSERT_OK(
+ iree_vm_list_get_value_as(list, i, IREE_VM_VALUE_TYPE_I32, &value));
+ EXPECT_EQ(0, value.i32);
+ }
+
+ iree_vm_list_release(list);
+}
+
+// Tests the behavior of resize for truncation and extension on refs.
+TEST_F(VMListTest, ResizeRef) {
+ iree_vm_type_def_t element_type =
+ iree_vm_type_def_make_ref_type(test_a_type_id());
+ iree_host_size_t initial_capacity = 4;
+ iree_vm_list_t* list = nullptr;
+ IREE_ASSERT_OK(iree_vm_list_create(&element_type, initial_capacity,
+ iree_allocator_system(), &list));
+ EXPECT_LE(initial_capacity, iree_vm_list_capacity(list));
+ EXPECT_EQ(0, iree_vm_list_size(list));
+
+ // Extend and zero-initialize.
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 5));
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_get_ref_assign(list, i, &ref_a));
+ EXPECT_TRUE(iree_vm_ref_is_null(&ref_a));
+ }
+
+ // Overwrite with [0, 5).
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_ref_t ref_a = MakeRef<A>((float)i);
+ IREE_ASSERT_OK(iree_vm_list_set_ref_move(list, i, &ref_a));
+ }
+
+ // Truncate to [0, 2) and then extend again.
+ // This ensures that we test the ref path during cleanup:
+ // [ref, ref, ref, ref, ref]
+ // |___________| <- truncation region
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 2));
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 5));
+
+  // Ensure that elements 2+ were reset by the re-extension while 0 and 1
+  // are still valid as before.
+ for (iree_host_size_t i = 0; i < 2; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_get_ref_retain(list, i, &ref_a));
+ EXPECT_TRUE(test_a_isa(ref_a));
+ auto* a = test_a_deref(ref_a);
+ EXPECT_EQ(i, a->data());
+ iree_vm_ref_release(&ref_a);
+ }
+ for (iree_host_size_t i = 2; i < 5; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_get_ref_assign(list, i, &ref_a));
+ EXPECT_TRUE(iree_vm_ref_is_null(&ref_a));
+ }
+
+ iree_vm_list_release(list);
+}
+
+// Tests the behavior of resize for truncation and extension on variants.
+TEST_F(VMListTest, ResizeVariant) {
+ iree_vm_type_def_t element_type = iree_vm_type_def_make_variant_type();
+ iree_host_size_t initial_capacity = 4;
+ iree_vm_list_t* list = nullptr;
+ IREE_ASSERT_OK(iree_vm_list_create(&element_type, initial_capacity,
+ iree_allocator_system(), &list));
+ EXPECT_LE(initial_capacity, iree_vm_list_capacity(list));
+ EXPECT_EQ(0, iree_vm_list_size(list));
+
+ // Extend and zero-initialize.
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 5));
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_variant_t value = iree_vm_variant_empty();
+ IREE_ASSERT_OK(iree_vm_list_get_variant(list, i, &value));
+ EXPECT_TRUE(iree_vm_variant_is_empty(value));
+ }
+
+ // Overwrite with [0, 5) in mixed types.
+ for (iree_host_size_t i = 0; i < 4; ++i) {
+ iree_vm_ref_t ref_a = MakeRef<A>((float)i);
+ IREE_ASSERT_OK(iree_vm_list_set_ref_move(list, i, &ref_a));
+ }
+ for (iree_host_size_t i = 4; i < 5; ++i) {
+ iree_vm_value_t value = iree_vm_value_make_i32((int32_t)i);
+ IREE_ASSERT_OK(iree_vm_list_set_value(list, i, &value));
+ }
+
+ // Truncate to [0, 2) and then extend again.
+ // This ensures that we test the variant path during cleanup:
+ // [ref, ref, ref, ref, int]
+ // |___________| <- truncation region
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 2));
+ IREE_ASSERT_OK(iree_vm_list_resize(list, 5));
+
+  // Ensure that elements 2+ were reset by the re-extension while 0 and 1
+  // are still valid as before.
+ for (iree_host_size_t i = 0; i < 2; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_get_ref_retain(list, i, &ref_a));
+ EXPECT_TRUE(test_a_isa(ref_a));
+ auto* a = test_a_deref(ref_a);
+ EXPECT_EQ(i, a->data());
+ iree_vm_ref_release(&ref_a);
+ }
+ for (iree_host_size_t i = 2; i < 5; ++i) {
+ iree_vm_variant_t value = iree_vm_variant_empty();
+ IREE_ASSERT_OK(iree_vm_list_get_variant(list, i, &value));
+ EXPECT_TRUE(iree_vm_variant_is_empty(value));
+ }
+
+ iree_vm_list_release(list);
+}
+
+// TODO(benvanik): test value get/set.
+
+// TODO(benvanik): test value conversion.
+
+// TODO(benvanik): test ref get/set.
+
+// Tests pushing and popping ref objects.
+TEST_F(VMListTest, PushPopRef) {
+ iree_vm_type_def_t element_type =
+ iree_vm_type_def_make_ref_type(test_a_type_id());
+ iree_host_size_t initial_capacity = 4;
+ iree_vm_list_t* list = nullptr;
+ IREE_ASSERT_OK(iree_vm_list_create(&element_type, initial_capacity,
+ iree_allocator_system(), &list));
+ EXPECT_LE(initial_capacity, iree_vm_list_capacity(list));
+ EXPECT_EQ(0, iree_vm_list_size(list));
+
+  // Popping when empty fails.
+ iree_vm_ref_t empty_ref{0};
+ EXPECT_THAT(Status(iree_vm_list_pop_front_ref_move(list, &empty_ref)),
+ StatusIs(iree::StatusCode::kOutOfRange));
+
+ // Push back [0, 5).
+ for (iree_host_size_t i = 0; i < 5; ++i) {
+ iree_vm_ref_t ref_a = MakeRef<A>((float)i);
+ IREE_ASSERT_OK(iree_vm_list_push_ref_move(list, &ref_a));
+ }
+
+ // Pop the first two [0, 1] and leave [2, 5).
+ // This ensures that we test the ref path during cleanup:
+ // [ref, ref, ref, ref, ref]
+ // |______| <- popped region
+ for (iree_host_size_t i = 0; i < 2; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_pop_front_ref_move(list, &ref_a));
+ EXPECT_TRUE(test_a_isa(ref_a));
+ auto* a = test_a_deref(ref_a);
+ EXPECT_EQ(i, a->data());
+ iree_vm_ref_release(&ref_a);
+ }
+
+ // Ensure that elements 2+ are valid but now at offset 0.
+ for (iree_host_size_t i = 2; i < 5; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_get_ref_retain(list, i - 2, &ref_a));
+ EXPECT_TRUE(test_a_isa(ref_a));
+ auto* a = test_a_deref(ref_a);
+ EXPECT_EQ(i, a->data());
+ iree_vm_ref_release(&ref_a);
+ }
+
+ // Push back two more to get [2, 7).
+ for (iree_host_size_t i = 5; i < 7; ++i) {
+ iree_vm_ref_t ref_a = MakeRef<A>((float)i);
+ IREE_ASSERT_OK(iree_vm_list_push_ref_move(list, &ref_a));
+ }
+
+ // Ensure the new elements got added to the end.
+ for (iree_host_size_t i = 2; i < 7; ++i) {
+ iree_vm_ref_t ref_a{0};
+ IREE_ASSERT_OK(iree_vm_list_get_ref_retain(list, i - 2, &ref_a));
+ EXPECT_TRUE(test_a_isa(ref_a));
+ auto* a = test_a_deref(ref_a);
+ EXPECT_EQ(i, a->data());
+ iree_vm_ref_release(&ref_a);
+ }
+
+ iree_vm_list_release(list);
+}
+
+// TODO(benvanik): test primitive variant get/set.
+
+// TODO(benvanik): test ref variant get/set.
+
+} // namespace
diff --git a/runtime/src/iree/vm/module.c b/runtime/src/iree/vm/module.c
new file mode 100644
index 0000000..144d200
--- /dev/null
+++ b/runtime/src/iree/vm/module.c
@@ -0,0 +1,347 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/module.h"
+
+#include <string.h>
+
+#include "iree/base/internal/atomics.h"
+#include "iree/base/tracing.h"
+#include "iree/vm/ref.h"
+#include "iree/vm/stack.h"
+
+IREE_API_EXPORT iree_status_t iree_vm_function_call_get_cconv_fragments(
+ const iree_vm_function_signature_t* signature,
+ iree_string_view_t* out_arguments, iree_string_view_t* out_results) {
+ memset(out_arguments, 0, sizeof(*out_arguments));
+ memset(out_results, 0, sizeof(*out_results));
+ iree_string_view_t cconv = signature->calling_convention;
+ if (!cconv.size) {
+ // No cconv string, so function is `()->()`.
+ return iree_ok_status();
+ } else if (cconv.data[0] != '0') {
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unsupported cconv version %c", cconv.data[0]);
+ }
+ iree_string_view_t cconv_body = iree_string_view_substr(cconv, 1, INTPTR_MAX);
+ if (iree_string_view_split(cconv_body, '_', out_arguments, out_results) ==
+ -1) {
+ *out_arguments = cconv_body;
+ }
+ return iree_ok_status();
+}
+
+static iree_status_t iree_vm_function_call_count_fragment_values(
+ iree_string_view_t cconv_fragment, iree_host_size_t* out_count) {
+ IREE_ASSERT_ARGUMENT(out_count);
+ *out_count = 0;
+ iree_host_size_t count = 0;
+ for (iree_host_size_t i = 0; i < cconv_fragment.size; ++i) {
+ switch (cconv_fragment.data[i]) {
+ case IREE_VM_CCONV_TYPE_VOID:
+ break;
+ case IREE_VM_CCONV_TYPE_I32:
+ case IREE_VM_CCONV_TYPE_F32:
+ case IREE_VM_CCONV_TYPE_I64:
+ case IREE_VM_CCONV_TYPE_F64:
+ case IREE_VM_CCONV_TYPE_REF:
+ ++count;
+ break;
+ case IREE_VM_CCONV_TYPE_SPAN_START: {
+ for (i = i + 1; i < cconv_fragment.size &&
+ cconv_fragment.data[i] != IREE_VM_CCONV_TYPE_SPAN_END;
+ ++i) {
+ switch (cconv_fragment.data[i]) {
+ case IREE_VM_CCONV_TYPE_VOID:
+ break;
+ case IREE_VM_CCONV_TYPE_I32:
+ case IREE_VM_CCONV_TYPE_F32:
+ case IREE_VM_CCONV_TYPE_I64:
+ case IREE_VM_CCONV_TYPE_F64:
+ case IREE_VM_CCONV_TYPE_REF:
+ ++count;
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unsupported cconv span type %c",
+ cconv_fragment.data[i]);
+ }
+ }
+ } break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unsupported cconv type %c",
+ cconv_fragment.data[i]);
+ }
+ }
+ *out_count = count;
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_function_call_count_arguments_and_results(
+ const iree_vm_function_signature_t* signature,
+ iree_host_size_t* out_argument_count, iree_host_size_t* out_result_count) {
+ IREE_ASSERT_ARGUMENT(signature);
+ IREE_ASSERT_ARGUMENT(out_argument_count);
+ IREE_ASSERT_ARGUMENT(out_result_count);
+ *out_argument_count = 0;
+ *out_result_count = 0;
+ iree_string_view_t arguments, results;
+ IREE_RETURN_IF_ERROR(iree_vm_function_call_get_cconv_fragments(
+ signature, &arguments, &results));
+ IREE_RETURN_IF_ERROR(iree_vm_function_call_count_fragment_values(
+ arguments, out_argument_count));
+ IREE_RETURN_IF_ERROR(
+ iree_vm_function_call_count_fragment_values(results, out_result_count));
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT bool iree_vm_function_call_is_variadic_cconv(
+ iree_string_view_t cconv) {
+ return iree_string_view_find_char(cconv, IREE_VM_CCONV_TYPE_SPAN_START, 0) !=
+ IREE_STRING_VIEW_NPOS;
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_function_call_compute_cconv_fragment_size(
+ iree_string_view_t cconv_fragment,
+ const iree_vm_register_list_t* segment_size_list,
+ iree_host_size_t* out_required_size) {
+ iree_host_size_t required_size = 0;
+ for (iree_host_size_t i = 0, seg_i = 0; i < cconv_fragment.size;
+ ++i, ++seg_i) {
+ switch (cconv_fragment.data[i]) {
+ case IREE_VM_CCONV_TYPE_VOID:
+ break;
+ case IREE_VM_CCONV_TYPE_I32:
+ case IREE_VM_CCONV_TYPE_F32:
+ required_size += sizeof(int32_t);
+ break;
+ case IREE_VM_CCONV_TYPE_I64:
+ case IREE_VM_CCONV_TYPE_F64:
+ required_size += sizeof(int64_t);
+ break;
+ case IREE_VM_CCONV_TYPE_REF:
+ required_size += sizeof(iree_vm_ref_t);
+ break;
+ case IREE_VM_CCONV_TYPE_SPAN_START: {
+ if (IREE_UNLIKELY(!segment_size_list) ||
+ IREE_UNLIKELY(seg_i >= segment_size_list->size)) {
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "variadic argument found but segment size "
+ "list is missing/underflowed");
+ }
+ iree_host_size_t span_count = segment_size_list->registers[seg_i];
+ required_size += sizeof(int32_t); // count
+ iree_host_size_t span_size = 0;
+ for (i = i + 1; i < cconv_fragment.size &&
+ cconv_fragment.data[i] != IREE_VM_CCONV_TYPE_SPAN_END;
+ ++i) {
+ switch (cconv_fragment.data[i]) {
+ case IREE_VM_CCONV_TYPE_VOID:
+ break;
+ case IREE_VM_CCONV_TYPE_I32:
+ case IREE_VM_CCONV_TYPE_F32:
+ span_size += sizeof(int32_t);
+ break;
+ case IREE_VM_CCONV_TYPE_I64:
+ case IREE_VM_CCONV_TYPE_F64:
+ span_size += sizeof(int64_t);
+ break;
+ case IREE_VM_CCONV_TYPE_REF:
+ span_size += sizeof(iree_vm_ref_t);
+ break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unsupported cconv span type %c",
+ cconv_fragment.data[i]);
+ }
+ }
+ required_size += span_size * span_count;
+ } break;
+ default:
+ return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+ "unsupported cconv type %c",
+ cconv_fragment.data[i]);
+ }
+ }
+ *out_required_size = required_size;
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_vm_function_call_release(
+ iree_vm_function_call_t* call,
+ const iree_vm_function_signature_t* signature) {
+ if (!call->arguments.data_length || !call->results.data_length) {
+ return;
+ }
+ iree_string_view_t cconv = signature->calling_convention;
+ if (cconv.size == 0 || cconv.data[0] != '0') return;
+ uint8_t* p = call->arguments.data;
+ for (iree_host_size_t i = 1; i < cconv.size; ++i) {
+ char c = cconv.data[i];
+ if (c == '_') {
+ // Switch to results.
+ p = call->results.data;
+ }
+ switch (c) {
+ case IREE_VM_CCONV_TYPE_VOID:
+ break;
+ case IREE_VM_CCONV_TYPE_I32:
+ case IREE_VM_CCONV_TYPE_F32:
+ p += sizeof(int32_t);
+ break;
+ case IREE_VM_CCONV_TYPE_I64:
+ case IREE_VM_CCONV_TYPE_F64:
+ p += sizeof(int64_t);
+ break;
+ case IREE_VM_CCONV_TYPE_REF:
+ iree_vm_ref_release((iree_vm_ref_t*)p);
+ p += sizeof(iree_vm_ref_t);
+ break;
+ }
+ }
+}
+
+// Zero-initializes |module| and binds |self| so implementations can populate
+// their interface function pointers afterwards.
+IREE_API_EXPORT iree_status_t
+iree_vm_module_initialize(iree_vm_module_t* module, void* self) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // Clear everything up front so unset interface methods read as NULL.
+  memset(module, 0, sizeof(*module));
+  module->self = self;
+  iree_atomic_ref_count_init(&module->ref_count);
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+// Adds a reference on behalf of the caller; retaining NULL is a no-op.
+IREE_API_EXPORT void iree_vm_module_retain(iree_vm_module_t* module) {
+  if (!module) return;
+  iree_atomic_ref_count_inc(&module->ref_count);
+}
+
+// Drops a reference; destroys the module once the last reference is released.
+IREE_API_EXPORT void iree_vm_module_release(iree_vm_module_t* module) {
+  if (!module) return;
+  // A returned value of 1 indicates this caller held the final reference.
+  if (iree_atomic_ref_count_dec(&module->ref_count) == 1) {
+    module->destroy(module->self);
+  }
+}
+
+// Returns the module's reported name, or the placeholder "null" for a NULL
+// module so callers can log/format unconditionally.
+IREE_API_EXPORT iree_string_view_t
+iree_vm_module_name(const iree_vm_module_t* module) {
+  return module ? module->name(module->self)
+                : iree_make_cstring_view("null");
+}
+
+// Returns the module's reflected signature; a NULL module yields an all-zero
+// signature (no imports/exports/internal functions).
+IREE_API_EXPORT iree_vm_module_signature_t
+iree_vm_module_signature(const iree_vm_module_t* module) {
+  if (module) {
+    return module->signature(module->self);
+  }
+  iree_vm_module_signature_t zero_signature;
+  memset(&zero_signature, 0, sizeof(zero_signature));
+  return zero_signature;
+}
+
+// Thin forwarder to the module's lookup_function implementation; the lookup
+// may be a linear scan so callers should cache the resulting function.
+IREE_API_EXPORT iree_status_t iree_vm_module_lookup_function_by_name(
+    const iree_vm_module_t* module, iree_vm_function_linkage_t linkage,
+    iree_string_view_t name, iree_vm_function_t* out_function) {
+  return module->lookup_function(module->self, linkage, name, out_function);
+}
+
+// Thin forwarder to the module's get_function implementation, requesting only
+// the function reference (name/signature outputs are not needed here).
+IREE_API_EXPORT iree_status_t iree_vm_module_lookup_function_by_ordinal(
+    const iree_vm_module_t* module, iree_vm_function_linkage_t linkage,
+    iree_host_size_t ordinal, iree_vm_function_t* out_function) {
+  return module->get_function(module->self, linkage, ordinal, out_function,
+                              /*out_name=*/NULL,
+                              /*out_signature=*/NULL);
+}
+
+// Resolves |frame| to a source location via the module, when the module
+// provides the optional resolve_source_location method.
+IREE_API_EXPORT iree_status_t iree_vm_module_resolve_source_location(
+    const iree_vm_module_t* module, iree_vm_stack_frame_t* frame,
+    iree_vm_source_location_t* out_source_location) {
+  IREE_ASSERT_ARGUMENT(module);
+  IREE_ASSERT_ARGUMENT(frame);
+  IREE_ASSERT_ARGUMENT(out_source_location);
+  // Always produce a defined (zeroed) output, even on failure.
+  memset(out_source_location, 0, sizeof(*out_source_location));
+  // Modules without debug info may leave this method NULL.
+  if (!module->resolve_source_location) {
+    return iree_status_from_code(IREE_STATUS_UNAVAILABLE);
+  }
+  return module->resolve_source_location(module->self, frame,
+                                         out_source_location);
+}
+
+// Formats |source_location| into |builder|; UNAVAILABLE when there is no
+// location or it provides no formatter.
+IREE_API_EXPORT iree_status_t
+iree_vm_source_location_format(iree_vm_source_location_t* source_location,
+                               iree_vm_source_location_format_flags_t flags,
+                               iree_string_builder_t* builder) {
+  IREE_ASSERT_ARGUMENT(builder);
+  if (source_location != NULL && source_location->format != NULL) {
+    return source_location->format(source_location->self, source_location->data,
+                                   flags, builder);
+  }
+  return iree_status_from_code(IREE_STATUS_UNAVAILABLE);
+}
+
+// Queries the owning module for the function's name. Failures are swallowed
+// and a "<error>" sentinel is returned so callers can always log/format.
+IREE_API_EXPORT iree_string_view_t
+iree_vm_function_name(const iree_vm_function_t* function) {
+  iree_string_view_t name;
+  iree_status_t query_status = function->module->get_function(
+      function->module->self, function->linkage, function->ordinal,
+      /*out_function=*/NULL,
+      /*out_name=*/&name,
+      /*out_signature=*/NULL);
+  if (iree_status_is_ok(query_status)) {
+    return name;
+  }
+  iree_status_ignore(query_status);
+  return iree_make_cstring_view("<error>");
+}
+
+// Queries the owning module for the function's signature; on failure the
+// returned signature is all-zero (empty calling convention).
+IREE_API_EXPORT iree_vm_function_signature_t
+iree_vm_function_signature(const iree_vm_function_t* function) {
+  iree_vm_function_signature_t result_signature;
+  memset(&result_signature, 0, sizeof(result_signature));
+  IREE_IGNORE_ERROR(function->module->get_function(
+      function->module->self, function->linkage, function->ordinal,
+      /*out_function=*/NULL,
+      /*out_name=*/NULL,
+      /*out_signature=*/&result_signature));
+  return result_signature;
+}
+
+// Scans the function's reflection attributes for |key| and returns its value,
+// or the empty string when reflection is unavailable or the key is absent.
+IREE_API_EXPORT iree_string_view_t iree_vm_function_reflection_attr(
+    const iree_vm_function_t* function, iree_string_view_t key) {
+  iree_vm_module_t* module = function->module;
+  iree_string_view_t not_found = iree_string_view_empty();
+  // Reflection support is optional per-module.
+  if (!module->get_function_reflection_attr) {
+    return not_found;
+  }
+  // Linear scan: walk attributes by index until the query fails (end of the
+  // attribute list) or the requested key matches.
+  for (int attr_index = 0;; ++attr_index) {
+    iree_string_view_t attr_key, attr_value;
+    iree_status_t query_status = module->get_function_reflection_attr(
+        module->self, function->linkage, function->ordinal, attr_index,
+        &attr_key, &attr_value);
+    if (!iree_status_is_ok(query_status)) {
+      iree_status_ignore(query_status);
+      return not_found;
+    }
+    if (iree_string_view_equal(key, attr_key)) {
+      return attr_value;
+    }
+  }
+}
+
+// Indexed attribute access; forwards to the module's optional
+// get_function_reflection_attr method and fails with NOT_FOUND when the
+// module provides no reflection support at all.
+IREE_API_EXPORT iree_status_t iree_vm_get_function_reflection_attr(
+    iree_vm_function_t function, iree_host_size_t index,
+    iree_string_view_t* key, iree_string_view_t* value) {
+  if (!function.module->get_function_reflection_attr) {
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "reflection not available for the given module");
+  }
+  return function.module->get_function_reflection_attr(
+      function.module->self, function.linkage, function.ordinal, index, key,
+      value);
+}
diff --git a/runtime/src/iree/vm/module.h b/runtime/src/iree/vm/module.h
new file mode 100644
index 0000000..55c858e
--- /dev/null
+++ b/runtime/src/iree/vm/module.h
@@ -0,0 +1,498 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_MODULE_H_
+#define IREE_VM_MODULE_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/alignment.h"
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+#include "iree/base/string_builder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef struct iree_vm_module_t iree_vm_module_t;
+typedef struct iree_vm_stack_t iree_vm_stack_t;
+typedef struct iree_vm_stack_frame_t iree_vm_stack_frame_t;
+
+//===----------------------------------------------------------------------===//
+// Module / function reflection
+//===----------------------------------------------------------------------===//
+
+// A key-value pair of module/function reflection information.
+typedef struct iree_vm_reflection_attr_t {
+ iree_string_view_t key;
+ iree_string_view_t value;
+} iree_vm_reflection_attr_t;
+
+// Describes the type of a function reference.
+typedef enum iree_vm_function_linkage_e {
+ // Function is internal to the module and may not be reflectable.
+ IREE_VM_FUNCTION_LINKAGE_INTERNAL = 0,
+ // Function is an import from another module.
+ IREE_VM_FUNCTION_LINKAGE_IMPORT = 1,
+ // Function is an export from the module.
+ IREE_VM_FUNCTION_LINKAGE_EXPORT = 2,
+ // Function is an import from another module that may be unavailable.
+ IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL = 3,
+ // TODO(#1979): add linkage types for well-known functions like __init.
+} iree_vm_function_linkage_t;
+
+// A function reference that can be used with the iree_vm_function_* methods.
+// These should be treated as opaque and the accessor functions should be used
+// instead.
+//
+// The register counts specify required internal storage used for VM for stack
+// frame management and debugging. They must at least be able to contain all
+// entry arguments for the function. The counts may be omitted if the function
+// will not be referenced by a VM stack frame.
+typedef struct iree_vm_function_t {
+ // Module the function is contained within.
+ iree_vm_module_t* module;
+ // Linkage of the function. Note that IREE_VM_FUNCTION_LINKAGE_INTERNAL
+ // functions may be missing reflection information.
+ uint16_t linkage;
+ // Ordinal within the module in the linkage scope.
+ uint16_t ordinal;
+} iree_vm_function_t;
+static_assert(sizeof(iree_vm_function_t) <= 3 * sizeof(void*),
+ "Must remain small as stored on the stack");
+
+// Returns true if the |function| is null (didn't exist, etc).
+static inline bool iree_vm_function_is_null(iree_vm_function_t function) {
+  // A function with no owning module is the canonical "null" value.
+  return !function.module;
+}
+
+// Describes the expected calling convention and arguments/results of a
+// function.
+typedef struct iree_vm_function_signature_t {
+ // The VM calling convention declaration used to marshal arguments and
+ // results into and out of the function.
+ // Optional for imports and internal functions but required for exports.
+ //
+ // Format:
+ // - '0': version 0 prefix
+ // - Zero or more arguments:
+ // - 'i': int32_t integer (i32)
+ // - 'I': int64_t integer (i64)
+ // - 'r': ref-counted type pointer (!vm.ref<?>)
+ // - 'C' ... 'D': variadic list of flattened tuples of a specified type
+ // - EOL or '_'
+ // - Zero or more results:
+ // - 'i' or 'I'
+ // - 'r'
+ //
+ // Examples:
+ // `0` or `0_`: () -> ()
+ // `0i` or `0i_`: (i32) -> ()
+ // `0iiCiiD_i`: (i32, i32, tuple<i32, i32>...) -> i32
+ // `0irCirD_r`: (i32, !vm.ref<?>, tuple<i32, !vm.ref<?>>) -> !vm.ref<?>
+ //
+ // Users of this field must verify the version prefix in the first byte before
+ // using the declaration.
+ iree_string_view_t calling_convention;
+} iree_vm_function_signature_t;
+
+// Describes the imports, exports, and capabilities of a module.
+typedef struct iree_vm_module_signature_t {
+ // Total number of imported functions.
+ iree_host_size_t import_function_count;
+ // Total number of exported functions.
+ iree_host_size_t export_function_count;
+ // Total number of internal functions, if debugging info is present and they
+ // can be queried.
+ iree_host_size_t internal_function_count;
+} iree_vm_module_signature_t;
+
+// Internal storage for the module state.
+// Thread-compatible; it's expected that only one thread at a time is executing
+// VM functions and accessing this state.
+typedef struct iree_vm_module_state_t iree_vm_module_state_t;
+
+//===----------------------------------------------------------------------===//
+// Function calls and coroutines
+//===----------------------------------------------------------------------===//
+
+// A variable-length list of registers.
+//
+// This structure is an overlay for the bytecode that is serialized in a
+// matching format, though it can be stack allocated as needed.
+//
+// TODO(benvanik): this should be made private to the bytecode module, but is
+// used for toll-free variadic argument lists here. We could just define an
+// identical structure (and static_assert) to at least rename it to something
+// sensible (iree_vm_segment_size_list_t).
+typedef struct iree_vm_register_list_t {
+ uint16_t size;
+ uint16_t registers[];
+} iree_vm_register_list_t;
+static_assert(iree_alignof(iree_vm_register_list_t) == 2,
+ "expecting byte alignment (to avoid padding)");
+static_assert(offsetof(iree_vm_register_list_t, registers) == 2,
+ "expect no padding in the struct");
+
+// Function call data.
+//
+// Arguments and results are encoded following a standard format shared across
+// all module types. This allows implementations that have different storage
+// types (such as physical machine registers vs. virtual registers) to use the
+// same cross-module calling convention.
+//
+// Callees can assume that callers have properly allocated and setup the
+// argument and result buffers and need not verify them. This works only because
+// the calling convention format is directly queried from the callee module.
+//
+// Encoding:
+// - each int is encoded as a 4-byte aligned value
+// - each ref is encoded as a 4-byte aligned iree_vm_ref_t value
+// - variadic tuples are encoded as a 4-byte count prefix and the tuple values
+//
+// For example, (i32, tuple<!vm.ref<?>, i32>..., i32) is encoded as:
+// 4b: i32
+// 4b: tuple count
+// repeated:
+// 8b-16b: iree_vm_ref_t
+// 4b: i32
+// 4b: i32
+//
+// Example sequence:
+// 1. ModuleA wants to call SomeFunction from ModuleB
+// 2. ModuleA imports SomeFunction from ModuleB and gets its
+// iree_vm_function_signature_t during import resolution
+// 3. ModuleA checks that it understands/supports that calling convention
+// with error handling if needed (e.g. if ModuleB is newer and uses a newer
+// version that ModuleA wasn't compiled knowing about, or ModuleB is ancient
+// and uses a deprecated version that ModuleA has already dropped)
+// 4. ModuleA prepares argument and result buffers according to the calling
+// convention defined by ModuleB and calls SomeFunction
+// 5. ModuleB handles the call, trusting that the input and output buffers are
+// as expected
+//
+// NOTE: we could switch to using libffi, but I didn't want to require that for
+// all uses and didn't want to enable the issues that can arise when crossing
+// device boundaries. With what we have here we can rather easily serialize the
+// argument/result buffers and map them between independent address spaces.
+// Instead, implementing a native_module-alike of libffi_module would be a
+// better layering for callee modules.
+typedef struct iree_vm_function_call_t {
+ // Function to call.
+ iree_vm_function_t function;
+
+ // Argument buffer in the format described above.
+ // This is only read on beginning the function and need not live beyond that.
+ //
+ // Refs contained are retained by the caller and callees must retain them if
+ // they need them to live beyond the call.
+ iree_byte_span_t arguments;
+
+ // Storage for the result buffer; assumed undefined and then populated with
+ // data in a format described above. This is required for both the beginning
+ // of function invocation as well as each resume (as any may actually return
+ // control flow).
+ //
+ // Refs contained will be retained in the results buffer and callers must
+ // either move or release them upon return from the call.
+ iree_byte_span_t results;
+} iree_vm_function_call_t;
+
+#define IREE_VM_CCONV_TYPE_VOID 'v'
+#define IREE_VM_CCONV_TYPE_I32 'i'
+#define IREE_VM_CCONV_TYPE_I64 'I'
+#define IREE_VM_CCONV_TYPE_F32 'f'
+#define IREE_VM_CCONV_TYPE_F64 'F'
+#define IREE_VM_CCONV_TYPE_REF 'r'
+#define IREE_VM_CCONV_TYPE_SPAN_START 'C'
+#define IREE_VM_CCONV_TYPE_SPAN_END 'D'
+
+// Returns the arguments and results fragments from the function signature.
+// Either may be empty if they have no values.
+//
+// Example:
+// `` -> arguments = ``, results = ``
+// `0` -> arguments = ``, results = ``
+// `0v` -> arguments = ``, results = ``
+// `0ri` -> arguments = `ri`, results = ``
+// `0_ir` -> arguments = ``, results = `ir`
+// `0v_ir` -> arguments = ``, results = `ir`
+// `0iCiD_rr` -> arguments = `iCiD`, results = `rr`
+IREE_API_EXPORT iree_status_t iree_vm_function_call_get_cconv_fragments(
+ const iree_vm_function_signature_t* signature,
+ iree_string_view_t* out_arguments, iree_string_view_t* out_results);
+
+// Returns true if the given cconv contains one or more variadic types.
+IREE_API_EXPORT bool iree_vm_function_call_is_variadic_cconv(
+ iree_string_view_t cconv);
+
+// Counts the total number of arguments and results of a function.
+IREE_API_EXPORT iree_status_t iree_vm_function_call_count_arguments_and_results(
+ const iree_vm_function_signature_t* signature,
+ iree_host_size_t* out_argument_count, iree_host_size_t* out_result_count);
+
+// Returns the required size, in bytes, to store the data in the given cconv
+// fragment (like `iICriDr`).
+//
+// The provided |segment_size_list| is used for variadic arguments/results. Each
+// entry represents one of the top level arguments with spans being flattened.
+IREE_API_EXPORT iree_status_t iree_vm_function_call_compute_cconv_fragment_size(
+ iree_string_view_t cconv_fragment,
+ const iree_vm_register_list_t* segment_size_list,
+ iree_host_size_t* out_required_size);
+
+// Releases any retained refs within the call (either arguments or results).
+// This needs only be called if a call fails as implementations are required to
+// clean up the arguments as they are marshaled in and callers are required to
+// clean up the results as they are marshaled out.
+IREE_API_EXPORT void iree_vm_function_call_release(
+ iree_vm_function_call_t* call,
+ const iree_vm_function_signature_t* signature);
+
+// Results of an iree_vm_module_execute request.
+typedef struct iree_vm_execution_result_t {
+ // TODO(benvanik): yield information.
+ // Yield modes:
+ // - yield (yield instruction)
+ // - await (with 1+ wait handles)
+ // - break
+ int reserved;
+} iree_vm_execution_result_t;
+
+//===----------------------------------------------------------------------===//
+// Source locations
+//===----------------------------------------------------------------------===//
+
+// An opaque offset into a source map that a source resolver can calculate.
+// Do not assume that iree_vm_source_offset_t+1 means the next byte offset as
+// backends are free to treat these as everything from pointers to machine code
+// to hash codes.
+typedef int64_t iree_vm_source_offset_t;
+
+// Controls how source locations are formatted into strings.
+enum iree_vm_source_location_format_flag_bits_e {
+ IREE_VM_SOURCE_LOCATION_FORMAT_FLAG_NONE = 0u,
+ // Only formats a single line (excluding \n) for the source location, even
+ // if the full location information (such as a backtrace) is available.
+ IREE_VM_SOURCE_LOCATION_FORMAT_FLAG_SINGLE_LINE = 1u << 0,
+};
+typedef uint32_t iree_vm_source_location_format_flags_t;
+
+// Source location interface.
+typedef struct iree_vm_source_location_t {
+ IREE_API_UNSTABLE
+
+ // Implementation-specified fields. Do not use directly.
+ void* self;
+ uint64_t data[2];
+
+ iree_status_t(IREE_API_PTR* format)(
+ void* self, uint64_t data[2],
+ iree_vm_source_location_format_flags_t flags,
+ iree_string_builder_t* builder);
+} iree_vm_source_location_t;
+
+// Formats the |source_location| to its canonical string form.
+IREE_API_EXPORT iree_status_t
+iree_vm_source_location_format(iree_vm_source_location_t* source_location,
+ iree_vm_source_location_format_flags_t flags,
+ iree_string_builder_t* builder);
+
+//===----------------------------------------------------------------------===//
+// iree_vm_module_t
+//===----------------------------------------------------------------------===//
+
+// Indicates an event that can be signaled in modules from the hosting program.
+typedef enum iree_vm_signal_e {
+ // Program is resuming from a suspended state.
+ // Modules may reallocate memory for pools and caches.
+ //
+ // Modules are walked in registration order (A->B->C).
+ IREE_VM_SIGNAL_RESUME = 0,
+
+ // Program is entering a suspended state.
+ // Modules should drop any transient memory that is possible to reallocate
+ // upon resume.
+ //
+ // Modules are walked in reverse registration order (C->B->A).
+ IREE_VM_SIGNAL_SUSPEND = 1,
+
+ // Program has received a low memory alert.
+ // Modules must aggressively drop all possible memory even if expensive to
+ // rematerialize it. On some platforms this is sent as a threat that if
+ // sufficient memory is not unwired/freed ASAP the process will be killed.
+ //
+ // Modules are walked in reverse registration order (C->B->A).
+ IREE_VM_SIGNAL_LOW_MEMORY = 2,
+} iree_vm_signal_t;
+
+// Defines an interface that can be used to reflect and execute functions on a
+// module.
+//
+// Module implementations must be thread-safe as lookups and executions may
+// occur in any order from any thread.
+// TODO(benvanik): version this interface.
+typedef struct iree_vm_module_t {
+  IREE_API_UNSTABLE
+
+  // Implementation self pointer passed to all interface methods below.
+  void* self;
+  // Reference count guarding the lifetime of the module; destroy is called
+  // when it reaches zero (see iree_vm_module_release).
+  iree_atomic_ref_count_t ref_count;
+
+  // Destroys |self| when all references to the module have been released.
+  void(IREE_API_PTR* destroy)(void* self);
+
+  // Returns the name of the module (used during resolution).
+  iree_string_view_t(IREE_API_PTR* name)(void* self);
+
+  // Returns the reflected signature of the module.
+  iree_vm_module_signature_t(IREE_API_PTR* signature)(void* self);
+
+  // Gets one or more pieces of function information:
+  // - |out_function| set to the function reference.
+  // - |out_name| set to the function name.
+  // - |out_signature| set to the function signature.
+  // Each output is optional and may be passed as NULL by callers that only
+  // need a subset of the information.
+  iree_status_t(IREE_API_PTR* get_function)(
+      void* self, iree_vm_function_linkage_t linkage, iree_host_size_t ordinal,
+      iree_vm_function_t* out_function, iree_string_view_t* out_name,
+      iree_vm_function_signature_t* out_signature);
+
+  // Looks up a function with the given name and linkage in the module.
+  // This may perform a linear scan and results should be cached.
+  iree_status_t(IREE_API_PTR* lookup_function)(
+      void* self, iree_vm_function_linkage_t linkage, iree_string_view_t name,
+      iree_vm_function_t* out_function);
+
+  // Resolves a stack |frame| from the module to a |out_source_location|, if
+  // debug information is available.
+  // Optional: may be NULL for modules without debug info; callers must check
+  // before invoking (iree_vm_module_resolve_source_location does this).
+  iree_status_t(IREE_API_PTR* resolve_source_location)(
+      void* self, iree_vm_stack_frame_t* frame,
+      iree_vm_source_location_t* out_source_location);
+
+  // Allocates module state data.
+  iree_status_t(IREE_API_PTR* alloc_state)(
+      void* self, iree_allocator_t allocator,
+      iree_vm_module_state_t** out_module_state);
+
+  // Frees module state data.
+  void(IREE_API_PTR* free_state)(void* self,
+                                 iree_vm_module_state_t* module_state);
+
+  // Resolves the import with the given ordinal to |function|.
+  // The function is guaranteed to remain valid for the lifetime of the module
+  // state.
+  iree_status_t(IREE_API_PTR* resolve_import)(
+      void* self, iree_vm_module_state_t* module_state,
+      iree_host_size_t ordinal, const iree_vm_function_t* function,
+      const iree_vm_function_signature_t* signature);
+
+  // Notifies the module of a system signal.
+  iree_status_t(IREE_API_PTR* notify)(void* self,
+                                      iree_vm_module_state_t* module_state,
+                                      iree_vm_signal_t signal);
+
+  // Begins a function call with the given |call| arguments.
+  // Execution may yield in the case of asynchronous code and require one or
+  // more calls to the resume method to complete.
+  iree_status_t(IREE_API_PTR* begin_call)(
+      void* self, iree_vm_stack_t* stack, const iree_vm_function_call_t* call,
+      iree_vm_execution_result_t* out_result);
+
+  // Resumes execution of a previously-yielded call.
+  iree_status_t(IREE_API_PTR* resume_call)(
+      void* self, iree_vm_stack_t* stack,
+      iree_vm_execution_result_t* out_result);
+
+  // TODO(benvanik): move this/refactor.
+  // Gets a reflection attribute for a function by index.
+  // The returned key and value strings are guaranteed valid for the life
+  // of the module. Note that not all modules and functions have reflection
+  // attributes.
+  // Returns IREE_STATUS_NOT_FOUND if index >= the number of attributes for
+  // the function.
+  // Optional: may be NULL when the module exposes no reflection data; callers
+  // must check before invoking.
+  // See: docs/developers/design_docs/function_abi.md
+  iree_status_t(IREE_API_PTR* get_function_reflection_attr)(
+      void* self, iree_vm_function_linkage_t linkage, iree_host_size_t ordinal,
+      iree_host_size_t index, iree_string_view_t* key,
+      iree_string_view_t* value);
+} iree_vm_module_t;
+
+// Initializes the interface of a module handle.
+// This should be called by module implementations after they allocate
+// themselves to properly initialize the module interface prior to populating
+// interface function pointers. This ensures that version adaptation can be
+// performed by the library as needed.
+// TODO(benvanik): version/module size.
+IREE_API_EXPORT iree_status_t
+iree_vm_module_initialize(iree_vm_module_t* module, void* self);
+
+// Retains the given |module| for the caller.
+IREE_API_EXPORT void iree_vm_module_retain(iree_vm_module_t* module);
+
+// Releases the given |module| from the caller.
+IREE_API_EXPORT void iree_vm_module_release(iree_vm_module_t* module);
+
+// Returns the name of the module (used during resolution).
+IREE_API_EXPORT iree_string_view_t
+iree_vm_module_name(const iree_vm_module_t* module);
+
+// Returns the signature of the module describing the contents.
+IREE_API_EXPORT iree_vm_module_signature_t
+iree_vm_module_signature(const iree_vm_module_t* module);
+
+// Looks up a function with the given name and linkage in the |module|.
+// This may perform a linear scan and results should be cached.
+IREE_API_EXPORT iree_status_t iree_vm_module_lookup_function_by_name(
+ const iree_vm_module_t* module, iree_vm_function_linkage_t linkage,
+ iree_string_view_t name, iree_vm_function_t* out_function);
+
+// Looks up a function with the given ordinal and linkage in the |module|.
+IREE_API_EXPORT iree_status_t iree_vm_module_lookup_function_by_ordinal(
+ const iree_vm_module_t* module, iree_vm_function_linkage_t linkage,
+ iree_host_size_t ordinal, iree_vm_function_t* out_function);
+
+// Resolves a stack |frame| from the module to a |out_source_location|, if
+// debug information is available.
+IREE_API_EXPORT iree_status_t iree_vm_module_resolve_source_location(
+ const iree_vm_module_t* module, iree_vm_stack_frame_t* frame,
+ iree_vm_source_location_t* out_source_location);
+
+// Returns the name of the given function or empty string if not available.
+IREE_API_EXPORT iree_string_view_t
+iree_vm_function_name(const iree_vm_function_t* function);
+
+// Returns the signature of the function if reflection metadata is available.
+IREE_API_EXPORT iree_vm_function_signature_t
+iree_vm_function_signature(const iree_vm_function_t* function);
+
+// Returns a value for the given reflection attribute |key|, if found.
+// Returns the empty string if the reflection data in general or the specific
+// key is not found.
+//
+// See: docs/developers/design_docs/function_abi.md for documentation on the
+// ABI.
+IREE_API_EXPORT iree_string_view_t iree_vm_function_reflection_attr(
+ const iree_vm_function_t* function, iree_string_view_t key);
+
+// TODO(#1979): remove this and use iree_vm_function_reflection_attr.
+// Gets a reflection attribute for a function by index.
+// The returned key and value strings are guaranteed valid for the life
+// of the module. Note that not all modules and functions have reflection
+// attributes.
+// Returns IREE_STATUS_NOT_FOUND if index >= the number of attributes for
+// the function.
+// See: docs/developers/design_docs/function_abi.md
+IREE_API_EXPORT iree_status_t iree_vm_get_function_reflection_attr(
+ iree_vm_function_t function, iree_host_size_t index,
+ iree_string_view_t* key, iree_string_view_t* value);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_MODULE_H_
diff --git a/runtime/src/iree/vm/module_impl_emitc.c b/runtime/src/iree/vm/module_impl_emitc.c
new file mode 100644
index 0000000..c03694c
--- /dev/null
+++ b/runtime/src/iree/vm/module_impl_emitc.c
@@ -0,0 +1,7 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include EMITC_IMPLEMENTATION
diff --git a/runtime/src/iree/vm/native_module.c b/runtime/src/iree/vm/native_module.c
new file mode 100644
index 0000000..eff076a
--- /dev/null
+++ b/runtime/src/iree/vm/native_module.c
@@ -0,0 +1,449 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/native_module.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/vm/stack.h"
+
+// Native module implementation allocated for all modules.
+typedef struct iree_vm_native_module_t {
+  // Interface containing default function pointers.
+  // base_interface.self will be the self pointer to iree_vm_native_module_t.
+  //
+  // Must be first in the struct as we dereference the interface to find our
+  // members below.
+  iree_vm_module_t base_interface;
+
+  // Interface with optional user-provided function pointers.
+  // Copied wholesale from the caller-provided interface at initialize time.
+  iree_vm_module_t user_interface;
+
+  // The self passed to user_interface functions: the value of
+  // user_interface.self when the user provided one, or a pointer to this
+  // iree_vm_native_module_t otherwise (see iree_vm_native_module_initialize).
+  void* self;
+
+  // Allocator this module was allocated with and must be freed with.
+  iree_allocator_t allocator;
+
+  // Module descriptor used for reflection.
+  const iree_vm_native_module_descriptor_t* descriptor;
+} iree_vm_native_module_t;
+
+// Reports the storage required for the internal module wrapper; callers that
+// allocate module storage themselves (iree_vm_native_module_initialize) must
+// reserve at least this many bytes.
+IREE_API_EXPORT iree_host_size_t iree_vm_native_module_size(void) {
+  return sizeof(iree_vm_native_module_t);
+}
+
+#if defined(NDEBUG)
+// Release builds: verification is elided entirely (descriptors are expected
+// to be compiled in and already validated during development).
+static iree_status_t iree_vm_native_module_verify_descriptor(
+    const iree_vm_native_module_descriptor_t* module_descriptor) {
+  return iree_ok_status();
+}
+#else
+// Debug builds: validate invariants the runtime relies on, most importantly
+// the sorted export table used by the binary search in
+// iree_vm_native_module_lookup_function.
+static iree_status_t iree_vm_native_module_verify_descriptor(
+    const iree_vm_native_module_descriptor_t* module_descriptor) {
+  // Verify the export table is sorted by name. This will help catch issues with
+  // people appending to tables instead of inserting in the proper order.
+  for (iree_host_size_t i = 1; i < module_descriptor->export_count; ++i) {
+    iree_string_view_t prev_export_name =
+        module_descriptor->exports[i - 1].local_name;
+    iree_string_view_t export_name = module_descriptor->exports[i].local_name;
+    int cmp = iree_string_view_compare(prev_export_name, export_name);
+    // >= 0 also rejects duplicate names, not just out-of-order entries.
+    if (IREE_UNLIKELY(cmp >= 0)) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "module export table is not sorted by name "
+                              "(export %zu ('%.*s') >= %zu ('%.*s'))",
+                              i - 1, (int)prev_export_name.size,
+                              prev_export_name.data, i, (int)export_name.size,
+                              export_name.data);
+    }
+  }
+  return iree_ok_status();
+}
+#endif  // NDEBUG
+
+// Thunk: tears down the module, giving the user implementation a chance to
+// destroy its own state before the wrapper storage is released.
+static void IREE_API_PTR iree_vm_native_module_destroy(void* self) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+
+  // Capture the allocator up front: it must free the module storage itself,
+  // which is no longer valid to touch afterwards.
+  iree_allocator_t module_allocator = native_module->allocator;
+
+  // Invoke the optional user-provided destroy hook first.
+  if (native_module->user_interface.destroy != NULL) {
+    native_module->user_interface.destroy(native_module->self);
+  }
+
+  iree_allocator_free(module_allocator, native_module);
+}
+
+// Thunk: returns the module name, preferring a user-provided override and
+// falling back to the descriptor's module_name.
+static iree_string_view_t IREE_API_PTR iree_vm_native_module_name(void* self) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+  if (native_module->user_interface.name == NULL) {
+    return native_module->descriptor->module_name;
+  }
+  return native_module->user_interface.name(native_module->self);
+}
+
+// Thunk: derives the module signature from the descriptor tables unless the
+// user implementation provides its own signature callback.
+static iree_vm_module_signature_t IREE_API_PTR
+iree_vm_native_module_signature(void* self) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+  if (native_module->user_interface.signature != NULL) {
+    return native_module->user_interface.signature(native_module->self);
+  }
+  iree_vm_module_signature_t signature;
+  memset(&signature, 0, sizeof(signature));
+  signature.import_function_count = native_module->descriptor->import_count;
+  signature.export_function_count = native_module->descriptor->export_count;
+  // Native modules expose no internal functions.
+  signature.internal_function_count = 0;
+  return signature;
+}
+
+// Resolves metadata for the import at |ordinal| from the descriptor table.
+// All out parameters are optional and only populated when non-NULL.
+static iree_status_t IREE_API_PTR iree_vm_native_module_get_import_function(
+    iree_vm_native_module_t* module, iree_host_size_t ordinal,
+    iree_vm_function_t* out_function, iree_string_view_t* out_name,
+    iree_vm_function_signature_t* out_signature) {
+  if (IREE_UNLIKELY(ordinal >= module->descriptor->import_count)) {
+    // NOTE(review): the valid range is [0, import_count); the message wording
+    // '0 < %zu < %zu' slightly misstates the lower bound.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "import ordinal out of range (0 < %zu < %zu)",
+                            ordinal, module->descriptor->import_count);
+  }
+  const iree_vm_native_import_descriptor_t* import_descriptor =
+      &module->descriptor->imports[ordinal];
+  if (out_function) {
+    out_function->module = &module->base_interface;
+    // Map the OPTIONAL descriptor flag onto the optional-import linkage.
+    out_function->linkage = iree_all_bits_set(import_descriptor->flags,
+                                              IREE_VM_NATIVE_IMPORT_OPTIONAL)
+                                ? IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL
+                                : IREE_VM_FUNCTION_LINKAGE_IMPORT;
+    out_function->ordinal = (uint16_t)ordinal;
+  }
+  if (out_name) {
+    *out_name = import_descriptor->full_name;
+  }
+  // TODO(#1979): signature queries when info is useful.
+  return iree_ok_status();
+}
+
+// Resolves metadata for the export at |ordinal| from the descriptor table.
+// All out parameters are optional and only populated when non-NULL.
+static iree_status_t IREE_API_PTR iree_vm_native_module_get_export_function(
+    iree_vm_native_module_t* module, iree_host_size_t ordinal,
+    iree_vm_function_t* out_function, iree_string_view_t* out_name,
+    iree_vm_function_signature_t* out_signature) {
+  if (IREE_UNLIKELY(ordinal >= module->descriptor->export_count)) {
+    // NOTE(review): the valid range is [0, export_count); the message wording
+    // '0 < %zu < %zu' slightly misstates the lower bound.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "export ordinal out of range (0 < %zu < %zu)",
+                            ordinal, module->descriptor->export_count);
+  }
+  if (out_function) {
+    out_function->module = &module->base_interface;
+    out_function->linkage = IREE_VM_FUNCTION_LINKAGE_EXPORT;
+    out_function->ordinal = (uint16_t)ordinal;
+  }
+  const iree_vm_native_export_descriptor_t* export_descriptor =
+      &module->descriptor->exports[ordinal];
+  if (out_name) {
+    *out_name = export_descriptor->local_name;
+  }
+  if (out_signature) {
+    // Only the calling convention is available from the descriptor.
+    out_signature->calling_convention = export_descriptor->calling_convention;
+  }
+  return iree_ok_status();
+}
+
+// Module get_function entry point: zeroes all provided out parameters and
+// then dispatches on |linkage|, preferring a user-provided override.
+static iree_status_t IREE_API_PTR iree_vm_native_module_get_function(
+    void* self, iree_vm_function_linkage_t linkage, iree_host_size_t ordinal,
+    iree_vm_function_t* out_function, iree_string_view_t* out_name,
+    iree_vm_function_signature_t* out_signature) {
+  iree_vm_native_module_t* module = (iree_vm_native_module_t*)self;
+  // Clear outputs up front so callers see deterministic values on failure.
+  if (out_function) memset(out_function, 0, sizeof(*out_function));
+  if (out_name) memset(out_name, 0, sizeof(*out_name));
+  if (out_signature) memset(out_signature, 0, sizeof(*out_signature));
+  // A user override takes full responsibility for all linkage kinds.
+  if (module->user_interface.get_function) {
+    return module->user_interface.get_function(
+        module->self, linkage, ordinal, out_function, out_name, out_signature);
+  }
+  switch (linkage) {
+    case IREE_VM_FUNCTION_LINKAGE_IMPORT:
+    case IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL:
+      return iree_vm_native_module_get_import_function(
+          module, ordinal, out_function, out_name, out_signature);
+    case IREE_VM_FUNCTION_LINKAGE_EXPORT:
+      return iree_vm_native_module_get_export_function(
+          module, ordinal, out_function, out_name, out_signature);
+    default:
+      // Internal linkage (and any future kinds) are not exposed by the
+      // default native module implementation.
+      return iree_make_status(
+          IREE_STATUS_UNIMPLEMENTED,
+          "native modules do not support internal function queries");
+  }
+}
+
+// Thunk: function-level reflection. Only available when the user interface
+// supplies an implementation; the default reports UNIMPLEMENTED.
+static iree_status_t IREE_API_PTR
+iree_vm_native_module_get_function_reflection_attr(
+    void* self, iree_vm_function_linkage_t linkage, iree_host_size_t ordinal,
+    iree_host_size_t index, iree_string_view_t* key,
+    iree_string_view_t* value) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+  if (native_module->user_interface.get_function_reflection_attr == NULL) {
+    // TODO(benvanik): implement native module reflection.
+    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                            "reflection not yet implemented");
+  }
+  return native_module->user_interface.get_function_reflection_attr(
+      native_module->self, linkage, ordinal, index, key, value);
+}
+
+// Module lookup_function entry point: resolves an exported function by name.
+// Only EXPORT linkage is supported by the default implementation; a user
+// override, if present, handles all linkages itself.
+static iree_status_t IREE_API_PTR iree_vm_native_module_lookup_function(
+    void* self, iree_vm_function_linkage_t linkage, iree_string_view_t name,
+    iree_vm_function_t* out_function) {
+  iree_vm_native_module_t* module = (iree_vm_native_module_t*)self;
+  memset(out_function, 0, sizeof(*out_function));
+  if (module->user_interface.lookup_function) {
+    return module->user_interface.lookup_function(module->self, linkage, name,
+                                                  out_function);
+  }
+
+  if (IREE_UNLIKELY(linkage != IREE_VM_FUNCTION_LINKAGE_EXPORT)) {
+    // NOTE: we could support imports if required.
+    return iree_make_status(
+        IREE_STATUS_UNIMPLEMENTED,
+        "native modules do not support import/internal function queries");
+  }
+
+  // Binary search through the export descriptors.
+  // Relies on the descriptor invariant that exports are sorted by name
+  // (checked in debug builds by iree_vm_native_module_verify_descriptor).
+  // Signed ordinals so max_ordinal may go to -1 when export_count == 0.
+  ptrdiff_t min_ordinal = 0;
+  ptrdiff_t max_ordinal = module->descriptor->export_count - 1;
+  const iree_vm_native_export_descriptor_t* exports =
+      module->descriptor->exports;
+  while (min_ordinal <= max_ordinal) {
+    ptrdiff_t ordinal = (min_ordinal + max_ordinal) / 2;
+    int cmp = iree_string_view_compare(exports[ordinal].local_name, name);
+    if (cmp == 0) {
+      // Found: let get_function fill in the out parameters consistently.
+      return iree_vm_native_module_get_function(self, linkage, ordinal,
+                                                out_function, NULL, NULL);
+    } else if (cmp < 0) {
+      min_ordinal = ordinal + 1;
+    } else {
+      max_ordinal = ordinal - 1;
+    }
+  }
+  return iree_make_status(
+      IREE_STATUS_NOT_FOUND, "no function %.*s.%.*s exported by module",
+      (int)module->descriptor->module_name.size,
+      module->descriptor->module_name.data, (int)name.size, name.data);
+}
+
+// Thunk: allocates per-context module state via the user hook.
+// Modules without a hook are stateless and return NULL state.
+static iree_status_t IREE_API_PTR
+iree_vm_native_module_alloc_state(void* self, iree_allocator_t allocator,
+                                  iree_vm_module_state_t** out_module_state) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+  *out_module_state = NULL;
+  if (native_module->user_interface.alloc_state == NULL) {
+    // Default to no state.
+    return iree_ok_status();
+  }
+  return native_module->user_interface.alloc_state(
+      native_module->self, allocator, out_module_state);
+}
+
+// Thunk: frees per-context module state via the user hook. The default
+// implementation expects that no state was ever allocated.
+static void IREE_API_PTR iree_vm_native_module_free_state(
+    void* self, iree_vm_module_state_t* module_state) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+  if (native_module->user_interface.free_state != NULL) {
+    native_module->user_interface.free_state(native_module->self,
+                                             module_state);
+    return;
+  }
+  // No-op in the default implementation.
+  // TODO(#2843): IREE_DCHECK_EQ(NULL, module_state);
+  assert(!module_state);
+}
+
+// Thunk: binds one resolved import via the user hook. The default
+// implementation has no import table storage and reports UNIMPLEMENTED.
+static iree_status_t IREE_API_PTR iree_vm_native_module_resolve_import(
+    void* self, iree_vm_module_state_t* module_state, iree_host_size_t ordinal,
+    const iree_vm_function_t* function,
+    const iree_vm_function_signature_t* signature) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+  if (native_module->user_interface.resolve_import == NULL) {
+    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                            "native module does not support imports");
+  }
+  return native_module->user_interface.resolve_import(
+      native_module->self, module_state, ordinal, function, signature);
+}
+
+// Thunk: forwards lifecycle signals to the user hook; signals are ignored
+// when no hook is provided.
+static iree_status_t IREE_API_PTR iree_vm_native_module_notify(
+    void* self, iree_vm_module_state_t* module_state, iree_vm_signal_t signal) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+  if (native_module->user_interface.notify == NULL) {
+    return iree_ok_status();
+  }
+  return native_module->user_interface.notify(native_module->self,
+                                              module_state, signal);
+}
+
+// Module begin_call entry point: validates the call target, enters a VM
+// stack frame, invokes the registered shim/target pair, and leaves the frame
+// on success. A user-provided begin_call override bypasses all of this.
+static iree_status_t IREE_API_PTR iree_vm_native_module_begin_call(
+    void* self, iree_vm_stack_t* stack, const iree_vm_function_call_t* call,
+    iree_vm_execution_result_t* out_result) {
+  iree_vm_native_module_t* module = (iree_vm_native_module_t*)self;
+  // Only exported functions are directly callable and the ordinal must index
+  // into the descriptor function table used below.
+  if (IREE_UNLIKELY(call->function.linkage !=
+                    IREE_VM_FUNCTION_LINKAGE_EXPORT) ||
+      IREE_UNLIKELY(call->function.ordinal >=
+                    module->descriptor->export_count)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "function ordinal out of bounds: 0 < %u < %zu",
+                            call->function.ordinal,
+                            module->descriptor->export_count);
+  }
+  if (module->user_interface.begin_call) {
+    return module->user_interface.begin_call(module->self, stack, call,
+                                             out_result);
+  }
+
+  // NOTE: VM stack is currently unused. We could stash things here for the
+  // debugger or use it for coroutine state.
+  iree_host_size_t frame_size = 0;
+
+  iree_vm_stack_frame_t* callee_frame = NULL;
+  IREE_RETURN_IF_ERROR(iree_vm_stack_function_enter(
+      stack, &call->function, IREE_VM_STACK_FRAME_NATIVE, frame_size,
+      /*frame_cleanup_fn=*/NULL, &callee_frame));
+
+  // Call the target function using the shim.
+  const iree_vm_native_function_ptr_t* function_ptr =
+      &module->descriptor->functions[call->function.ordinal];
+  iree_vm_module_state_t* module_state = callee_frame->module_state;
+  iree_status_t status = function_ptr->shim(stack, call, function_ptr->target,
+                                            module, module_state, out_result);
+  if (IREE_UNLIKELY(!iree_status_is_ok(status))) {
+    // On failure the stack frame is intentionally left entered so the failed
+    // frame remains visible; only annotate the status when the status
+    // implementation carries annotations at all.
+#if IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS
+    iree_string_view_t module_name IREE_ATTRIBUTE_UNUSED =
+        iree_vm_native_module_name(module);
+    iree_string_view_t function_name IREE_ATTRIBUTE_UNUSED =
+        iree_string_view_empty();
+    // Best-effort name lookup purely for the error message.
+    iree_status_ignore(iree_vm_native_module_get_export_function(
+        module, call->function.ordinal, NULL, &function_name, NULL));
+    return iree_status_annotate_f(status,
+                                  "while invoking native function %.*s.%.*s",
+                                  (int)module_name.size, module_name.data,
+                                  (int)function_name.size, function_name.data);
+#else
+    return status;
+#endif  // IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS
+  }
+
+  return iree_vm_stack_function_leave(stack);
+}
+
+// Thunk: resumes a suspended call via the user hook; the default shim-based
+// call path never suspends and reports UNIMPLEMENTED.
+static iree_status_t IREE_API_PTR
+iree_vm_native_module_resume_call(void* self, iree_vm_stack_t* stack,
+                                  iree_vm_execution_result_t* out_result) {
+  iree_vm_native_module_t* native_module = (iree_vm_native_module_t*)self;
+  if (native_module->user_interface.resume_call == NULL) {
+    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                            "native module does not support resume");
+  }
+  return native_module->user_interface.resume_call(native_module->self, stack,
+                                                   out_result);
+}
+
+// Allocates module storage and initializes it; see the declaration in
+// native_module.h for the full contract.
+IREE_API_EXPORT iree_status_t iree_vm_native_module_create(
+    const iree_vm_module_t* interface,
+    const iree_vm_native_module_descriptor_t* module_descriptor,
+    iree_allocator_t allocator, iree_vm_module_t** out_module) {
+  IREE_ASSERT_ARGUMENT(out_module);
+  *out_module = NULL;
+
+  // Validate before allocating; iree_vm_native_module_initialize repeats
+  // these checks but running them here avoids a wasted malloc/free pair on
+  // invalid input.
+  if (IREE_UNLIKELY(!interface->begin_call) &&
+      IREE_UNLIKELY(!module_descriptor->functions)) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "native modules must provide call support or function pointers");
+  } else if (IREE_UNLIKELY(!interface->begin_call) &&
+             IREE_UNLIKELY(module_descriptor->export_count !=
+                           module_descriptor->function_count)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "native modules using the default call support "
+                            "must have 1:1 exports:function pointers");
+  }
+
+  // Perform some optional debug-only verification of the descriptor.
+  // Since native modules are designed to be compiled in we don't need to do
+  // this in release builds.
+  IREE_RETURN_IF_ERROR(
+      iree_vm_native_module_verify_descriptor(module_descriptor));
+
+  // TODO(benvanik): invert allocation such that caller allocates and we init.
+  // This would avoid the need for any dynamic memory allocation in the common
+  // case as the outer user module interface could nest us. Note that we'd need
+  // to expose this via a query_size function so that we could adjust the size
+  // of our storage independent of the definition of the user module.
+  iree_vm_native_module_t* module = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, sizeof(*module), (void**)&module));
+
+  iree_status_t status = iree_vm_native_module_initialize(
+      interface, module_descriptor, allocator, (iree_vm_module_t*)module);
+  if (!iree_status_is_ok(status)) {
+    // Initialization failed: release the storage we just allocated.
+    iree_allocator_free(allocator, module);
+    return status;
+  }
+
+  *out_module = &module->base_interface;
+  return iree_ok_status();
+}
+
+// Initializes caller-provided |base_module| storage (which must be at least
+// iree_vm_native_module_size() bytes) with the user |interface| and
+// |module_descriptor| and installs the default thunk table.
+IREE_API_EXPORT iree_status_t iree_vm_native_module_initialize(
+    const iree_vm_module_t* interface,
+    const iree_vm_native_module_descriptor_t* module_descriptor,
+    iree_allocator_t allocator, iree_vm_module_t* base_module) {
+  IREE_ASSERT_ARGUMENT(interface);
+  IREE_ASSERT_ARGUMENT(module_descriptor);
+  IREE_ASSERT_ARGUMENT(base_module);
+  iree_vm_native_module_t* module = (iree_vm_native_module_t*)base_module;
+
+  // Without a user begin_call the default call path needs the descriptor's
+  // function pointer table, matched 1:1 with the exports.
+  if (IREE_UNLIKELY(!interface->begin_call) &&
+      IREE_UNLIKELY(!module_descriptor->functions)) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "native modules must provide call support or function pointers");
+  } else if (IREE_UNLIKELY(!interface->begin_call) &&
+             IREE_UNLIKELY(module_descriptor->export_count !=
+                           module_descriptor->function_count)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "native modules using the default call support "
+                            "must have 1:1 exports:function pointers");
+  }
+
+  // Perform some optional debug-only verification of the descriptor.
+  // Since native modules are designed to be compiled in we don't need to do
+  // this in release builds.
+  IREE_RETURN_IF_ERROR(
+      iree_vm_native_module_verify_descriptor(module_descriptor));
+  module->allocator = allocator;
+  module->descriptor = module_descriptor;
+
+  // TODO(benvanik): version interface and copy only valid bytes.
+  memcpy(&module->user_interface, interface, sizeof(*interface));
+  // Thunks pass user_interface.self to user callbacks when set, otherwise the
+  // module pointer itself.
+  module->self =
+      module->user_interface.self ? module->user_interface.self : module;
+
+  // Base interface that routes through our thunks.
+  iree_vm_module_initialize(&module->base_interface, module);
+  module->base_interface.destroy = iree_vm_native_module_destroy;
+  module->base_interface.name = iree_vm_native_module_name;
+  module->base_interface.signature = iree_vm_native_module_signature;
+  module->base_interface.get_function = iree_vm_native_module_get_function;
+  module->base_interface.get_function_reflection_attr =
+      iree_vm_native_module_get_function_reflection_attr;
+  module->base_interface.lookup_function =
+      iree_vm_native_module_lookup_function;
+  module->base_interface.alloc_state = iree_vm_native_module_alloc_state;
+  module->base_interface.free_state = iree_vm_native_module_free_state;
+  module->base_interface.resolve_import = iree_vm_native_module_resolve_import;
+  module->base_interface.notify = iree_vm_native_module_notify;
+  module->base_interface.begin_call = iree_vm_native_module_begin_call;
+  module->base_interface.resume_call = iree_vm_native_module_resume_call;
+
+  return iree_ok_status();
+}
diff --git a/runtime/src/iree/vm/native_module.h b/runtime/src/iree/vm/native_module.h
new file mode 100644
index 0000000..a569452
--- /dev/null
+++ b/runtime/src/iree/vm/native_module.h
@@ -0,0 +1,136 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// NOTE: native_module_test.h contains documented examples of how to use this!
+
+#ifndef IREE_VM_NATIVE_MODULE_H_
+#define IREE_VM_NATIVE_MODULE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/module.h"
+#include "iree/vm/stack.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+enum iree_vm_native_import_flag_bits_e {
+ IREE_VM_NATIVE_IMPORT_REQUIRED = 1u << 0,
+ IREE_VM_NATIVE_IMPORT_OPTIONAL = 1u << 1,
+};
+typedef uint32_t iree_vm_native_import_flags_t;
+
+// Describes an imported native function in a native module.
+// All of this information is assumed read-only and will be referenced for the
+// lifetime of any module created with the descriptor.
+typedef struct iree_vm_native_import_descriptor_t {
+ // Flags controlling import resolution.
+ iree_vm_native_import_flags_t flags;
+ // Fully-qualified function name (for example, 'other_module.foo').
+ iree_string_view_t full_name;
+} iree_vm_native_import_descriptor_t;
+
+// Describes an exported native function in a native module.
+// All of this information is assumed read-only and will be referenced for the
+// lifetime of any module created with the descriptor.
+typedef struct iree_vm_native_export_descriptor_t {
+ // Module-local function name (for example, 'foo' for function 'module.foo').
+ iree_string_view_t local_name;
+
+ // Calling convention string; see iree/vm/module.h for details.
+ iree_string_view_t calling_convention;
+
+ // An optional list of function-level reflection attributes.
+ iree_host_size_t reflection_attr_count;
+ const iree_vm_reflection_attr_t* reflection_attrs;
+} iree_vm_native_export_descriptor_t;
+
+typedef iree_status_t(IREE_API_PTR* iree_vm_native_function_target_t)(
+ iree_vm_stack_t* stack, void* module, void* module_state);
+
+typedef iree_status_t(IREE_API_PTR* iree_vm_native_function_shim_t)(
+ iree_vm_stack_t* stack, const iree_vm_function_call_t* call,
+ iree_vm_native_function_target_t target_fn, void* module,
+ void* module_state, iree_vm_execution_result_t* out_result);
+
+// An entry in the function pointer table.
+typedef struct iree_vm_native_function_ptr_t {
+ // A shim function that takes the VM ABI and maps it to the target ABI.
+ iree_vm_native_function_shim_t shim;
+ // Target function passed to the shim.
+ iree_vm_native_function_target_t target;
+} iree_vm_native_function_ptr_t;
+
+// Describes a native module implementation by way of descriptor tables.
+// All of this information is assumed read-only and will be referenced for the
+// lifetime of any module created with the descriptor.
+//
+// The common native module code will use this descriptor to return metadata on
+// query, lookup exported functions, and call module-provided implementation
+// functions for state and call management.
+typedef struct iree_vm_native_module_descriptor_t {
+ IREE_API_UNSTABLE
+
+ // Name of the module prefixed on all exported functions.
+ iree_string_view_t module_name;
+
+ // All imported function descriptors.
+ // interface.resolve_import will be called for each import.
+ // Imports must be in order sorted by name compatible with
+ // iree_string_view_compare.
+ iree_host_size_t import_count;
+ const iree_vm_native_import_descriptor_t* imports;
+
+ // All exported function descriptors.
+ // Exports must be in order sorted by name compatible with
+ // iree_string_view_compare.
+ iree_host_size_t export_count;
+ const iree_vm_native_export_descriptor_t* exports;
+
+ // All function shims and target function pointers.
+ // These must match 1:1 with the exports if using the default begin_call
+ // implementation and are optional if overriding begin_call.
+ iree_host_size_t function_count;
+ const iree_vm_native_function_ptr_t* functions;
+
+ // An optional list of module-level reflection attributes.
+ iree_host_size_t reflection_attr_count;
+ const iree_vm_reflection_attr_t* reflection_attrs;
+} iree_vm_native_module_descriptor_t;
+
+// Returns the size, in bytes, of the allocation required for native modules.
+// Callers may allocate more memory if they need additional storage.
+IREE_API_EXPORT iree_host_size_t iree_vm_native_module_size(void);
+
+// Creates a new native module with the metadata tables in |descriptor|.
+// These tables will be used for reflection and function lookup, and the
+// provided function pointers will be called when state needs to be managed or
+// exported functions need to be called.
+//
+// An implementation |interface| providing functions for state management and
+// function calls can be provided to override default implementations of
+// functions. The structure will be copied and the self pointer will be passed
+// to all |interface| functions.
+//
+// The provided |descriptor| will be referenced by the created module and must
+// be kept live for the lifetime of the module.
+IREE_API_EXPORT iree_status_t iree_vm_native_module_create(
+ const iree_vm_module_t* interface,
+ const iree_vm_native_module_descriptor_t* module_descriptor,
+ iree_allocator_t allocator, iree_vm_module_t** out_module);
+
+// Initializes a native module in caller-allocated |module| storage of at
+// least iree_vm_native_module_size() bytes, avoiding a separate allocation.
+// Semantics otherwise match iree_vm_native_module_create: |module_descriptor|
+// must remain live for the lifetime of the module.
+IREE_API_EXPORT iree_status_t iree_vm_native_module_initialize(
+    const iree_vm_module_t* interface,
+    const iree_vm_native_module_descriptor_t* module_descriptor,
+    iree_allocator_t allocator, iree_vm_module_t* module);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_NATIVE_MODULE_H_
diff --git a/runtime/src/iree/vm/native_module_benchmark.cc b/runtime/src/iree/vm/native_module_benchmark.cc
new file mode 100644
index 0000000..14da29b
--- /dev/null
+++ b/runtime/src/iree/vm/native_module_benchmark.cc
@@ -0,0 +1,19 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "benchmark/benchmark.h"
+#include "iree/base/api.h"
+#include "iree/base/logging.h"
+#include "iree/vm/module.h"
+#include "iree/vm/native_module.h"
+#include "iree/vm/native_module_test.h"
+#include "iree/vm/stack.h"
+
+namespace {
+
+// TODO(benvanik): native module benchmarks.
+
+} // namespace
diff --git a/runtime/src/iree/vm/native_module_cc.h b/runtime/src/iree/vm/native_module_cc.h
new file mode 100644
index 0000000..015fdb8
--- /dev/null
+++ b/runtime/src/iree/vm/native_module_cc.h
@@ -0,0 +1,263 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_NATIVE_MODULE_CC_H_
+#define IREE_VM_NATIVE_MODULE_CC_H_
+
+#include <cstring>
+#include <memory>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/span.h"
+#include "iree/base/status_cc.h"
+#include "iree/vm/module.h"
+#include "iree/vm/native_module_packing.h" // IWYU pragma: export
+#include "iree/vm/stack.h"
+
+#ifndef __cplusplus
+#error "This header is meant for use with C++ module implementations."
+#endif // __cplusplus
+
+namespace iree {
+namespace vm {
+
+// A native module as exported to the VM dynamic module linking API.
+// This allows easy wrapping of C++ module implementations and removes a
+// majority of the boilerplate required with marshaling args/results out/in of
+// the VM via the ABI.
+//
+// Functions are defined on the State type as member functions returning either
+// Status or StatusOr. Arguments are passed as primitive types (int32_t),
+// wrapped ref objects (vm::ref<my_type_t>&), or some nesting of std::array,
+// std::tuple, and std::span to match fixed-length arrays of the same type,
+// tuples of mixed types, or dynamic arrays (variadic arguments). Results may be
+// returned as either their type or an std::tuple/std::array of types.
+//
+// Usage:
+// // Per-context module state that must only be thread-compatible.
+// // Define
+// struct MyState final {
+// StatusOr<std::tuple<int32_t, int32_t>> MyMethod1(vm::ref<my_type_t> t);
+// };
+//
+// // Table of functions mapped to their name in the IR.
+// static const vm::NativeFunction<MyState> kMyFunctions[] = {
+// vm::MakeNativeFunction("my_method_1", &MyState::MyMethod1),
+// };
+//
+// // The outer module wrapper shared across contexts.
+// // Must be thread-safe.
+// struct MyModule : public NativeModule<MyState> {
+// StatusOr<std::unique_ptr<MyState>> CreateState(iree_allocator_t) {
+// // You could pass in thread-safe shared resources to MyState.
+// return std::make_unique<MyState>();
+// }
+// };
+//
+// // Creates the module and exposes it as a C interface.
+// // Ownership transfers to the caller.
+// iree_vm_module_t* create_my_module(iree_allocator_t allocator) {
+// return std::make_unique<MyModule>("my_module", allocator,
+//       std::span{kMyFunctions}).release()->interface();
+// }
+// C++ wrapper exposing a per-context State type through the C
+// iree_vm_module_t ABI. See the usage example in the comment above.
+template <typename State>
+class NativeModule {
+ public:
+  // |name| and |dispatch_table| are referenced (not copied) and must remain
+  // live for the lifetime of the module.
+  NativeModule(const char* name, iree_allocator_t allocator,
+               iree::span<const NativeFunction<State>> dispatch_table)
+      : name_(name), allocator_(allocator), dispatch_table_(dispatch_table) {
+    IREE_CHECK_OK(iree_vm_module_initialize(&interface_, this));
+    // Route all C module callbacks through the static thunks below.
+    interface_.destroy = NativeModule::ModuleDestroy;
+    interface_.name = NativeModule::ModuleName;
+    interface_.signature = NativeModule::ModuleSignature;
+    interface_.get_function = NativeModule::ModuleGetFunction;
+    interface_.lookup_function = NativeModule::ModuleLookupFunction;
+    interface_.alloc_state = NativeModule::ModuleAllocState;
+    interface_.free_state = NativeModule::ModuleFreeState;
+    interface_.resolve_import = NativeModule::ModuleResolveImport;
+    interface_.notify = NativeModule::ModuleNotify;
+    interface_.begin_call = NativeModule::ModuleBeginCall;
+  }
+
+  virtual ~NativeModule() = default;
+
+  // C API module interface bound to this NativeModule instance.
+  iree_vm_module_t* interface() { return &interface_; }
+
+ protected:
+  // Creates a new per-context module State holder.
+  virtual StatusOr<std::unique_ptr<State>> CreateState(
+      iree_allocator_t allocator) = 0;
+
+  // Notifies the module a signal has been raised. Default: ignored.
+  virtual Status Notify(State* state, iree_vm_signal_t signal) {
+    return OkStatus();
+  }
+
+ private:
+  static NativeModule* FromModulePointer(void* self) {
+    return reinterpret_cast<NativeModule*>(self);
+  }
+  static State* FromStatePointer(void* self) {
+    return reinterpret_cast<State*>(self);
+  }
+
+  static void ModuleDestroy(void* self) { delete FromModulePointer(self); }
+
+  static iree_string_view_t ModuleName(void* self) {
+    auto* module = FromModulePointer(self);
+    return iree_make_cstring_view(module->name_);
+  }
+
+  // Reports export counts; C++ native modules have no imports or internal
+  // functions.
+  static iree_vm_module_signature_t ModuleSignature(void* self) {
+    auto* module = FromModulePointer(self);
+    iree_vm_module_signature_t signature = {0};
+    signature.import_function_count = 0;
+    signature.export_function_count = module->dispatch_table_.size();
+    signature.internal_function_count = 0;
+    return signature;
+  }
+
+  // Resolves metadata for the export at |ordinal|; all out params optional.
+  static iree_status_t ModuleGetFunction(
+      void* self, iree_vm_function_linkage_t linkage, iree_host_size_t ordinal,
+      iree_vm_function_t* out_function, iree_string_view_t* out_name,
+      iree_vm_function_signature_t* out_signature) {
+    // Clear outputs up front so failures leave deterministic values.
+    if (out_function) {
+      std::memset(out_function, 0, sizeof(*out_function));
+    }
+    if (out_name) {
+      out_name->data = nullptr;
+      out_name->size = 0;
+    }
+    if (out_signature) {
+      std::memset(out_signature, 0, sizeof(*out_signature));
+    }
+    auto* module = FromModulePointer(self);
+    // FIX: was `ordinal > size()`, which let ordinal == size() through and
+    // read one past the end of the dispatch table below.
+    if (IREE_UNLIKELY(ordinal >= module->dispatch_table_.size())) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "function out of bounds: 0 < %zu < %zu", ordinal,
+                              module->dispatch_table_.size());
+    }
+    const auto& dispatch_function = module->dispatch_table_[ordinal];
+    if (out_function) {
+      out_function->module = module->interface();
+      out_function->linkage = IREE_VM_FUNCTION_LINKAGE_EXPORT;
+      out_function->ordinal = static_cast<uint16_t>(ordinal);
+    }
+    if (out_name) {
+      *out_name = dispatch_function.name;
+    }
+    if (out_signature) {
+      out_signature->calling_convention = dispatch_function.cconv;
+    }
+    return iree_ok_status();
+  }
+
+  // Linear scan over the dispatch table by exported name.
+  static iree_status_t ModuleLookupFunction(void* self,
+                                            iree_vm_function_linkage_t linkage,
+                                            iree_string_view_t name,
+                                            iree_vm_function_t* out_function) {
+    IREE_ASSERT_ARGUMENT(out_function);
+    std::memset(out_function, 0, sizeof(*out_function));
+    if (IREE_UNLIKELY(!name.data || !name.size)) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "function name empty");
+    }
+
+    auto* module = FromModulePointer(self);
+    out_function->module = module->interface();
+    out_function->linkage = IREE_VM_FUNCTION_LINKAGE_EXPORT;
+    // Use an unsigned index to match size() and make the ordinal narrowing
+    // explicit (ordinals are 16-bit in iree_vm_function_t).
+    for (size_t i = 0; i < module->dispatch_table_.size(); ++i) {
+      if (iree_string_view_equal(name, module->dispatch_table_[i].name)) {
+        out_function->ordinal = static_cast<uint16_t>(i);
+        return iree_ok_status();
+      }
+    }
+    return iree_make_status(IREE_STATUS_NOT_FOUND, "function %.*s not exported",
+                            (int)name.size, name.data);
+  }
+
+  // Allocates per-context state via the subclass CreateState factory.
+  static iree_status_t ModuleAllocState(
+      void* self, iree_allocator_t allocator,
+      iree_vm_module_state_t** out_module_state) {
+    IREE_ASSERT_ARGUMENT(out_module_state);
+    *out_module_state = nullptr;
+
+    auto* module = FromModulePointer(self);
+    IREE_ASSIGN_OR_RETURN(auto module_state, module->CreateState(allocator));
+
+    // Ownership transfers to the C API; reclaimed in ModuleFreeState.
+    *out_module_state =
+        reinterpret_cast<iree_vm_module_state_t*>(module_state.release());
+    return iree_ok_status();
+  }
+
+  static void ModuleFreeState(void* self,
+                              iree_vm_module_state_t* module_state) {
+    if (module_state) delete FromStatePointer(module_state);
+  }
+
+  // Imports are not supported by the C++ wrapper.
+  static iree_status_t ModuleResolveImport(
+      void* self, iree_vm_module_state_t* module_state,
+      iree_host_size_t ordinal, const iree_vm_function_t* function,
+      const iree_vm_function_signature_t* signature) {
+    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                            "C++ API does not support imports");
+  }
+
+  static iree_status_t ModuleNotify(void* self,
+                                    iree_vm_module_state_t* module_state,
+                                    iree_vm_signal_t signal) {
+    auto* module = FromModulePointer(self);
+    return module->Notify(FromStatePointer(module_state), signal);
+  }
+
+  // Validates the target, enters a VM stack frame, and invokes the packed
+  // dispatch entry; annotates failures with module.function for diagnostics.
+  static iree_status_t ModuleBeginCall(void* self, iree_vm_stack_t* stack,
+                                       const iree_vm_function_call_t* call,
+                                       iree_vm_execution_result_t* out_result) {
+    IREE_ASSERT_ARGUMENT(out_result);
+    std::memset(out_result, 0, sizeof(*out_result));
+    auto* module = FromModulePointer(self);
+    if (IREE_UNLIKELY(call->function.ordinal >=
+                      module->dispatch_table_.size())) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "function ordinal out of bounds: 0 < %u < %zu",
+                              call->function.ordinal,
+                              module->dispatch_table_.size());
+    }
+    const auto& info = module->dispatch_table_[call->function.ordinal];
+
+    // NOTE: VM stack is currently unused. We could stash things here for the
+    // debugger or use it for coroutine state.
+    iree_host_size_t frame_size = 0;
+
+    iree_vm_stack_frame_t* callee_frame = NULL;
+    IREE_RETURN_IF_ERROR(iree_vm_stack_function_enter(
+        stack, &call->function, IREE_VM_STACK_FRAME_NATIVE, frame_size,
+        /*frame_cleanup_fn=*/nullptr, &callee_frame));
+
+    auto* state = FromStatePointer(callee_frame->module_state);
+    iree_status_t status = info.call(info.ptr, state, stack, call, out_result);
+    if (IREE_UNLIKELY(!iree_status_is_ok(status))) {
+      status = iree_status_annotate_f(
+          status, "while invoking C++ function %s.%.*s", module->name_,
+          (int)info.name.size, info.name.data);
+      return status;
+    }
+
+    return iree_vm_stack_function_leave(stack);
+  }
+
+  const char* name_;
+  const iree_allocator_t allocator_;
+  iree_vm_module_t interface_;
+
+  const iree::span<const NativeFunction<State>> dispatch_table_;
+};
+
+} // namespace vm
+} // namespace iree
+
+#endif // IREE_VM_NATIVE_MODULE_CC_H_
diff --git a/runtime/src/iree/vm/native_module_packing.h b/runtime/src/iree/vm/native_module_packing.h
new file mode 100644
index 0000000..db98523
--- /dev/null
+++ b/runtime/src/iree/vm/native_module_packing.h
@@ -0,0 +1,705 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_MODULE_ABI_PACKING_H_
+#define IREE_VM_MODULE_ABI_PACKING_H_
+
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/span.h"
+#include "iree/base/status_cc.h"
+#include "iree/vm/builtin_types.h"
+#include "iree/vm/module.h"
+#include "iree/vm/ref.h"
+#include "iree/vm/ref_cc.h"
+#include "iree/vm/stack.h"
+
+// std::string_view is available starting in C++17.
+// Prior to that only IREE's C iree_string_view_t is available.
+#if defined(__has_include)
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+#define IREE_HAVE_STD_STRING_VIEW 1
+#include <string_view>
+#endif // __has_include(<string_view>)
+#endif // __has_include
+
+namespace iree {
+namespace vm {
+namespace packing {
+
+namespace impl {
+
+// Workaround required to ensure proper evaluation order of parameter packs.
+// MSVC (and other compilers, like clang-cl in MSVC compat mode) may evaluate
+// parameter pack function arguments in any order. This shim allows us to expand
+// the parameter pack inside of an initializer list, which unlike function
+// arguments must be evaluated by the compiler in the order the elements appear
+// in the list.
+//
+// Example:
+// impl::order_sequence{(ExpandedAction(), 0)...};
+//
+// More information:
+// https://stackoverflow.com/questions/29194858/order-of-function-calls-in-variadic-template-expansion
+struct order_sequence {
+  // Accepts (and discards) any number of arguments. Used only so a parameter
+  // pack can be expanded inside a braced initializer list, which the language
+  // guarantees is evaluated left-to-right.
+  template <typename... T>
+  order_sequence(T&&...) {}
+};
+
+// Coming in C++20, but not widely available yet.
+// Strips reference and cv-qualifiers from T (equivalent of std::remove_cvref).
+template <class T>
+struct remove_cvref {
+  typedef std::remove_cv_t<std::remove_reference_t<T>> type;
+};
+
+} // namespace impl
+
+// SFINAE helper enabled only for arithmetic and enum types -- values that are
+// copied directly out of the packed argument buffer.
+template <typename T>
+using enable_if_primitive =
+    typename std::enable_if<std::is_arithmetic<T>::value ||
+                            std::is_enum<T>::value>::type;
+// SFINAE helper: the complement of enable_if_primitive.
+template <typename T>
+using enable_if_not_primitive = typename std::enable_if<!(
+    std::is_arithmetic<T>::value || std::is_enum<T>::value)>::type;
+
+//===----------------------------------------------------------------------===//
+// Compile-time string literals
+//===----------------------------------------------------------------------===//
+
+// Compile-time constant string.
+// This allows us to concat string literals and produce a single flattened
+// char[] containing the results. Includes a \0 so the character storage is
+// length N + 1 and can be accessed as a c_str.
+//
+// Use the `literal` helper function to define a const string literal without
+// needing the size.
+//
+// Example:
+// // produces: const_string<2>("ab")
+// constexpr const auto str = literal("a") + literal("b");
+template <size_t N>
+class const_string {
+ public:
+  // Copies an N-character literal (the source array carries a trailing NUL).
+  constexpr const_string(const char (&data)[N + 1])
+      : const_string(data, std::make_index_sequence<N>()) {}
+  // Concatenating constructor: joins lhs (N1 chars) and rhs (N - N1 chars).
+  template <size_t N1, typename std::enable_if<(N1 <= N), bool>::type = true>
+  constexpr const_string(const const_string<N1>& lhs,
+                         const const_string<N - N1>& rhs)
+      : const_string{lhs, rhs, std::make_index_sequence<N1>{},
+                     std::make_index_sequence<N - N1>{}} {}
+
+  constexpr std::size_t size() const { return N; }
+  constexpr const char* data() const { return data_; }
+  constexpr const char* c_str() const { return data_; }
+  constexpr operator const char*() const { return data_; }
+  constexpr char operator[](size_t i) const { return data_[i]; }
+
+ private:
+  // Element-wise copy via index pack expansion; appends the trailing NUL.
+  template <size_t... PACK>
+  constexpr const_string(const char (&data)[N + 1],
+                         std::index_sequence<PACK...>)
+      : data_{data[PACK]..., '\0'} {}
+  // Element-wise concatenation via two index packs (one per source string).
+  template <size_t N1, size_t... PACK1, size_t... PACK2>
+  constexpr const_string(const const_string<N1>& lhs,
+                         const const_string<N - N1>& rhs,
+                         std::index_sequence<PACK1...>,
+                         std::index_sequence<PACK2...>)
+      : data_{lhs[PACK1]..., rhs[PACK2]..., '\0'} {}
+
+  // N characters plus trailing NUL so c_str() is valid.
+  const char data_[N + 1];
+};
+
+// Concatenates two compile-time strings into a const_string of combined
+// length.
+template <size_t N1, size_t N2>
+constexpr auto operator+(const const_string<N1>& lhs,
+                         const const_string<N2>& rhs) {
+  return const_string<N1 + N2>(lhs, rhs);
+}
+
+// Defines a compile-time constant string literal.
+// N_PLUS_1 is the array length including the implicit trailing NUL.
+template <size_t N_PLUS_1>
+constexpr auto literal(const char (&data)[N_PLUS_1]) {
+  return const_string<N_PLUS_1 - 1>(data);
+}
+
+// Recursive implementation of concat_literals: empty -> "", one -> itself,
+// many -> head + concat of tail.
+constexpr auto concat_impl() { return literal(""); }
+template <typename T>
+constexpr auto concat_impl(const T& lhs) {
+  return lhs;
+}
+template <typename T, typename... Ts>
+constexpr auto concat_impl(const T& lhs, const Ts&... s) {
+  return lhs + concat_impl(s...);
+}
+
+// Concatenates one or more const_string values into a new const_string.
+//
+// Example:
+//  constexpr const auto abc = concat_literals(literal("a"),
+//                                             literal("b"),
+//                                             literal("c"));
+template <typename... Ts>
+constexpr auto concat_literals(const Ts&... s) {
+  return concat_impl(s...);
+}
+
+// Recursive implementation of splat_literal: C copies of v, base case C == 1.
+template <size_t C, typename T>
+struct splat_impl {
+  static constexpr auto apply(const T& v) {
+    return concat_literals(v, splat_impl<C - 1, T>::apply(v));
+  }
+};
+template <typename T>
+struct splat_impl<1, T> {
+  static constexpr auto apply(const T& v) { return v; }
+};
+
+// Splats a single const_string value C times.
+//
+// Example:
+//  constexpr const auto aaa = splat_literal<3>(literal("a"));
+template <size_t C, typename T>
+constexpr auto splat_literal(const T& v) {
+  return splat_impl<C, T>::apply(v);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling convention format generation
+//===----------------------------------------------------------------------===//
+// Prototyped here: https://godbolt.org/z/Tvhh7M
+
+// Maps a C++ parameter/result type to its calling-convention character(s).
+template <typename T>
+struct cconv_map;
+
+// Default: any other type maps to "i" (a 32-bit value slot).
+// NOTE(review): this covers all primitives without a specialization below
+// (enums, floats, etc.); confirm intended coverage against the VM cconv spec.
+template <typename T>
+struct cconv_map {
+  static constexpr const auto conv_chars = literal("i");
+};
+
+// 64-bit integers use "I".
+template <>
+struct cconv_map<int64_t> {
+  static constexpr const auto conv_chars = literal("I");
+};
+template <>
+struct cconv_map<uint64_t> {
+  static constexpr const auto conv_chars = literal("I");
+};
+
+// Reference types (and string types passed as buffer refs) use "r".
+template <>
+struct cconv_map<opaque_ref> {
+  static constexpr const auto conv_chars = literal("r");
+};
+template <typename T>
+struct cconv_map<ref<T>> {
+  static constexpr const auto conv_chars = literal("r");
+};
+template <>
+struct cconv_map<iree_string_view_t> {
+  static constexpr const auto conv_chars = literal("r");
+};
+#if defined(IREE_HAVE_STD_STRING_VIEW)
+template <>
+struct cconv_map<std::string_view> {
+  static constexpr const auto conv_chars = literal("r");
+};
+#endif  // IREE_HAVE_STD_STRING_VIEW
+
+// Fixed-size arrays expand to S repetitions of the element's characters.
+template <typename U, size_t S>
+struct cconv_map<std::array<U, S>> {
+  static constexpr const auto conv_chars = splat_literal<S>(
+      cconv_map<typename impl::remove_cvref<U>::type>::conv_chars);
+};
+
+// Tuples flatten to the concatenation of their element characters.
+template <typename... Ts>
+struct cconv_map<std::tuple<Ts...>> {
+  static constexpr const auto conv_chars = concat_literals(
+      cconv_map<typename impl::remove_cvref<Ts>::type>::conv_chars...);
+};
+
+// Variadic spans wrap the element characters in "C...D" delimiters.
+template <typename U>
+struct cconv_map<iree::span<U>> {
+  static constexpr const auto conv_chars = concat_literals(
+      literal("C"), cconv_map<typename impl::remove_cvref<U>::type>::conv_chars,
+      literal("D"));
+};
+
+// Builds (at compile time) the full calling-convention string for a function
+// with parameters and a non-void result: "0" version prefix, parameter
+// characters, "_" separator, result characters. The backing const_string has
+// static storage so the returned view remains valid for the process lifetime.
+template <typename Result, size_t ParamsCount, typename... Params>
+struct cconv_storage {
+  static const iree_string_view_t value() {
+    static constexpr const auto value = concat_literals(
+        literal("0"),
+        concat_literals(
+            cconv_map<
+                typename impl::remove_cvref<Params>::type>::conv_chars...),
+        literal("_"),
+        concat_literals(
+            cconv_map<typename impl::remove_cvref<Result>::type>::conv_chars));
+    static constexpr const auto str =
+        iree_string_view_t{value.data(), value.size()};
+    return str;
+  }
+};
+
+// No parameters: "0v_" followed by the result characters.
+template <typename Result>
+struct cconv_storage<Result, 0> {
+  static const iree_string_view_t value() {
+    static constexpr const auto value = concat_literals(
+        literal("0v_"),
+        concat_literals(
+            cconv_map<typename impl::remove_cvref<Result>::type>::conv_chars));
+    static constexpr const auto str =
+        iree_string_view_t{value.data(), value.size()};
+    return str;
+  }
+};
+
+// Void result: parameter characters followed by "_v".
+template <size_t ParamsCount, typename... Params>
+struct cconv_storage_void {
+  static const iree_string_view_t value() {
+    static constexpr const auto value = concat_literals(
+        literal("0"),
+        concat_literals(
+            cconv_map<
+                typename impl::remove_cvref<Params>::type>::conv_chars...),
+        literal("_v"));
+    static constexpr const auto str =
+        iree_string_view_t{value.data(), value.size()};
+    return str;
+  }
+};
+
+// No parameters and void result: the fixed "0v_v" signature.
+template <>
+struct cconv_storage_void<0> {
+  static const iree_string_view_t value() {
+    static constexpr const auto value = concat_literals(literal("0v_v"));
+    static constexpr const auto str =
+        iree_string_view_t{value.data(), value.size()};
+    return str;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Parameter unpacking
+//===----------------------------------------------------------------------===//
+
+// TODO(benvanik): see if we can't use `extern template` to share
+// implementations of these and prevent code bloat across many modules.
+// We can also try some non-templated base functions (like "UnpackI32") that the
+// templated ones simply wrap with type casts.
+
+namespace impl {
+
+// Cursor type used while walking the packed argument buffer.
+using params_ptr_t = uint8_t*;
+
+// Forward declarations for all ParamUnpack specializations defined below.
+template <typename T, typename EN = void>
+struct ParamUnpack;
+template <>
+struct ParamUnpack<opaque_ref>;
+template <typename T>
+struct ParamUnpack<ref<T>>;
+template <typename T>
+struct ParamUnpack<const ref<T>>;
+template <>
+struct ParamUnpack<iree_string_view_t>;
+#if defined(IREE_HAVE_STD_STRING_VIEW)
+template <>
+struct ParamUnpack<std::string_view>;
+#endif  // IREE_HAVE_STD_STRING_VIEW
+template <typename U, size_t S>
+struct ParamUnpack<std::array<U, S>>;
+template <typename... Ts>
+struct ParamUnpack<std::tuple<Ts...>>;
+template <typename U>
+struct ParamUnpack<iree::span<U>, enable_if_not_primitive<U>>;
+template <typename U>
+struct ParamUnpack<iree::span<U>, enable_if_primitive<U>>;
+
+// Unpacks a packed VM argument buffer into a tuple of C++ storage values,
+// one element per target parameter type.
+struct Unpacker {
+  // Returns the unpacked tuple or a failure if any element failed to unpack
+  // or the buffer size does not exactly match the parameter list.
+  template <typename... Ts>
+  static StatusOr<std::tuple<typename impl::ParamUnpack<
+      typename std::remove_reference<Ts>::type>::storage_type...>>
+  LoadSequence(iree_byte_span_t storage) {
+    // Default-construct all storage slots, then fill them in order below.
+    auto params = std::make_tuple(
+        typename impl::ParamUnpack<
+            typename impl::remove_cvref<Ts>::type>::storage_type()...);
+    Status status;
+    params_ptr_t ptr = storage.data;
+    ApplyLoad<Ts...>(status, ptr, params,
+                     std::make_index_sequence<sizeof...(Ts)>());
+    IREE_RETURN_IF_ERROR(std::move(status));
+    // Every byte of the argument buffer must be consumed; a mismatch means
+    // the cconv signature and the C++ parameter list disagree.
+    params_ptr_t limit = storage.data + storage.data_length;
+    if (IREE_UNLIKELY(ptr != limit)) {
+      return iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "argument buffer unpacking failure; consumed %zu of %zu bytes",
+          (reinterpret_cast<intptr_t>(ptr) -
+           reinterpret_cast<intptr_t>(storage.data)),
+          storage.data_length);
+    }
+    return std::move(params);
+  }
+
+ private:
+  // Invokes ParamUnpack<T>::Load for each tuple element in declaration order
+  // (order_sequence guarantees left-to-right evaluation).
+  template <typename... Ts, typename T, size_t... I>
+  static void ApplyLoad(Status& status, params_ptr_t& ptr, T&& params,
+                        std::index_sequence<I...>) {
+    impl::order_sequence{
+        (impl::ParamUnpack<typename impl::remove_cvref<
+             typename std::tuple_element<I, std::tuple<Ts...>>::type>::type>::
+             Load(status, ptr, std::get<I>(params)),
+         0)...};
+  }
+};
+
+// Common primitive types (`i32`, `i64`, `f32`, enums, etc).
+// Copied directly out of the argument buffer; advances |ptr| by sizeof(T).
+template <typename T>
+struct ParamUnpack<T, enable_if_primitive<T>> {
+  using storage_type = T;
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    out_param = *reinterpret_cast<const T*>(ptr);
+    ptr += sizeof(T);
+  }
+};
+
+// An opaque ref type (`vm.ref<?>`), possibly null.
+// Retains the ref (the caller's register keeps its own reference).
+template <>
+struct ParamUnpack<opaque_ref> {
+  using storage_type = opaque_ref;
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    iree_vm_ref_retain(reinterpret_cast<iree_vm_ref_t*>(ptr), &out_param);
+    ptr += sizeof(iree_vm_ref_t);
+  }
+};
+
+// A `vm.ref<T>` type, possibly null.
+// Ownership is transferred to the parameter.
+template <typename T>
+struct ParamUnpack<ref<T>> {
+  using storage_type = ref<T>;
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    auto* reg_ptr = reinterpret_cast<iree_vm_ref_t*>(ptr);
+    ptr += sizeof(iree_vm_ref_t);
+    if (reg_ptr->type == ref_type_descriptor<T>::get()->type) {
+      // Matching type: take over the register's reference and clear it so the
+      // source buffer no longer owns it.
+      out_param = vm::retain_ref(reinterpret_cast<T*>(reg_ptr->ptr));
+      memset(reg_ptr, 0, sizeof(*reg_ptr));
+    } else if (IREE_UNLIKELY(reg_ptr->type != IREE_VM_REF_TYPE_NULL)) {
+      // Non-null ref of the wrong type: report the mismatch via |status|
+      // (out_param stays default-constructed).
+      status =
+          iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                           "parameter contains a reference to the wrong type; "
+                           "have %.*s but expected %.*s",
+                           (int)iree_vm_ref_type_name(reg_ptr->type).size,
+                           iree_vm_ref_type_name(reg_ptr->type).data,
+                           (int)ref_type_descriptor<T>::get()->type_name.size,
+                           ref_type_descriptor<T>::get()->type_name.data);
+    } else {
+      // Null ref is allowed; yields an empty ref<T>.
+      out_param = {};
+    }
+  }
+};
+
+// `const ref<T>` parameters unpack exactly like `ref<T>` (storage_type is
+// the same non-const ref<T> and ownership transfer is identical), so inherit
+// the implementation instead of duplicating it byte-for-byte.
+// (Resolves the previous "TODO(benvanik): merge with above somehow?".)
+template <typename T>
+struct ParamUnpack<const ref<T>> : public ParamUnpack<ref<T>> {};
+
+// An `util.byte_buffer` containing a string.
+// The string view is aliased directly into the underlying byte buffer.
+// NOTE(review): the view borrows the buffer's storage without retaining the
+// ref; presumably the caller's register keeps the buffer alive for the call
+// duration -- confirm.
+template <>
+struct ParamUnpack<iree_string_view_t> {
+  using storage_type = iree_string_view_t;
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    auto* reg_ptr = reinterpret_cast<iree_vm_ref_t*>(ptr);
+    ptr += sizeof(iree_vm_ref_t);
+    if (reg_ptr->type == ref_type_descriptor<iree_vm_buffer_t>::get()->type) {
+      auto byte_span = reinterpret_cast<iree_vm_buffer_t*>(reg_ptr->ptr)->data;
+      out_param = iree_make_string_view(
+          reinterpret_cast<const char*>(byte_span.data), byte_span.data_length);
+    } else if (IREE_UNLIKELY(reg_ptr->type != IREE_VM_REF_TYPE_NULL)) {
+      status = iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "parameter contains a reference to the wrong type; "
+          "have %.*s but expected %.*s",
+          (int)iree_vm_ref_type_name(reg_ptr->type).size,
+          iree_vm_ref_type_name(reg_ptr->type).data,
+          (int)ref_type_descriptor<iree_vm_buffer_t>::get()->type_name.size,
+          ref_type_descriptor<iree_vm_buffer_t>::get()->type_name.data);
+    } else {
+      // NOTE: empty string is allowed here!
+      out_param = iree_string_view_empty();
+    }
+  }
+};
+#if defined(IREE_HAVE_STD_STRING_VIEW)
+// Same as the iree_string_view_t unpacker but producing std::string_view.
+template <>
+struct ParamUnpack<std::string_view> {
+  using storage_type = std::string_view;
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    auto* reg_ptr = reinterpret_cast<iree_vm_ref_t*>(ptr);
+    ptr += sizeof(iree_vm_ref_t);
+    if (reg_ptr->type == ref_type_descriptor<iree_vm_buffer_t>::get()->type) {
+      auto byte_span = reinterpret_cast<iree_vm_buffer_t*>(reg_ptr->ptr)->data;
+      out_param = std::string_view{
+          reinterpret_cast<const char*>(byte_span.data), byte_span.data_length};
+    } else if (IREE_UNLIKELY(reg_ptr->type != IREE_VM_REF_TYPE_NULL)) {
+      status = iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "parameter contains a reference to the wrong type; "
+          "have %.*s but expected %.*s",
+          (int)iree_vm_ref_type_name(reg_ptr->type).size,
+          iree_vm_ref_type_name(reg_ptr->type).data,
+          (int)ref_type_descriptor<iree_vm_buffer_t>::get()->type_name.size,
+          ref_type_descriptor<iree_vm_buffer_t>::get()->type_name.data);
+    } else {
+      // NOTE: empty string is allowed here!
+      out_param = {};
+    }
+  }
+};
+#endif  // IREE_HAVE_STD_STRING_VIEW
+
+// Arrays are C++ ABI only representing a fixed repeated field (`i32, i32`).
+template <typename U, size_t S>
+struct ParamUnpack<std::array<U, S>> {
+  using element_type = typename impl::remove_cvref<U>::type;
+  using storage_type = std::array<element_type, S>;
+  // Unpacks S consecutive elements by delegating to the element unpacker.
+  // FIX: this must dispatch through ParamUnpack<element_type>; the bare
+  // `ParamUnpack::Load` named the injected class (this array specialization),
+  // which is ill-formed when instantiated since an element reference cannot
+  // bind to the array storage_type& parameter.
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    for (size_t i = 0; i < S; ++i) {
+      ParamUnpack<element_type>::Load(status, ptr, out_param[i]);
+    }
+  }
+};
+
+// Tuples (`tuple<i32, i64>`) expand to just their flattened contents.
+template <typename... Ts>
+struct ParamUnpack<std::tuple<Ts...>> {
+  using storage_type = std::tuple<typename impl::remove_cvref<Ts>::type...>;
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    UnpackTuple(status, ptr, out_param,
+                std::make_index_sequence<sizeof...(Ts)>());
+  }
+  // Unpacks each tuple element in order (order_sequence guarantees
+  // left-to-right evaluation of the pack expansion).
+  template <size_t... I>
+  static void UnpackTuple(Status& status, params_ptr_t& ptr,
+                          storage_type& params, std::index_sequence<I...>) {
+    impl::order_sequence{
+        (ParamUnpack<typename std::tuple_element<I, std::tuple<Ts...>>::type>::
+             Load(status, ptr, std::get<I>(params)),
+         0)...};
+  }
+};
+
+// Complex variadic span (like `tuple<i32, tuple<ref<...>, i64>>...`).
+// We need to allocate storage here so that we can marshal the element type
+// out. In the future we could check that all subelements are primitives and
+// alias if the host machine endianness is the same.
+template <typename U>
+struct ParamUnpack<iree::span<U>, enable_if_not_primitive<U>> {
+  using element_type = typename impl::remove_cvref<U>::type;
+  using storage_type = std::vector<element_type>;
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    // Variadic spans are encoded as an i32 count followed by the elements.
+    iree_host_size_t count = *reinterpret_cast<const int32_t*>(ptr);
+    ptr += sizeof(int32_t);
+    out_param.resize(count);
+    for (iree_host_size_t i = 0; i < count; ++i) {
+      ParamUnpack<element_type>::Load(status, ptr, out_param[i]);
+    }
+  }
+};
+
+// Simple primitive variadic span (like `i32...`). We can alias directly into
+// the argument buffer so long as endianness matches.
+template <typename U>
+struct ParamUnpack<iree::span<U>, enable_if_primitive<U>> {
+  using element_type = U;
+  using storage_type = iree::span<const element_type>;
+  static void Load(Status& status, params_ptr_t& ptr, storage_type& out_param) {
+    // i32 count prefix, then |count| contiguous elements aliased in place.
+    iree_host_size_t count = *reinterpret_cast<const int32_t*>(ptr);
+    ptr += sizeof(int32_t);
+    out_param =
+        iree::span<U>(reinterpret_cast<const element_type*>(ptr), count);
+    ptr += sizeof(element_type) * count;
+  }
+};
+
+} // namespace impl
+
+//===----------------------------------------------------------------------===//
+// Result packing
+//===----------------------------------------------------------------------===//
+
+namespace impl {
+
+// Cursor type used while writing the packed result buffer.
+using result_ptr_t = uint8_t*;
+
+// Packs a single C++ result value into the VM result buffer.
+// Default handles primitives by direct store; advances |ptr| by sizeof(T).
+template <typename T>
+struct ResultPack {
+  static void Store(result_ptr_t& ptr, T value) {
+    *reinterpret_cast<T*>(ptr) = value;
+    ptr += sizeof(T);
+  }
+};
+
+// Opaque refs move their reference into the result register.
+template <>
+struct ResultPack<opaque_ref> {
+  static void Store(result_ptr_t& ptr, opaque_ref value) {
+    iree_vm_ref_move(value.get(), reinterpret_cast<iree_vm_ref_t*>(ptr));
+    ptr += sizeof(iree_vm_ref_t);
+  }
+};
+
+// Typed refs release ownership into the result register.
+template <typename T>
+struct ResultPack<ref<T>> {
+  static void Store(result_ptr_t& ptr, ref<T> value) {
+    iree_vm_ref_wrap_assign(value.release(), value.type(),
+                            reinterpret_cast<iree_vm_ref_t*>(ptr));
+    ptr += sizeof(iree_vm_ref_t);
+  }
+};
+
+// Forward declarations for the aggregate packers below.
+template <typename U, size_t S>
+struct ResultPack<std::array<U, S>>;
+template <typename... Ts>
+struct ResultPack<std::tuple<Ts...>>;
+
+// Fixed-size arrays pack as S consecutive elements.
+template <typename U, size_t S>
+struct ResultPack<std::array<U, S>> {
+  static void Store(result_ptr_t& ptr, std::array<U, S> value) {
+    for (size_t i = 0; i < S; ++i) {
+      ResultPack<U>::Store(ptr, std::move(value[i]));
+    }
+  }
+};
+
+// Tuples pack their elements in declaration order (order_sequence guarantees
+// left-to-right evaluation of the pack expansion).
+template <typename... Ts>
+struct ResultPack<std::tuple<Ts...>> {
+  static void Store(result_ptr_t& ptr, std::tuple<Ts...> results) {
+    PackTuple(ptr, results, std::make_index_sequence<sizeof...(Ts)>());
+  }
+  template <typename... T, size_t... I>
+  static inline void PackTuple(result_ptr_t& ptr, std::tuple<T...>& value,
+                               std::index_sequence<I...>) {
+    impl::order_sequence{
+        (ResultPack<typename std::tuple_element<I, std::tuple<T...>>::type>::
+             Store(ptr, std::move(std::get<I>(value))),
+         0)...};
+  }
+};
+
+} // namespace impl
+
+//===----------------------------------------------------------------------===//
+// Function wrapping
+//===----------------------------------------------------------------------===//
+
+// Adapts a member function `StatusOr<Results> (Owner::*)(Params...)` to the
+// VM calling convention: unpacks arguments, invokes the method, and packs
+// the results back into the call's result buffer.
+template <typename Owner, typename Results, typename... Params>
+struct DispatchFunctor {
+  using FnPtr = StatusOr<Results> (Owner::*)(Params...);
+
+  // |ptr| is the type-erased member pointer stored in NativeFunction; it is
+  // cast back to FnPtr here (the inverse of the cast in MakeNativeFunction).
+  static Status Call(void (Owner::*ptr)(), Owner* self, iree_vm_stack_t* stack,
+                     const iree_vm_function_call_t* call,
+                     iree_vm_execution_result_t* out_result) {
+    // Marshal arguments into types/locals we can forward to the function.
+    IREE_ASSIGN_OR_RETURN(
+        auto params, impl::Unpacker::LoadSequence<Params...>(call->arguments));
+
+    // Call the target function with the params.
+    IREE_ASSIGN_OR_RETURN(
+        auto results,
+        ApplyFn(reinterpret_cast<FnPtr>(ptr), self, std::move(params),
+                std::make_index_sequence<sizeof...(Params)>()));
+
+    // Marshal call results back into the ABI results buffer.
+    impl::result_ptr_t result_ptr = call->results.data;
+    impl::ResultPack<Results>::Store(result_ptr, std::move(results));
+
+    return OkStatus();
+  }
+
+  // Expands the params tuple into individual member-function arguments.
+  template <typename T, size_t... I>
+  static StatusOr<Results> ApplyFn(FnPtr ptr, Owner* self, T&& params,
+                                   std::index_sequence<I...>) {
+    return (self->*ptr)(std::move(std::get<I>(params))...);
+  }
+};
+
+// A DispatchFunctor specialization for methods with no return values.
+// Same argument unpacking as DispatchFunctor but skips result packing.
+template <typename Owner, typename... Params>
+struct DispatchFunctorVoid {
+  using FnPtr = Status (Owner::*)(Params...);
+
+  static Status Call(void (Owner::*ptr)(), Owner* self, iree_vm_stack_t* stack,
+                     const iree_vm_function_call_t* call,
+                     iree_vm_execution_result_t* out_result) {
+    IREE_ASSIGN_OR_RETURN(
+        auto params, impl::Unpacker::LoadSequence<Params...>(call->arguments));
+    return ApplyFn(reinterpret_cast<FnPtr>(ptr), self, std::move(params),
+                   std::make_index_sequence<sizeof...(Params)>());
+  }
+
+  // Expands the params tuple into individual member-function arguments.
+  template <typename T, size_t... I>
+  static Status ApplyFn(FnPtr ptr, Owner* self, T&& params,
+                        std::index_sequence<I...>) {
+    return (self->*ptr)(std::move(std::get<I>(params))...);
+  }
+};
+
+} // namespace packing
+
+// Dispatch table entry for one exported native function.
+template <typename Owner>
+struct NativeFunction {
+  // Exported function name (module-relative).
+  iree_string_view_t name;
+  // Calling convention string (see cconv_storage above).
+  iree_string_view_t cconv;
+  // Type-erased member function pointer; cast back by |call|.
+  void (Owner::*const ptr)();
+  // Dispatch thunk that unpacks args, invokes |ptr| on |self|, and packs
+  // results.
+  Status (*const call)(void (Owner::*ptr)(), Owner* self,
+                       iree_vm_stack_t* stack,
+                       const iree_vm_function_call_t* call,
+                       iree_vm_execution_result_t* out_result);
+};
+
+// Builds a NativeFunction table entry for a member function returning
+// StatusOr<Result>; derives the cconv string and dispatch thunk from the
+// signature. |name| must be a NUL-terminated literal outliving the module.
+template <typename Owner, typename Result, typename... Params>
+constexpr NativeFunction<Owner> MakeNativeFunction(
+    const char* name, StatusOr<Result> (Owner::*fn)(Params...)) {
+  using dispatch_functor_t = packing::DispatchFunctor<Owner, Result, Params...>;
+  return {iree_make_cstring_view(name),
+          packing::cconv_storage<Result, sizeof...(Params), Params...>::value(),
+          (void (Owner::*)())fn, &dispatch_functor_t::Call};
+}
+
+// Overload for member functions returning plain Status (no result values).
+template <typename Owner, typename... Params>
+constexpr NativeFunction<Owner> MakeNativeFunction(
+    const char* name, Status (Owner::*fn)(Params...)) {
+  using dispatch_functor_t = packing::DispatchFunctorVoid<Owner, Params...>;
+  return {iree_make_cstring_view(name),
+          packing::cconv_storage_void<sizeof...(Params), Params...>::value(),
+          (void (Owner::*)())fn, &dispatch_functor_t::Call};
+}
+
+} // namespace vm
+} // namespace iree
+
+#endif // IREE_VM_MODULE_ABI_PACKING_H_
diff --git a/runtime/src/iree/vm/native_module_test.cc b/runtime/src/iree/vm/native_module_test.cc
new file mode 100644
index 0000000..84202d0
--- /dev/null
+++ b/runtime/src/iree/vm/native_module_test.cc
@@ -0,0 +1,110 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/native_module_test.h"
+
+#include <vector>
+
+#include "iree/base/status_cc.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+#include "iree/vm/context.h"
+#include "iree/vm/instance.h"
+#include "iree/vm/invocation.h"
+#include "iree/vm/list.h"
+#include "iree/vm/ref_cc.h"
+#include "iree/vm/value.h"
+
+namespace iree {
+namespace {
+
+// Test suite that uses module_a and module_b defined in native_module_test.h.
+// Both modules are put in a context and the module_b.entry function can be
+// executed with RunFunction.
+class VMNativeModuleTest : public ::testing::Test {
+ protected:
+  // Creates the VM instance and a context containing module_a and module_b.
+  // Uses `override` (not bare `virtual`) per gtest convention so the compiler
+  // verifies the signature actually overrides ::testing::Test::SetUp.
+  void SetUp() override {
+    IREE_CHECK_OK(iree_vm_instance_create(iree_allocator_system(), &instance_));
+
+    // Create both modules shared instances. These are generally immutable and
+    // can be shared by multiple contexts.
+    iree_vm_module_t* module_a = nullptr;
+    IREE_CHECK_OK(module_a_create(iree_allocator_system(), &module_a));
+    iree_vm_module_t* module_b = nullptr;
+    IREE_CHECK_OK(module_b_create(iree_allocator_system(), &module_b));
+
+    // Create the context with both modules and perform runtime linkage.
+    // Imports from module_a -> module_b will be resolved and per-context state
+    // will be allocated.
+    std::vector<iree_vm_module_t*> modules = {module_a, module_b};
+    IREE_CHECK_OK(iree_vm_context_create_with_modules(
+        instance_, IREE_VM_CONTEXT_FLAG_NONE, modules.data(), modules.size(),
+        iree_allocator_system(), &context_));
+
+    // No longer need the modules as the context retains them.
+    iree_vm_module_release(module_a);
+    iree_vm_module_release(module_b);
+  }
+
+  // Releases the context before the instance (reverse creation order).
+  void TearDown() override {
+    iree_vm_context_release(context_);
+    iree_vm_instance_release(instance_);
+  }
+
+  // Synchronously invokes |function_name| with a single i32 argument and
+  // returns its single i32 result.
+  StatusOr<int32_t> RunFunction(iree_string_view_t function_name,
+                                int32_t arg0) {
+    // Lookup the entry function. This can be cached in an application if
+    // multiple calls will be made.
+    iree_vm_function_t function;
+    IREE_RETURN_IF_ERROR(
+        iree_vm_context_resolve_function(
+            context_, iree_make_cstring_view("module_b.entry"), &function),
+        "unable to resolve entry point");
+
+    // Setup I/O lists and pass in the argument. The result list will be
+    // populated upon return.
+    vm::ref<iree_vm_list_t> input_list;
+    IREE_RETURN_IF_ERROR(iree_vm_list_create(
+        /*element_type=*/nullptr, 1, iree_allocator_system(), &input_list));
+    auto arg0_value = iree_vm_value_make_i32(arg0);
+    IREE_RETURN_IF_ERROR(
+        iree_vm_list_push_value(input_list.get(), &arg0_value));
+    vm::ref<iree_vm_list_t> output_list;
+    IREE_RETURN_IF_ERROR(iree_vm_list_create(
+        /*element_type=*/nullptr, 1, iree_allocator_system(), &output_list));
+
+    // Invoke the entry function to do our work. Runs synchronously.
+    IREE_RETURN_IF_ERROR(
+        iree_vm_invoke(context_, function, IREE_VM_INVOCATION_FLAG_NONE,
+                       /*policy=*/nullptr, input_list.get(), output_list.get(),
+                       iree_allocator_system()));
+
+    // Load the output result.
+    iree_vm_value_t ret0_value;
+    IREE_RETURN_IF_ERROR(
+        iree_vm_list_get_value(output_list.get(), 0, &ret0_value));
+    return ret0_value.i32;
+  }
+
+ private:
+  iree_vm_instance_t* instance_ = nullptr;
+  iree_vm_context_t* context_ = nullptr;
+};
+
+// Invokes module_b.entry three times and checks the returned values.
+// NOTE(review): expected results (1, 4, 8) come from the test modules defined
+// in native_module_test.h; see that file for the computation performed.
+TEST_F(VMNativeModuleTest, Example) {
+  IREE_ASSERT_OK_AND_ASSIGN(
+      int32_t v0, RunFunction(iree_make_cstring_view("module_b.entry"), 1));
+  ASSERT_EQ(v0, 1);
+  IREE_ASSERT_OK_AND_ASSIGN(
+      int32_t v1, RunFunction(iree_make_cstring_view("module_b.entry"), 2));
+  ASSERT_EQ(v1, 4);
+  IREE_ASSERT_OK_AND_ASSIGN(
+      int32_t v2, RunFunction(iree_make_cstring_view("module_b.entry"), 3));
+  ASSERT_EQ(v2, 8);
+}
+
+} // namespace
+} // namespace iree
diff --git a/runtime/src/iree/vm/native_module_test.h b/runtime/src/iree/vm/native_module_test.h
new file mode 100644
index 0000000..4585223
--- /dev/null
+++ b/runtime/src/iree/vm/native_module_test.h
@@ -0,0 +1,307 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/context.h"
+#include "iree/vm/instance.h"
+#include "iree/vm/module.h"
+#include "iree/vm/native_module.h"
+#include "iree/vm/ref.h"
+#include "iree/vm/stack.h"
+
+// Wrapper for calling the import functions with type (i32)->i32.
+// NOTE: we should have some common ones prebuilt or can generate and rely on
+// LTO to strip duplicates across the entire executable.
+// TODO(benvanik): generate/export these shims/call functions in stack.h.
+//
+// |import| must be a resolved import (see module_b_resolve_import below).
+// On success *|out_ret0| holds the callee's i32 result.
+static iree_status_t call_import_i32_i32(iree_vm_stack_t* stack,
+                                         const iree_vm_function_t* import,
+                                         int32_t arg0, int32_t* out_ret0) {
+  iree_vm_function_call_t call;
+  call.function = *import;
+  // Arguments/results are marshaled as raw byte spans over this frame's
+  // locals; the callee's shim unpacks them (see call_shim_i32_i32 below).
+  call.arguments = iree_make_byte_span(&arg0, sizeof(arg0));
+  call.results = iree_make_byte_span(out_ret0, sizeof(*out_ret0));
+
+  iree_vm_execution_result_t result;
+  memset(&result, 0, sizeof(result));
+  return import->module->begin_call(import->module, stack, &call, &result);
+}
+
+// Signature of a native (i32)->i32 target function invoked by the shim below:
+// receives the VM stack, the shared module pointer, the per-context state,
+// and the unpacked argument; writes its result through |out_ret0|.
+typedef iree_status_t (*call_i32_i32_t)(iree_vm_stack_t* stack,
+                                        void* module_ptr, void* module_state,
+                                        int32_t arg0, int32_t* out_ret0);
+
+// Wrapper for calling a |target_fn| C function from the VM ABI.
+// It's optional to bounce through like this; if the function can more
+// efficiently directly access the arguments from the |call| then it can do so.
+// This approach is most useful when the function may also be exported/used by
+// non-VM code or may be internally referenced using a target-specific ABI.
+// TODO(benvanik): generate/export these shims/call functions in stack.h.
+//
+// |out_result| is unused here: the single i32 result is written directly
+// into the caller-provided results span.
+static iree_status_t call_shim_i32_i32(iree_vm_stack_t* stack,
+                                       const iree_vm_function_call_t* call,
+                                       call_i32_i32_t target_fn, void* module,
+                                       void* module_state,
+                                       iree_vm_execution_result_t* out_result) {
+  // We can use structs to allow compiler-controlled indexing optimizations,
+  // though this won't work for variadic cases.
+  // TODO(benvanik): packed attributes.
+  typedef struct {
+    int32_t arg0;
+  } args_t;
+  typedef struct {
+    int32_t ret0;
+  } results_t;
+
+  // Reinterpret the raw byte spans produced by call_import_i32_i32 (or the
+  // bytecode caller) as the typed argument/result records.
+  const args_t* args = (const args_t*)call->arguments.data;
+  results_t* results = (results_t*)call->results.data;
+
+  // For simple cases like this (zero or 1 result) we can tail-call.
+  return target_fn(stack, module, module_state, args->arg0, &results->ret0);
+}
+
+//===----------------------------------------------------------------------===//
+// module_a
+//===----------------------------------------------------------------------===//
+// This simple stateless module exports two functions that can be imported by
+// other modules or called directly by the user. When no imports, custom types,
+// or per-context state is required this simplifies module definitions.
+//
+// module_b below imports these functions and demonstrates a more complex module
+// with state.
+
+// Opaque module/state types; module_a carries no actual state so these are
+// never defined and the pointers passed in are unused.
+typedef struct module_a_t module_a_t;
+typedef struct module_a_state_t module_a_state_t;
+
+// vm.import @module_a.add_1(%arg0 : i32) -> i32
+static iree_status_t module_a_add_1(iree_vm_stack_t* stack, module_a_t* module,
+                                    module_a_state_t* module_state,
+                                    int32_t arg0, int32_t* out_ret0) {
+  // Add 1 to arg0 and return.
+  *out_ret0 = arg0 + 1;
+  return iree_ok_status();
+}
+
+// vm.import @module_a.sub_1(%arg0 : i32) -> i32
+static iree_status_t module_a_sub_1(iree_vm_stack_t* stack, module_a_t* module,
+                                    module_a_state_t* module_state,
+                                    int32_t arg0, int32_t* out_ret0) {
+  // Subtract 1 from arg0 and return. NOTE(review): no underflow/failure check
+  // is performed here — negative results are returned as-is.
+  *out_ret0 = arg0 - 1;
+  return iree_ok_status();
+}
+
+// Export table: function name + calling convention string per export.
+// "0i_i" presumably encodes version 0, one i32 argument, one i32 result —
+// TODO confirm against the cconv documentation in module.h.
+static const iree_vm_native_export_descriptor_t module_a_exports_[] = {
+    {iree_make_cstring_view("add_1"), iree_make_cstring_view("0i_i"), 0, NULL},
+    {iree_make_cstring_view("sub_1"), iree_make_cstring_view("0i_i"), 0, NULL},
+};
+// Shim + target pairs, one per export, in the same order as above.
+static const iree_vm_native_function_ptr_t module_a_funcs_[] = {
+    {(iree_vm_native_function_shim_t)call_shim_i32_i32,
+     (iree_vm_native_function_target_t)module_a_add_1},
+    {(iree_vm_native_function_shim_t)call_shim_i32_i32,
+     (iree_vm_native_function_target_t)module_a_sub_1},
+};
+static_assert(IREE_ARRAYSIZE(module_a_funcs_) ==
+                  IREE_ARRAYSIZE(module_a_exports_),
+              "function pointer table must be 1:1 with exports");
+// Module descriptor: name, then (count, list) pairs for imports (none),
+// exports, functions, and reflection attributes (none) — mirrors the field
+// order used by module_b_descriptor_ below.
+static const iree_vm_native_module_descriptor_t module_a_descriptor_ = {
+    iree_make_cstring_view("module_a"),
+    0,
+    NULL,
+    IREE_ARRAYSIZE(module_a_exports_),
+    module_a_exports_,
+    IREE_ARRAYSIZE(module_a_funcs_),
+    module_a_funcs_,
+    0,
+    NULL,
+};
+
+// Creates an instance of module_a owned by |allocator|.
+static iree_status_t module_a_create(iree_allocator_t allocator,
+                                     iree_vm_module_t** out_module) {
+  // NOTE: this module has neither shared or per-context module state.
+  iree_vm_module_t interface;
+  IREE_RETURN_IF_ERROR(iree_vm_module_initialize(&interface, NULL));
+  return iree_vm_native_module_create(&interface, &module_a_descriptor_,
+                                      allocator, out_module);
+}
+
+//===----------------------------------------------------------------------===//
+// module_b
+//===----------------------------------------------------------------------===//
+// A more complex module that holds state for resolved types (shared across
+// all instances), imported functions (stored per-context), per-context user
+// data, and reflection metadata.
+
+typedef struct module_b_t module_b_t;
+typedef struct module_b_state_t module_b_state_t;
+
+// Stores shared state across all instances of the module.
+// This should generally be treated as read-only and if mutation is possible
+// then users must synchronize themselves.
+typedef struct module_b_t {
+  // Allocator the module must be freed with and that can be used for any other
+  // shared dynamic allocations.
+  iree_allocator_t allocator;
+  // Resolved types; these never change once queried and are safe to store on
+  // the shared structure to avoid needing to look them up again.
+  // [0] holds vm.buffer, resolved in module_b_create.
+  const iree_vm_ref_type_descriptor_t* types[1];
+} module_b_t;
+
+// Stores per-context state; at the minimum imports, but possibly other user
+// state data. No synchronization is required as the VM will not call functions
+// with the same state from multiple threads concurrently.
+typedef struct module_b_state_t {
+  // Allocator the state must be freed with and that can be used for any other
+  // per-context dynamic allocations.
+  iree_allocator_t allocator;
+  // Resolved import functions matching 1:1 with the module import descriptors.
+  // [0] = module_a.add_1, [1] = module_a.sub_1 (see module_b_imports_).
+  iree_vm_function_t imports[2];
+  // Example user data stored per-state; accumulated by module_b_entry.
+  int counter;
+} module_b_state_t;
+
+// Frees the shared module; by this point all per-context states have been
+// freed and no more shared data is required.
+static void IREE_API_PTR module_b_destroy(void* self) {
+  module_b_t* module = (module_b_t*)self;
+  // The module was allocated from module->allocator in module_b_create and
+  // must be released back to the same allocator.
+  iree_allocator_free(module->allocator, module);
+}
+
+// Allocates per-context state, which stores resolved import functions and any
+// other non-shared user state. Zero-initializes everything (imports are
+// populated later via module_b_resolve_import; counter starts at 0).
+static iree_status_t IREE_API_PTR
+module_b_alloc_state(void* self, iree_allocator_t allocator,
+                     iree_vm_module_state_t** out_module_state) {
+  module_b_state_t* state = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, sizeof(*state), (void**)&state));
+  memset(state, 0, sizeof(*state));
+  state->allocator = allocator;
+  *out_module_state = (iree_vm_module_state_t*)state;
+  return iree_ok_status();
+}
+
+// Frees the per-context state.
+static void IREE_API_PTR
+module_b_free_state(void* self, iree_vm_module_state_t* module_state) {
+  module_b_state_t* state = (module_b_state_t*)module_state;
+  iree_allocator_free(state->allocator, state);
+}
+
+// Called once per import function so the module can store the function ref.
+// NOTE(review): assumes |ordinal| < IREE_ARRAYSIZE(state->imports); presumably
+// the VM only calls this for ordinals declared in module_b_imports_ — confirm.
+static iree_status_t IREE_API_PTR module_b_resolve_import(
+    void* self, iree_vm_module_state_t* module_state, iree_host_size_t ordinal,
+    const iree_vm_function_t* function,
+    const iree_vm_function_signature_t* signature) {
+  module_b_state_t* state = (module_b_state_t*)module_state;
+  state->imports[ordinal] = *function;
+  return iree_ok_status();
+}
+
+// Our actual function. Here we directly access the registers but one could also
+// use this as a trampoline into user code with a native signature (such as
+// fetching the args, calling the function as a normal C function, and stashing
+// back the results).
+//
+// Overall effect: counter += module_a.add_1(arg0); returns
+// module_a.sub_1(counter). The counter lives in the per-context state and
+// therefore persists across calls within one context.
+//
+// vm.import @module_b.entry(%arg0 : i32) -> i32
+static iree_status_t module_b_entry(iree_vm_stack_t* stack, module_b_t* module,
+                                    module_b_state_t* module_state,
+                                    int32_t arg0, int32_t* out_ret0) {
+  // NOTE: if we needed to use ref types here we have them under module->types.
+  assert(module->types[0]);
+
+  // Call module_a.add_1; arg0 is overwritten with the result (arg0 + 1).
+  IREE_RETURN_IF_ERROR(
+      call_import_i32_i32(stack, &module_state->imports[0], arg0, &arg0));
+
+  // Increment per-context state (persists across calls). No need for a mutex as
+  // only one thread can be using the per-context state at a time.
+  module_state->counter += arg0;
+  int32_t ret0 = module_state->counter;
+
+  // Call module_a.sub_1.
+  IREE_RETURN_IF_ERROR(
+      call_import_i32_i32(stack, &module_state->imports[1], ret0, &ret0));
+
+  *out_ret0 = ret0;
+  return iree_ok_status();
+}
+
+// Table of exported function pointers. Note that this table could be read-only
+// (like here) or shared/per-context to allow exposing different functions based
+// on versions, access rights, etc.
+static const iree_vm_native_function_ptr_t module_b_funcs_[] = {
+    {(iree_vm_native_function_shim_t)call_shim_i32_i32,
+     (iree_vm_native_function_target_t)module_b_entry},
+};
+
+// Imports resolved per-context into module_b_state_t::imports, in this order.
+static const iree_vm_native_import_descriptor_t module_b_imports_[] = {
+    {IREE_VM_NATIVE_IMPORT_REQUIRED, iree_make_cstring_view("module_a.add_1")},
+    {IREE_VM_NATIVE_IMPORT_REQUIRED, iree_make_cstring_view("module_a.sub_1")},
+};
+// NOTE(review): `module_b_state_t::imports` uses C++ scope syntax — this
+// header only compiles as C++ (it is included from the .cc test); confirm
+// that C consumers are not intended.
+static_assert(IREE_ARRAYSIZE(module_b_state_t::imports) ==
+                  IREE_ARRAYSIZE(module_b_imports_),
+              "import storage must be able to hold all imports");
+// Example reflection attributes attached to the single export below.
+static const iree_vm_reflection_attr_t module_b_entry_attrs_[] = {
+    {iree_make_cstring_view("key1"), iree_make_cstring_view("value1")},
+};
+static const iree_vm_native_export_descriptor_t module_b_exports_[] = {
+    {iree_make_cstring_view("entry"), iree_make_cstring_view("0i_i"),
+     IREE_ARRAYSIZE(module_b_entry_attrs_), module_b_entry_attrs_},
+};
+static_assert(IREE_ARRAYSIZE(module_b_funcs_) ==
+                  IREE_ARRAYSIZE(module_b_exports_),
+              "function pointer table must be 1:1 with exports");
+// Module descriptor: name, then (count, list) pairs for imports, exports,
+// functions, and reflection attributes (none at module level).
+static const iree_vm_native_module_descriptor_t module_b_descriptor_ = {
+    iree_make_cstring_view("module_b"),
+    IREE_ARRAYSIZE(module_b_imports_),
+    module_b_imports_,
+    IREE_ARRAYSIZE(module_b_exports_),
+    module_b_exports_,
+    IREE_ARRAYSIZE(module_b_funcs_),
+    module_b_funcs_,
+    0,
+    NULL,
+};
+
+// Creates an instance of module_b owned by |allocator|. Fails with NOT_FOUND
+// if the vm.buffer type has not been registered with the type system first.
+static iree_status_t module_b_create(iree_allocator_t allocator,
+                                     iree_vm_module_t** out_module) {
+  // Allocate shared module state.
+  module_b_t* module = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, sizeof(*module), (void**)&module));
+  memset(module, 0, sizeof(*module));
+  module->allocator = allocator;
+
+  // Resolve types used by the module once so that we can share it across all
+  // instances of the module.
+  module->types[0] =
+      iree_vm_ref_lookup_registered_type(iree_make_cstring_view("vm.buffer"));
+  if (!module->types[0]) {
+    // Free the partially-initialized module so the error path does not leak.
+    iree_allocator_free(allocator, module);
+    return iree_make_status(
+        IREE_STATUS_NOT_FOUND,
+        "required type vm.buffer not registered with the type system");
+  }
+
+  // Setup the interface with the functions we implement ourselves. Any function
+  // we omit will be handled by the base native module.
+  iree_vm_module_t interface;
+  iree_status_t status = iree_vm_module_initialize(&interface, module);
+  if (!iree_status_is_ok(status)) {
+    // Same leak-avoidance as above: module is not yet owned by the interface.
+    iree_allocator_free(allocator, module);
+    return status;
+  }
+  interface.destroy = module_b_destroy;
+  interface.alloc_state = module_b_alloc_state;
+  interface.free_state = module_b_free_state;
+  interface.resolve_import = module_b_resolve_import;
+  return iree_vm_native_module_create(&interface, &module_b_descriptor_,
+                                      allocator, out_module);
+}
diff --git a/runtime/src/iree/vm/ops.h b/runtime/src/iree/vm/ops.h
new file mode 100644
index 0000000..8395f83
--- /dev/null
+++ b/runtime/src/iree/vm/ops.h
@@ -0,0 +1,365 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_OPS_H_
+#define IREE_VM_OPS_H_
+
+#include <math.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/value.h"
+
+//===------------------------------------------------------------------===//
+// Globals
+//===------------------------------------------------------------------===//
+// Load/store a 4-byte global at base+byte_offset.
+// NOTE(review): assumes the offset has been validated and is suitably aligned
+// by the caller — confirm against the bytecode verifier.
+
+static inline int32_t vm_global_load_i32(uint8_t* base, uint32_t byte_offset) {
+  const int32_t* global_ptr = (const int32_t*)(base + byte_offset);
+  return *global_ptr;
+}
+
+static inline void vm_global_store_i32(uint8_t* base, uint32_t byte_offset,
+                                       int32_t value) {
+  int32_t* global_ptr = (int32_t*)(base + byte_offset);
+  *global_ptr = value;
+}
+
+//===------------------------------------------------------------------===//
+// Conditional assignment
+//===------------------------------------------------------------------===//
+
+// Returns true_value when condition is non-zero, else false_value.
+static inline int32_t vm_select_i32(int32_t condition, int32_t true_value,
+                                    int32_t false_value) {
+  return condition ? true_value : false_value;
+}
+
+//===------------------------------------------------------------------===//
+// Native integer arithmetic
+//===------------------------------------------------------------------===//
+// NOTE(review): signed division/remainder by zero (and INT32_MIN / -1) is C
+// undefined behavior here; presumably guarded before dispatch — confirm.
+
+static inline int32_t vm_add_i32(int32_t lhs, int32_t rhs) { return lhs + rhs; }
+static inline int32_t vm_sub_i32(int32_t lhs, int32_t rhs) { return lhs - rhs; }
+static inline int32_t vm_mul_i32(int32_t lhs, int32_t rhs) { return lhs * rhs; }
+static inline int32_t vm_div_i32s(int32_t lhs, int32_t rhs) {
+  return lhs / rhs;
+}
+static inline int32_t vm_div_i32u(int32_t lhs, int32_t rhs) {
+  return (int32_t)(((uint32_t)lhs) / ((uint32_t)rhs));
+}
+static inline int32_t vm_rem_i32s(int32_t lhs, int32_t rhs) {
+  return lhs % rhs;
+}
+static inline int32_t vm_rem_i32u(int32_t lhs, int32_t rhs) {
+  return (int32_t)(((uint32_t)lhs) % ((uint32_t)rhs));
+}
+// Fused multiply-add (integer): a * b + c with ordinary wrapping semantics.
+static inline int32_t vm_fma_i32(int32_t a, int32_t b, int32_t c) {
+  return a * b + c;
+}
+static inline int32_t vm_not_i32(int32_t operand) {
+  return (int32_t)(~((uint32_t)operand));
+}
+static inline int32_t vm_and_i32(int32_t lhs, int32_t rhs) { return lhs & rhs; }
+static inline int32_t vm_or_i32(int32_t lhs, int32_t rhs) { return lhs | rhs; }
+static inline int32_t vm_xor_i32(int32_t lhs, int32_t rhs) { return lhs ^ rhs; }
+
+//===------------------------------------------------------------------===//
+// Casting and type conversion/emulation
+//===------------------------------------------------------------------===//
+// Truncations narrow to i8/i16 and return the zero-extended value in an i32
+// register; extensions widen i8/i16 values with the indicated signedness.
+
+static inline int32_t vm_trunc_i32i8(int32_t operand) {
+  return (uint8_t)((uint32_t)operand);
+}
+static inline int32_t vm_trunc_i32i16(int32_t operand) {
+  return (uint16_t)((uint32_t)operand);
+}
+static inline int32_t vm_ext_i8i32s(int32_t operand) {
+  return (int32_t)((int8_t)operand);
+}
+static inline int32_t vm_ext_i8i32u(int32_t operand) {
+  return (uint32_t)((uint8_t)operand);
+}
+static inline int32_t vm_ext_i16i32s(int32_t operand) {
+  return (int32_t)((int16_t)operand);
+}
+static inline int32_t vm_ext_i16i32u(int32_t operand) {
+  return (uint32_t)((uint16_t)operand);
+}
+
+//===------------------------------------------------------------------===//
+// Native bitwise shifts and rotates
+//===------------------------------------------------------------------===//
+// Shift amounts are masked to [0, 31] so out-of-range amounts never reach the
+// C shift operator (which would be undefined behavior).
+
+static inline int32_t vm_shl_i32(int32_t operand, int32_t amount) {
+  amount &= 0x1F;
+  return (int32_t)(operand << amount);
+}
+static inline int32_t vm_shr_i32s(int32_t operand, int32_t amount) {
+  amount &= 0x1F;
+  return (int32_t)(operand >> amount);
+}
+static inline int32_t vm_shr_i32u(int32_t operand, int32_t amount) {
+  amount &= 0x1F;
+  return (int32_t)(((uint32_t)operand) >> amount);
+}
+
+//===------------------------------------------------------------------===//
+// Comparison ops
+//===------------------------------------------------------------------===//
+// All comparisons produce a canonical 0/1 i32 result.
+
+static inline int32_t vm_cmp_eq_i32(int32_t lhs, int32_t rhs) {
+  return (lhs == rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_ne_i32(int32_t lhs, int32_t rhs) {
+  return (lhs != rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_lt_i32s(int32_t lhs, int32_t rhs) {
+  return (lhs < rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_lt_i32u(int32_t lhs, int32_t rhs) {
+  return (((uint32_t)lhs) < ((uint32_t)rhs)) ? 1 : 0;
+}
+static inline int32_t vm_cmp_nz_i32(int32_t operand) {
+  return (operand != 0) ? 1 : 0;
+}
+static inline int32_t vm_cmp_eq_ref(iree_vm_ref_t* lhs, iree_vm_ref_t* rhs) {
+  return iree_vm_ref_equal(lhs, rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_ne_ref(iree_vm_ref_t* lhs, iree_vm_ref_t* rhs) {
+  return (!iree_vm_ref_equal(lhs, rhs)) ? 1 : 0;
+}
+// Non-null test on the ref's underlying pointer.
+static inline int32_t vm_cmp_nz_ref(iree_vm_ref_t* operand) {
+  return (operand->ptr != NULL) ? 1 : 0;
+}
+
+//===------------------------------------------------------------------===//
+// ExtI64: Globals
+//===------------------------------------------------------------------===//
+// i64 counterparts of the i32 ops above; same caveats apply (alignment and
+// offset validation by the caller, div/rem-by-zero guarded before dispatch).
+
+static inline int64_t vm_global_load_i64(uint8_t* base, uint32_t byte_offset) {
+  const int64_t* global_ptr = (const int64_t*)(base + byte_offset);
+  return *global_ptr;
+}
+
+static inline void vm_global_store_i64(uint8_t* base, uint32_t byte_offset,
+                                       int64_t value) {
+  int64_t* global_ptr = (int64_t*)(base + byte_offset);
+  *global_ptr = value;
+}
+
+//===------------------------------------------------------------------===//
+// ExtI64: Conditional assignment
+//===------------------------------------------------------------------===//
+
+static inline int64_t vm_select_i64(int32_t condition, int64_t true_value,
+                                    int64_t false_value) {
+  return condition ? true_value : false_value;
+}
+
+//===------------------------------------------------------------------===//
+// ExtI64: Native integer arithmetic ops
+//===------------------------------------------------------------------===//
+
+static inline int64_t vm_add_i64(int64_t lhs, int64_t rhs) { return lhs + rhs; }
+static inline int64_t vm_sub_i64(int64_t lhs, int64_t rhs) { return lhs - rhs; }
+static inline int64_t vm_mul_i64(int64_t lhs, int64_t rhs) { return lhs * rhs; }
+static inline int64_t vm_div_i64s(int64_t lhs, int64_t rhs) {
+  return lhs / rhs;
+}
+static inline int64_t vm_div_i64u(int64_t lhs, int64_t rhs) {
+  return (int64_t)(((uint64_t)lhs) / ((uint64_t)rhs));
+}
+static inline int64_t vm_rem_i64s(int64_t lhs, int64_t rhs) {
+  return lhs % rhs;
+}
+static inline int64_t vm_rem_i64u(int64_t lhs, int64_t rhs) {
+  return (int64_t)(((uint64_t)lhs) % ((uint64_t)rhs));
+}
+static inline int64_t vm_fma_i64(int64_t a, int64_t b, int64_t c) {
+  return a * b + c;
+}
+static inline int64_t vm_not_i64(int64_t operand) {
+  return (int64_t)(~((uint64_t)operand));
+}
+static inline int64_t vm_and_i64(int64_t lhs, int64_t rhs) { return lhs & rhs; }
+static inline int64_t vm_or_i64(int64_t lhs, int64_t rhs) { return lhs | rhs; }
+static inline int64_t vm_xor_i64(int64_t lhs, int64_t rhs) { return lhs ^ rhs; }
+
+//===------------------------------------------------------------------===//
+// ExtI64: Casting and type conversion/emulation
+//===------------------------------------------------------------------===//
+
+// Truncation keeps the low 32 bits (zero-extended into the i32 register).
+static inline int32_t vm_trunc_i64i32(int64_t operand) {
+  return (uint32_t)((uint64_t)operand);
+}
+static inline int64_t vm_ext_i32i64s(int32_t operand) {
+  return (int64_t)((int32_t)operand);
+}
+static inline int64_t vm_ext_i32i64u(int32_t operand) {
+  return (uint64_t)((uint32_t)operand);
+}
+
+//===------------------------------------------------------------------===//
+// ExtI64: Native bitwise shifts and rotates
+//===------------------------------------------------------------------===//
+// Amounts masked to [0, 63] to avoid C undefined behavior, as with i32.
+
+static inline int64_t vm_shl_i64(int64_t operand, int32_t amount) {
+  amount &= 0x3F;
+  return (int64_t)(operand << amount);
+}
+static inline int64_t vm_shr_i64s(int64_t operand, int32_t amount) {
+  amount &= 0x3F;
+  return (int64_t)(operand >> amount);
+}
+static inline int64_t vm_shr_i64u(int64_t operand, int32_t amount) {
+  amount &= 0x3F;
+  return (int64_t)(((uint64_t)operand) >> amount);
+}
+
+//===------------------------------------------------------------------===//
+// ExtI64: Comparison ops
+//===------------------------------------------------------------------===//
+// All comparisons produce a canonical 0/1 i32 result.
+
+static inline int32_t vm_cmp_eq_i64(int64_t lhs, int64_t rhs) {
+  return (lhs == rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_ne_i64(int64_t lhs, int64_t rhs) {
+  return (lhs != rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_lt_i64s(int64_t lhs, int64_t rhs) {
+  return (lhs < rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_lt_i64u(int64_t lhs, int64_t rhs) {
+  return (((uint64_t)lhs) < ((uint64_t)rhs)) ? 1 : 0;
+}
+static inline int32_t vm_cmp_nz_i64(int64_t operand) {
+  return (operand != 0) ? 1 : 0;
+}
+
+//===------------------------------------------------------------------===//
+// ExtF32: Globals
+//===------------------------------------------------------------------===//
+
+static inline float vm_global_load_f32(uint8_t* base, uint32_t byte_offset) {
+  const float* global_ptr = (const float*)(base + byte_offset);
+  return *global_ptr;
+}
+
+static inline void vm_global_store_f32(uint8_t* base, uint32_t byte_offset,
+                                       float value) {
+  float* global_ptr = (float*)(base + byte_offset);
+  *global_ptr = value;
+}
+
+//===------------------------------------------------------------------===//
+// ExtF32: Conditional assignment
+//===------------------------------------------------------------------===//
+
+static inline float vm_select_f32(int32_t condition, float true_value,
+                                  float false_value) {
+  return condition ? true_value : false_value;
+}
+
+//===------------------------------------------------------------------===//
+// ExtF32: Native floating-point arithmetic
+//===------------------------------------------------------------------===//
+// Thin wrappers over <math.h>; see the C standard for NaN/domain behavior.
+
+static inline float vm_add_f32(float lhs, float rhs) { return lhs + rhs; }
+static inline float vm_sub_f32(float lhs, float rhs) { return lhs - rhs; }
+static inline float vm_mul_f32(float lhs, float rhs) { return lhs * rhs; }
+static inline float vm_div_f32(float lhs, float rhs) { return lhs / rhs; }
+// NOTE: remainderf (IEEE remainder, round-to-nearest), not fmodf.
+static inline float vm_rem_f32(float lhs, float rhs) {
+  return remainderf(lhs, rhs);
+}
+// Uses the hardware fma only when the platform advertises it as fast
+// (FP_FAST_FMAF); otherwise the unfused multiply-add, which may round twice.
+static inline float vm_fma_f32(float a, float b, float c) {
+#ifdef FP_FAST_FMAF
+  return fmaf(a, b, c);
+#else
+  return a * b + c;
+#endif  // FP_FAST_FMAF
+}
+static inline float vm_abs_f32(float operand) { return fabsf(operand); }
+static inline float vm_neg_f32(float operand) { return -operand; }
+static inline float vm_ceil_f32(float operand) { return ceilf(operand); }
+static inline float vm_floor_f32(float operand) { return floorf(operand); }
+
+static inline float vm_atan_f32(float operand) { return atanf(operand); }
+static inline float vm_atan2_f32(float y, float x) { return atan2f(y, x); }
+static inline float vm_cos_f32(float operand) { return cosf(operand); }
+static inline float vm_sin_f32(float operand) { return sinf(operand); }
+static inline float vm_exp_f32(float operand) { return expf(operand); }
+static inline float vm_exp2_f32(float operand) { return exp2f(operand); }
+static inline float vm_expm1_f32(float operand) { return expm1f(operand); }
+static inline float vm_log_f32(float operand) { return logf(operand); }
+static inline float vm_log10_f32(float operand) { return log10f(operand); }
+static inline float vm_log1p_f32(float operand) { return log1pf(operand); }
+static inline float vm_log2_f32(float operand) { return log2f(operand); }
+static inline float vm_pow_f32(float b, float e) { return powf(b, e); }
+static inline float vm_rsqrt_f32(float operand) {
+  return 1.0f / sqrtf(operand);
+}
+static inline float vm_sqrt_f32(float operand) { return sqrtf(operand); }
+static inline float vm_tanh_f32(float operand) { return tanhf(operand); }
+static inline float vm_erf_f32(float operand) { return erff(operand); }
+
+//===------------------------------------------------------------------===//
+// ExtF32: Casting and type conversion/emulation
+//===------------------------------------------------------------------===//
+
+static inline float vm_cast_si32f32(int32_t operand) { return (float)operand; }
+static inline float vm_cast_ui32f32(int32_t operand) {
+  return (float)(uint32_t)operand;
+}
+// Round-to-nearest float->int conversions.
+// NOTE(review): lroundf returns long; operands outside the representable
+// target range are UB per C — presumably callers/compilers guard range,
+// confirm.
+static inline int32_t vm_cast_f32si32(float operand) {
+  return (int32_t)lroundf(operand);
+}
+static inline int32_t vm_cast_f32ui32(float operand) {
+  return (uint32_t)lroundf(operand);
+}
+// Bit-pattern reinterpretation via memcpy (the well-defined C idiom;
+// requires <string.h>).
+static inline float vm_bitcast_i32f32(int32_t operand) {
+  float result;
+  memcpy(&result, &operand, sizeof(result));
+  return result;
+}
+static inline int32_t vm_bitcast_f32i32(float operand) {
+  int32_t result;
+  memcpy(&result, &operand, sizeof(result));
+  return result;
+}
+
+//===------------------------------------------------------------------===//
+// ExtF32: Comparison ops
+//===------------------------------------------------------------------===//
+// "o" = ordered (false when either operand is NaN); "u" = unordered (true
+// when either operand is NaN). All produce a canonical 0/1 i32 result.
+
+static inline int32_t vm_cmp_eq_f32o(float lhs, float rhs) {
+  return (lhs == rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_eq_f32u(float lhs, float rhs) {
+  return (isunordered(lhs, rhs) || (lhs == rhs)) ? 1 : 0;
+}
+static inline int32_t vm_cmp_ne_f32o(float lhs, float rhs) {
+  return (lhs != rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_ne_f32u(float lhs, float rhs) {
+  return (isunordered(lhs, rhs) || (lhs != rhs)) ? 1 : 0;
+}
+static inline int32_t vm_cmp_lt_f32o(float lhs, float rhs) {
+  return isless(lhs, rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_lt_f32u(float lhs, float rhs) {
+  return (isunordered(lhs, rhs) || isless(lhs, rhs)) ? 1 : 0;
+}
+static inline int32_t vm_cmp_lte_f32o(float lhs, float rhs) {
+  return islessequal(lhs, rhs) ? 1 : 0;
+}
+static inline int32_t vm_cmp_lte_f32u(float lhs, float rhs) {
+  return (isunordered(lhs, rhs) || islessequal(lhs, rhs)) ? 1 : 0;
+}
+static inline int32_t vm_cmp_nan_f32(float operand) {
+  return isnan(operand) ? 1 : 0;
+}
+
+#endif // IREE_VM_OPS_H_
diff --git a/runtime/src/iree/vm/ops_emitc.h b/runtime/src/iree/vm/ops_emitc.h
new file mode 100644
index 0000000..7ed52a0
--- /dev/null
+++ b/runtime/src/iree/vm/ops_emitc.h
@@ -0,0 +1,64 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_OPS_EMITC_H_
+#define IREE_VM_OPS_EMITC_H_
+
+// This file contains utility macros used for things that EmitC can't handle
+// directly.
+//
+// NOTE(review): several macros use the C keyword `struct` as a macro
+// parameter name; this is legal (macro parameters are just tokens) but easy
+// to misread.
+
+// Assign a value through a pointer variable
+#define EMITC_DEREF_ASSIGN_VALUE(ptr, value) *(ptr) = (value)
+
+// Assign a value pointed to by `ptr` through a pointer variable
+#define EMITC_DEREF_ASSIGN_PTR(ptr, value) *(ptr) = *(value)
+
+// Access a member of a struct
+#define EMITC_STRUCT_MEMBER(struct, member) (struct).member
+
+// Access the address of a member of a struct
+#define EMITC_STRUCT_MEMBER_ADDRESS(struct, member) &(struct).member
+
+// Assign a value to a member of a struct
+#define EMITC_STRUCT_MEMBER_ASSIGN(struct, member, value) \
+  (struct).member = (value)
+
+// Access a member of a pointer to a struct
+#define EMITC_STRUCT_PTR_MEMBER(struct, member) (struct)->member
+
+// Call a function pointer of a pointer to a struct with the given arguments
+#define EMITC_STRUCT_PTR_MEMBER_CALL(struct, member, ...) \
+  (struct)->member(__VA_ARGS__)
+
+// Access the address of a member of a pointer to a struct
+#define EMITC_STRUCT_PTR_MEMBER_ADDRESS(struct, member) &(struct)->member
+
+// Assign a value to a member of a pointer to a struct
+#define EMITC_STRUCT_PTR_MEMBER_ASSIGN(struct, member, value) \
+  (struct)->member = (value)
+
+// Create a typedef'd struct named |typename| with the given member list
+#define EMITC_TYPEDEF_STRUCT(typename, body) \
+  typedef struct {                           \
+    body                                     \
+  } typename;
+
+// Get the address of an array element
+#define EMITC_ARRAY_ELEMENT_ADDRESS(array, index) &(array)[index]
+
+// Unary operations
+#define EMITC_CAST(arg, type) ((type)(arg))
+#define EMITC_NOT(arg) (!(arg))
+
+// Binary operations (note: EMITC_AND/EMITC_OR are logical &&/||, short-
+// circuiting, not bitwise)
+#define EMITC_AND(lhs, rhs) ((lhs) && (rhs))
+#define EMITC_EQ(lhs, rhs) ((lhs) == (rhs))
+#define EMITC_NE(lhs, rhs) ((lhs) != (rhs))
+#define EMITC_OR(lhs, rhs) ((lhs) || (rhs))
+
+#define EMITC_ADD(lhs, rhs) ((lhs) + (rhs))
+
+#endif // IREE_VM_OPS_EMITC_H_
diff --git a/runtime/src/iree/vm/ref.c b/runtime/src/iree/vm/ref.c
new file mode 100644
index 0000000..7c2a966
--- /dev/null
+++ b/runtime/src/iree/vm/ref.c
@@ -0,0 +1,272 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/ref.h"
+
+#include <string.h>
+
+#include "iree/base/internal/atomics.h"
+
+// TODO(benvanik): dynamic, if we care - otherwise keep small.
+// After a dozen or so types the linear scan will likely start to spill the
+// DCACHE and need to be reworked. I suspect at the time we have >=64 types
+// we'll want to rewrite all of this anyway (using externalized type ID storage
+// or something more complex).
+#define IREE_VM_MAX_TYPE_ID 64  // descriptor table size; ID 0 reserved for NULL
+
+static inline volatile iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr(
+    void* ptr, const iree_vm_ref_type_descriptor_t* type_descriptor) {
+  return (volatile iree_atomic_ref_count_t*)(((uintptr_t)(ptr)) +
+                                             type_descriptor->offsetof_counter);
+}
+
+static inline volatile iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr(
+    iree_vm_ref_t* ref) {
+  return (volatile iree_atomic_ref_count_t*)(((uintptr_t)ref->ptr) +
+                                             ref->offsetof_counter);
+}
+
+IREE_API_EXPORT void iree_vm_ref_object_retain(
+    void* ptr, const iree_vm_ref_type_descriptor_t* type_descriptor) {
+  if (!ptr) return;
+  volatile iree_atomic_ref_count_t* counter =
+      iree_vm_get_raw_counter_ptr(ptr, type_descriptor);
+  iree_atomic_ref_count_inc(counter);
+}
+
+IREE_API_EXPORT void iree_vm_ref_object_release(
+    void* ptr, const iree_vm_ref_type_descriptor_t* type_descriptor) {
+  if (!ptr) return;
+  volatile iree_atomic_ref_count_t* counter =
+      iree_vm_get_raw_counter_ptr(ptr, type_descriptor);
+  if (iree_atomic_ref_count_dec(counter) == 1) {  // count dropped to zero
+    if (type_descriptor->destroy) {
+      // NOTE: this makes us not re-entrant, but I think that's OK.
+      type_descriptor->destroy(ptr);
+    }
+  }
+}
+
+// A table of type descriptors registered at startup.
+// These provide quick dereferencing of destruction functions and type names for
+// debugging. Note that this just points to registered descriptors (or NULL) for
+// each type ID in the type range and does not own the descriptors.
+//
+// Note that [0] is always the NULL type and has a NULL descriptor. We don't
+// allow types to be registered there.
+static const iree_vm_ref_type_descriptor_t*
+    iree_vm_ref_type_descriptors[IREE_VM_MAX_TYPE_ID] = {0};
+
+// Returns the type descriptor (or NULL) for the given type ID.
+static const iree_vm_ref_type_descriptor_t* iree_vm_ref_get_type_descriptor(
+    iree_vm_ref_type_t type) {
+  if (type >= IREE_VM_MAX_TYPE_ID) {  // also rejects IREE_VM_REF_TYPE_ANY
+    return NULL;
+  }
+  return iree_vm_ref_type_descriptors[type];
+}
+
+IREE_API_EXPORT iree_status_t
+iree_vm_ref_register_type(iree_vm_ref_type_descriptor_t* descriptor) {
+  for (int i = 1; i < IREE_VM_MAX_TYPE_ID; ++i) {  // slot 0 reserved for NULL
+    if (!iree_vm_ref_type_descriptors[i]) {
+      iree_vm_ref_type_descriptors[i] = descriptor;
+      descriptor->type = i;
+      return iree_ok_status();
+    }
+  }
+  // All IREE_VM_MAX_TYPE_ID - 1 usable slots are occupied; need to increase
+  // IREE_VM_MAX_TYPE_ID.
+  return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                          "too many user-defined types registered; new type "
+                          "would exceed maximum of %d",
+                          IREE_VM_MAX_TYPE_ID);
+}
+
+IREE_API_EXPORT iree_string_view_t
+iree_vm_ref_type_name(iree_vm_ref_type_t type) {
+  // Empty for type 0, out-of-range, or unregistered IDs (avoids NULL deref).
+  const iree_vm_ref_type_descriptor_t* descriptor =
+      iree_vm_ref_get_type_descriptor(type);
+  return descriptor ? descriptor->type_name : iree_string_view_empty();
+}
+
+IREE_API_EXPORT const iree_vm_ref_type_descriptor_t*
+iree_vm_ref_lookup_registered_type(iree_string_view_t full_name) {
+  for (int i = 1; i < IREE_VM_MAX_TYPE_ID; ++i) {  // was <=: read past table end
+    if (!iree_vm_ref_type_descriptors[i]) break;
+    if (iree_string_view_equal(iree_vm_ref_type_descriptors[i]->type_name,
+                               full_name)) {
+      return iree_vm_ref_type_descriptors[i];
+    }
+  }
+  return NULL;
+}
+
+// Useful debugging tool; flip to #if 1 to log every ref count transition:
+#if 0
+static void iree_vm_ref_trace(const char* msg, iree_vm_ref_t* ref) {
+  volatile iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref);
+  iree_string_view_t name = iree_vm_ref_type_name(ref->type);
+  fprintf(stderr, "%s %.*s 0x%p %d\n", msg, (int)name.size, name.data, ref->ptr,
+          counter->__val);
+}
+#else
+#define iree_vm_ref_trace(...)
+#endif  // 0
+
+IREE_API_EXPORT iree_status_t iree_vm_ref_wrap_assign(void* ptr,
+                                                      iree_vm_ref_type_t type,
+                                                      iree_vm_ref_t* out_ref) {
+  const iree_vm_ref_type_descriptor_t* type_descriptor =
+      iree_vm_ref_get_type_descriptor(type);
+  if (!type_descriptor) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "type not registered");
+  }
+
+  if (out_ref->ptr != NULL && out_ref->ptr != ptr) {
+    // Release the existing value (skipped when rewrapping the same pointer).
+    iree_vm_ref_release(out_ref);
+  }
+
+  // NOTE: we do not manipulate the counter here as we assume it starts at 1
+  // or it's already coming in with some references.
+  out_ref->ptr = ptr;
+  out_ref->offsetof_counter = type_descriptor->offsetof_counter;
+  out_ref->type = type;
+
+  iree_vm_ref_trace("WRAP ASSIGN", out_ref);
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_ref_wrap_retain(void* ptr,
+                                                      iree_vm_ref_type_t type,
+                                                      iree_vm_ref_t* out_ref) {
+  IREE_RETURN_IF_ERROR(iree_vm_ref_wrap_assign(ptr, type, out_ref));
+  if (out_ref->ptr) {
+    volatile iree_atomic_ref_count_t* counter =
+        iree_vm_get_ref_counter_ptr(out_ref);
+    iree_atomic_ref_count_inc(counter);
+    iree_vm_ref_trace("WRAP RETAIN", out_ref);
+  }
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_vm_ref_retain(iree_vm_ref_t* ref,
+                                        iree_vm_ref_t* out_ref) {
+  // NOTE: ref and out_ref may alias or be nested so we retain before we
+  // potentially release.
+  iree_vm_ref_t temp_ref = *ref;
+  if (ref->ptr) {
+    volatile iree_atomic_ref_count_t* counter =
+        iree_vm_get_ref_counter_ptr(ref);
+    iree_atomic_ref_count_inc(counter);
+    iree_vm_ref_trace("RETAIN", ref);
+  }
+  if (out_ref->ptr) {
+    // Output ref contains a value that should be released first.
+    // Safe even if out_ref aliases ref: the retain above guarantees this
+    // release cannot destroy the object before the assignment below.
+    iree_vm_ref_release(out_ref);
+  }
+  *out_ref = temp_ref;
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_ref_retain_checked(
+    iree_vm_ref_t* ref, iree_vm_ref_type_t type, iree_vm_ref_t* out_ref) {
+  if (ref->type != IREE_VM_REF_TYPE_NULL && ref->type != type) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "source ref type mismatch");
+  }
+  iree_vm_ref_retain(ref, out_ref);
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_vm_ref_retain_or_move(int is_move, iree_vm_ref_t* ref,
+                                                iree_vm_ref_t* out_ref) {
+  if (is_move) {  // ownership transfer: |ref| is reset to empty by move
+    iree_vm_ref_move(ref, out_ref);
+  } else {
+    iree_vm_ref_retain(ref, out_ref);
+  }
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_ref_retain_or_move_checked(
+    int is_move, iree_vm_ref_t* ref, iree_vm_ref_type_t type,
+    iree_vm_ref_t* out_ref) {
+  if (ref->type != IREE_VM_REF_TYPE_NULL && ref->type != type) {
+    // Make no changes on failure.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "source ref type mismatch");
+  }
+  iree_vm_ref_retain_or_move(is_move, ref, out_ref);
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_vm_ref_release(iree_vm_ref_t* ref) {
+  if (ref->type == IREE_VM_REF_TYPE_NULL || ref->ptr == NULL) return;
+
+  iree_vm_ref_trace("RELEASE", ref);
+  volatile iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref);
+  if (iree_atomic_ref_count_dec(counter) == 1) {  // count dropped to zero
+    const iree_vm_ref_type_descriptor_t* type_descriptor =
+        iree_vm_ref_get_type_descriptor(ref->type);
+    if (type_descriptor->destroy) {
+      // NOTE: this makes us not re-entrant, but I think that's OK.
+      iree_vm_ref_trace("DESTROY", ref);
+      type_descriptor->destroy(ref->ptr);
+    }
+  }
+
+  // Reset ref to point at nothing.
+  memset(ref, 0, sizeof(*ref));
+}
+
+IREE_API_EXPORT void iree_vm_ref_assign(iree_vm_ref_t* ref,
+                                        iree_vm_ref_t* out_ref) {
+  // NOTE: ref and out_ref may alias; snapshot |ref| before releasing anything.
+  iree_vm_ref_t temp_ref = *ref;
+  if (ref == out_ref) {
+    // Source == target; ignore entirely.
+    return;
+  } else if (out_ref->ptr != NULL) {
+    // Release existing value.
+    iree_vm_ref_release(out_ref);
+  }
+
+  // Assign ref to out_ref (without incrementing counter).
+  *out_ref = temp_ref;
+}
+
+IREE_API_EXPORT void iree_vm_ref_move(iree_vm_ref_t* ref,
+                                      iree_vm_ref_t* out_ref) {
+  // NOTE: ref and out_ref may alias.
+  if (ref == out_ref) {
+    // Source == target; ignore entirely.
+    return;
+  }
+
+  // Reset input ref so it points at nothing.
+  iree_vm_ref_t temp_ref = *ref;
+  memset(ref, 0, sizeof(*ref));
+
+  if (out_ref->ptr != NULL) {
+    // Release existing value.
+    iree_vm_ref_release(out_ref);
+  }
+
+  // Assign ref to out_ref (without incrementing counter).
+  *out_ref = temp_ref;
+}
+
+IREE_API_EXPORT bool iree_vm_ref_is_null(iree_vm_ref_t* ref) {
+  return ref->type == IREE_VM_REF_TYPE_NULL;
+}
+
+IREE_API_EXPORT bool iree_vm_ref_equal(iree_vm_ref_t* lhs, iree_vm_ref_t* rhs) {
+  return lhs == rhs || memcmp(lhs, rhs, sizeof(*lhs)) == 0;  // NOTE(review): memcmp also compares any padding bytes; assumes refs are always zero-initialized — confirm
+}
diff --git a/runtime/src/iree/vm/ref.h b/runtime/src/iree/vm/ref.h
new file mode 100644
index 0000000..5ee6343
--- /dev/null
+++ b/runtime/src/iree/vm/ref.h
@@ -0,0 +1,310 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_REF_H_
+#define IREE_VM_REF_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/atomics.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Defines the type of the reference-counted pointer.
+// This is used to verify that operations dealing with the variant ref struct
+// are correct at runtime. We don't allow control over the ref types from the
+// VM ops and as such we can use the type specified as a safe way to avoid
+// reinterpreting memory incorrectly.
+enum iree_vm_ref_type_bits_t {
+  IREE_VM_REF_TYPE_NULL = 0,
+
+  // NOTE: these type values are assigned dynamically right now. Treat them as
+  // opaque and unstable across process invocations.
+
+  // Maximum type ID value. Type IDs are limited to 24-bits.
+  IREE_VM_REF_TYPE_MAX_VALUE = 0x00FFFFFEu,
+
+  // Wildcard type that indicates that a value may be a ref type but of an
+  // unspecified internal type.
+  IREE_VM_REF_TYPE_ANY = 0x00FFFFFFu,
+};
+typedef uint32_t iree_vm_ref_type_t;
+
+// Base for iree_vm_ref_t object targets.
+//
+// Usage (C):
+//  typedef struct my_type_t {
+//    iree_vm_ref_object_t ref_object;
+//    int my_fields;
+//  } my_type_t;
+//  void my_type_destroy(void* ptr) {
+//    free(ptr);
+//  }
+//  static iree_vm_ref_type_descriptor_t my_type_descriptor;
+//  my_type_descriptor.type_name = iree_make_cstring_view("my_type");
+//  my_type_descriptor.destroy = my_type_destroy;
+//  my_type_descriptor.offsetof_counter = offsetof(my_type_t,
+//  ref_object.counter);
+//  iree_vm_ref_register_type(&my_type_descriptor);
+//
+// Usage (C++):
+//  Prefer using iree::vm::RefObject as a base type.
+typedef struct iree_vm_ref_object_t {
+  iree_atomic_ref_count_t counter;
+} iree_vm_ref_object_t;
+
+// A pointer reference to a reference-counted object.
+// The counter is stored within the target object itself ala intrusive_ptr.
+//
+// NOTE: we try to keep this small so that we aren't wasting stack space or
+// copying around too much when we pass it to functions by value. This also
+// helps make the CPU caches happier as we need no indirections to check the
+// type and adjusting the counter happens without needing to query a descriptor.
+// Ideally the iree_vm_ref_t is in-cache on the stack and the target ptr is
+// either in cache from a previous use or will be used again after manipulating
+// its ref count.
+typedef struct iree_vm_ref_t {
+  // Pointer to the object. Type is resolved based on the |type| field.
+  // Will be NULL if the reference points to nothing.
+  void* ptr;
+  // Offset from ptr, in bytes, to the start of an atomic_int32_t representing
+  // the current reference count. We store this here to avoid the need for an
+  // indirection in the (extremely common) case of just reference count inc/dec.
+  uint32_t offsetof_counter : 8;  // 8 bits: counter must be within 255B of base
+  // Registered type of the object pointed to by ptr.
+  iree_vm_ref_type_t type : 24;
+} iree_vm_ref_t;
+static_assert(
+    sizeof(iree_vm_ref_t) <= sizeof(void*) * 2,
+    "iree_vm_ref_t dominates stack space usage and should be kept tiny");
+
+typedef void(IREE_API_PTR* iree_vm_ref_destroy_t)(void* ptr);
+
+// Describes a type for the VM.
+typedef struct iree_vm_ref_type_descriptor_t {
+  // Function called when references of this type reach 0 and should be
+  // destroyed.
+  iree_vm_ref_destroy_t destroy;
+  // Offset from ptr, in bytes, to the start of an atomic_int32_t representing
+  // the current reference count.
+  uint32_t offsetof_counter : 8;
+  // The type ID assigned to this type from the iree_vm_ref_type_t table (or an
+  // external user source).
+  iree_vm_ref_type_t type : 24;
+  // Unretained type name that can be used for debugging.
+  iree_string_view_t type_name;
+} iree_vm_ref_type_descriptor_t;
+
+// Directly retains the object with base |ptr| with the given |type_descriptor|.
+//
+// Note that this avoids any kind of type checking; for untrusted inputs use
+// the iree_vm_ref_t-based methods.
+IREE_API_EXPORT void iree_vm_ref_object_retain(
+    void* ptr, const iree_vm_ref_type_descriptor_t* type_descriptor);
+
+// Directly release the object with base |ptr| with the given |type_descriptor|,
+// possibly destroying it if it is the last reference. Assume that |ptr| is
+// invalid after this function returns.
+//
+// Note that this avoids any kind of type checking; for untrusted inputs use
+// the iree_vm_ref_t-based methods.
+IREE_API_EXPORT void iree_vm_ref_object_release(
+    void* ptr, const iree_vm_ref_type_descriptor_t* type_descriptor);
+
+// Registers a user-defined type with the IREE C ref system.
+// The provided destroy function will be used to destroy objects when their
+// reference count goes to 0. NULL can be used to no-op the destruction if the
+// type is not owned by the VM.
+//
+// TODO(benvanik): keep names alive for user types?
+// NOTE: the name is not retained and must be kept live by the caller. Ideally
+// it is stored in static read-only memory in the binary.
+//
+// WARNING: this function is not thread-safe and should only be used at startup
+// to register the types. Do not call this while any refs may be alive.
+IREE_API_EXPORT iree_status_t
+iree_vm_ref_register_type(iree_vm_ref_type_descriptor_t* descriptor);
+
+// Returns the type name for the given type, if found.
+IREE_API_EXPORT iree_string_view_t
+iree_vm_ref_type_name(iree_vm_ref_type_t type);
+
+// Returns the registered type descriptor for the given type, if found.
+IREE_API_EXPORT const iree_vm_ref_type_descriptor_t*
+iree_vm_ref_lookup_registered_type(iree_string_view_t full_name);
+
+// Wraps a raw pointer in a iree_vm_ref_t reference and assigns it to |out_ref|.
+// |out_ref| will be released if it already contains a reference. The target
+// object will not be retained and must come in with a count >= 1.
+//
+// Usage (C):
+//  my_type_t* my_type = (my_type_t*)malloc(sizeof(my_type_t));
+//  my_type->ref_object.counter = IREE_ATOMIC_VAR_INIT(1);
+//  iree_vm_ref_t my_ref;
+//  iree_vm_ref_wrap_assign(my_type, IREE_VM_REF_TYPE_MY_TYPE, &my_ref);
+//  iree_vm_ref_release(&my_ref);
+//
+// Usage (C++):
+//  iree_vm_ref_t my_ref;
+//  iree_vm_ref_wrap_assign(new MyType(), IREE_VM_REF_TYPE_MY_TYPE, &my_ref);
+//  iree_vm_ref_release(&my_ref);
+IREE_API_EXPORT iree_status_t iree_vm_ref_wrap_assign(void* ptr,
+                                                      iree_vm_ref_type_t type,
+                                                      iree_vm_ref_t* out_ref);
+
+// Wraps a raw pointer in a iree_vm_ref_t reference and retains it in |out_ref|.
+// |out_ref| will be released if it already contains a reference.
+IREE_API_EXPORT iree_status_t iree_vm_ref_wrap_retain(void* ptr,
+                                                      iree_vm_ref_type_t type,
+                                                      iree_vm_ref_t* out_ref);
+
+// Checks that the given reference-counted pointer |ref| is of |type|.
+static inline iree_status_t iree_vm_ref_check(const iree_vm_ref_t ref,
+                                              iree_vm_ref_type_t type) {
+  return IREE_LIKELY(ref.type == type)
+             ? iree_ok_status()
+             : iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                ref.type == IREE_VM_REF_TYPE_NULL
+                                    ? "ref is null"
+                                    : "ref type mismatch");
+}
+
+// Retains the reference-counted pointer |ref|.
+// |out_ref| will be released if it already contains a reference.
+IREE_API_EXPORT void iree_vm_ref_retain(iree_vm_ref_t* ref,
+                                        iree_vm_ref_t* out_ref);
+
+// Retains the reference-counted pointer |ref| and checks that it is of |type|.
+// |out_ref| will be released if it already contains a reference.
+IREE_API_EXPORT iree_status_t iree_vm_ref_retain_checked(
+    iree_vm_ref_t* ref, iree_vm_ref_type_t type, iree_vm_ref_t* out_ref);
+
+// Retains or moves |ref| to |out_ref|.
+// |out_ref| will be released if it already contains a reference.
+IREE_API_EXPORT void iree_vm_ref_retain_or_move(int is_move, iree_vm_ref_t* ref,
+                                                iree_vm_ref_t* out_ref);
+
+// Retains or moves |ref| to |out_ref| and checks that |ref| is of |type|.
+// |out_ref| will be released if it already contains a reference.
+IREE_API_EXPORT iree_status_t iree_vm_ref_retain_or_move_checked(
+    int is_move, iree_vm_ref_t* ref, iree_vm_ref_type_t type,
+    iree_vm_ref_t* out_ref);
+
+// Releases the reference-counted pointer |ref|, possibly freeing it.
+IREE_API_EXPORT void iree_vm_ref_release(iree_vm_ref_t* ref);
+
+// Assigns the reference-counted pointer |ref| without incrementing the count.
+// |out_ref| will be released if it already contains a reference.
+IREE_API_EXPORT void iree_vm_ref_assign(iree_vm_ref_t* ref,
+                                        iree_vm_ref_t* out_ref);
+
+// Moves one reference to another without changing the reference count.
+// |out_ref| will be released if it already contains a reference.
+IREE_API_EXPORT void iree_vm_ref_move(iree_vm_ref_t* ref,
+                                      iree_vm_ref_t* out_ref);
+
+// Returns true if |ref| does not currently reference an object (NULL type).
+IREE_API_EXPORT bool iree_vm_ref_is_null(iree_vm_ref_t* ref);
+
+// Returns true if the two references point at the same value (or are both
+// null).
+IREE_API_EXPORT bool iree_vm_ref_equal(iree_vm_ref_t* lhs, iree_vm_ref_t* rhs);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// Type adapter utilities for interfacing with the VM
+//===----------------------------------------------------------------------===//
+
+#ifdef __cplusplus
+namespace iree {
+namespace vm {
+template <typename T>
+struct ref_type_descriptor {
+  static const iree_vm_ref_type_descriptor_t* get();
+};
+}  // namespace vm
+}  // namespace iree
+#define IREE_VM_DECLARE_CC_TYPE_LOOKUP(name, T)         \
+  namespace iree {                                      \
+  namespace vm {                                        \
+  template <>                                           \
+  struct ref_type_descriptor<T> {                       \
+    static const iree_vm_ref_type_descriptor_t* get() { \
+      return name##_get_descriptor();                   \
+    }                                                   \
+  };                                                    \
+  }                                                     \
+  }
+
+#define IREE_VM_REGISTER_CC_TYPE(type, name, descriptor)   \
+  descriptor.type_name = iree_make_cstring_view(name);     \
+  descriptor.offsetof_counter = type::offsetof_counter();  \
+  descriptor.destroy = type::DirectDestroy;                \
+  IREE_RETURN_IF_ERROR(iree_vm_ref_register_type(&descriptor));
+#else
+#define IREE_VM_DECLARE_CC_TYPE_LOOKUP(name, T)
+#define IREE_VM_REGISTER_CC_TYPE(type, name, descriptor)
+#endif  // __cplusplus
+
+// Declares the C adapters (retain/move/deref/check/isa/type_id) for ref type
+// |name| with C struct |T|; pair with IREE_VM_DEFINE_TYPE_ADAPTERS below.
+#define IREE_VM_DECLARE_TYPE_ADAPTERS(name, T)                               \
+  IREE_API_EXPORT iree_vm_ref_t name##_retain_ref(T* value);                 \
+  IREE_API_EXPORT iree_vm_ref_t name##_move_ref(T* value);                   \
+  IREE_API_EXPORT T* name##_deref(const iree_vm_ref_t ref);                  \
+  IREE_API_EXPORT iree_status_t name##_check_deref(const iree_vm_ref_t ref,  \
+                                                   T** out_ptr);             \
+  IREE_API_EXPORT const iree_vm_ref_type_descriptor_t*                       \
+      name##_get_descriptor();                                               \
+  static inline bool name##_isa(const iree_vm_ref_t ref) {                   \
+    return name##_get_descriptor()->type == ref.type;                        \
+  }                                                                          \
+  IREE_API_EXPORT iree_vm_ref_type_t name##_type_id();                       \
+  IREE_VM_DECLARE_CC_TYPE_LOOKUP(name, T)
+
+// Defines the adapters declared above; expects a file-local static
+// |name|_descriptor variable holding the registered descriptor.
+#define IREE_VM_DEFINE_TYPE_ADAPTERS(name, T)                                \
+  IREE_API_EXPORT iree_vm_ref_t name##_retain_ref(T* value) {                \
+    iree_vm_ref_t ref = {0};                                                 \
+    iree_vm_ref_wrap_retain(value, name##_descriptor.type, &ref);            \
+    return ref;                                                              \
+  }                                                                          \
+  IREE_API_EXPORT iree_vm_ref_t name##_move_ref(T* value) {                  \
+    iree_vm_ref_t ref = {0};                                                 \
+    iree_vm_ref_wrap_assign(value, name##_descriptor.type, &ref);            \
+    return ref;                                                              \
+  }                                                                          \
+  IREE_API_EXPORT T* name##_deref(const iree_vm_ref_t ref) {                 \
+    iree_status_t status = iree_vm_ref_check(ref, name##_descriptor.type);   \
+    if (IREE_UNLIKELY(!iree_status_is_ok(status))) {                         \
+      IREE_IGNORE_ERROR(status);                                             \
+      return NULL;                                                           \
+    }                                                                        \
+    return (T*)ref.ptr;                                                      \
+  }                                                                          \
+  IREE_API_EXPORT iree_status_t name##_check_deref(const iree_vm_ref_t ref,  \
+                                                   T** out_ptr) {            \
+    IREE_RETURN_IF_ERROR(iree_vm_ref_check(ref, name##_descriptor.type));    \
+    *out_ptr = (T*)ref.ptr;                                                  \
+    return iree_ok_status();                                                 \
+  }                                                                          \
+  IREE_API_EXPORT const iree_vm_ref_type_descriptor_t*                       \
+      name##_get_descriptor() {                                              \
+    return &name##_descriptor;                                               \
+  }                                                                          \
+  IREE_API_EXPORT iree_vm_ref_type_t name##_type_id() {                      \
+    return name##_descriptor.type;                                           \
+  }
+
+#endif  // IREE_VM_REF_H_
diff --git a/runtime/src/iree/vm/ref_cc.h b/runtime/src/iree/vm/ref_cc.h
new file mode 100644
index 0000000..ac5fe02
--- /dev/null
+++ b/runtime/src/iree/vm/ref_cc.h
@@ -0,0 +1,466 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_REF_CC_H_
+#define IREE_VM_REF_CC_H_
+
+#include <atomic>
+#include <memory>
+#include <utility>
+
+#include "iree/base/api.h"
+#include "iree/base/attributes.h"
+#include "iree/vm/ref.h"
+
+#ifndef __cplusplus
+#error "This header is meant for use with C++ implementations."
+#endif // __cplusplus
+
+namespace iree {
+namespace vm {
+
+//===----------------------------------------------------------------------===//
+// iree::vm::RefObject C++ base type equivalent of iree_vm_ref_t
+//===----------------------------------------------------------------------===//
+
+// TODO(benvanik): make this automatic for most types, or use type lookup.
+// This could be done with SFINAE to detect iree_vm_ref_object_t or RefObject
+// types. We may still need the iree_vm_ref_type_t exposed but that's relatively
+// simple compared to getting the typed retain/release functions.
+
+// Retain/release |p| via the iree_vm_ref_type_descriptor_t registered for T.
+// Specialize these for custom types that resolve their descriptor differently.
+template <typename T>
+IREE_ATTRIBUTE_ALWAYS_INLINE void ref_type_retain(T* p) {
+  iree_vm_ref_object_retain(p, ref_type_descriptor<T>::get());
+}
+
+template <typename T>
+IREE_ATTRIBUTE_ALWAYS_INLINE void ref_type_release(T* p) {
+  iree_vm_ref_object_release(p, ref_type_descriptor<T>::get());
+}
+
+// Base class for reference counted objects.
+// Reference counted objects should be used with the iree::vm::ref<T> pointer
+// type. As reference counting can be tricky always prefer to use unique_ptr and
+// avoid this type. Only use this when unique_ptr is not possible, such as
+// when round-tripping objects through marshaling boundaries (v8/Java) or
+// any objects that may have their lifetime tied to a garbage collected
+// object.
+//
+// Subclasses should protect their dtor so that reference counting must
+// be used.
+//
+// This is designed to avoid the need for extra vtable space or for adding
+// methods to the vtable of subclasses. This differs from the boost Pointable
+// version of this object.
+// Inspiration for this comes from Peter Weinert's Dr. Dobb's article:
+// http://www.drdobbs.com/cpp/a-base-class-for-intrusively-reference-c/229218807
+//
+// RefObjects are thread safe and may be used with iree::vm::ref<T>s from
+// multiple threads.
+//
+// Subclasses may implement a custom Delete operator to handle their
+// deallocation. It should be thread safe as it may be called from any thread.
+//
+// Usage:
+//  class MyRefObject : public RefObject<MyRefObject> {
+//   public:
+//    MyRefObject() = default;
+//    // Optional; can be used to return to pool/etc - must be public:
+//    static void Delete(MyRefObject* ptr) {
+//      ::operator delete(ptr);
+//    }
+//  };
+template <class T>
+class RefObject {
+  static_assert(!std::is_array<T>::value, "T must not be an array");
+
+  // value is true if a static Delete(T*) function is present.
+  struct has_custom_deleter {
+    template <typename C>
+    static auto Test(C* p) -> decltype(C::Delete(nullptr), std::true_type());
+    template <typename>
+    static std::false_type Test(...);
+    static constexpr bool value =
+        std::is_same<std::true_type, decltype(Test<T>(nullptr))>::value;
+  };
+
+  template <typename V, bool has_custom_deleter>
+  struct delete_thunk {
+    static void Delete(V* p) {
+      auto ref_obj = static_cast<RefObject<V>*>(p);
+      int previous_count = ref_obj->counter_.fetch_sub(1);  // value before dec
+      if (previous_count == 1) {
+        // We delete type T pointer here to avoid the need for a virtual dtor.
+        V::Delete(p);
+      }
+    }
+    static void Destroy(V* p) { V::Delete(p); }
+  };
+
+  template <typename V>
+  struct delete_thunk<V, false> {
+    static void Delete(V* p) {
+      auto ref_obj = static_cast<RefObject<V>*>(p);
+      int previous_count = ref_obj->counter_.fetch_sub(1);  // value before dec
+      if (previous_count == 1) {
+        // We delete type T pointer here to avoid the need for a virtual dtor.
+        delete p;
+      }
+    }
+    static void Destroy(V* p) { delete p; }
+  };
+
+ public:
+  // Adds a reference; used by ref_ptr.
+  friend void ref_ptr_add_ref(T* p) {
+    auto ref_obj = static_cast<RefObject*>(p);
+    ++ref_obj->counter_;
+  }
+
+  // Releases a reference, potentially deleting the object; used by ref_ptr.
+  friend void ref_ptr_release_ref(T* p) {
+    delete_thunk<T, has_custom_deleter::value>::Delete(p);
+  }
+
+  // Deletes the object (precondition: ref count is zero).
+  friend void ref_ptr_destroy_ref(T* p) {
+    delete_thunk<T, has_custom_deleter::value>::Destroy(p);
+  }
+
+  // Deletes the object (precondition: ref count is zero).
+  static void DirectDestroy(void* p) {
+    ref_ptr_destroy_ref(reinterpret_cast<T*>(p));
+  }
+
+  // Adds a reference.
+  // ref_ptr should be used instead of this in most cases. This is required
+  // for when interoperating with marshaling APIs.
+  void AddReference() { ref_ptr_add_ref(static_cast<T*>(this)); }
+
+  // Releases a reference, potentially deleting the object.
+  // ref_ptr should be used instead of this in most cases. This is required
+  // for when interoperating with marshaling APIs.
+  void ReleaseReference() { ref_ptr_release_ref(static_cast<T*>(this)); }
+
+  // Returns the offset of the reference counter field from the start of the
+  // type T.
+  //
+  // This is generally unsafe to use and is here for support of the
+  // iree_vm_ref_t glue that allows RefObject-derived types to be round-tripped
+  // through the VM.
+  //
+  // For simple POD types or non-virtual classes we expect this to return 0.
+  // If the type has virtual methods (dtors/etc) then it should be 4 or 8
+  // (depending on pointer width). It may be other things, and instead of too
+  // much crazy magic we just rely on offsetof doing the right thing here.
+  static constexpr size_t offsetof_counter() { return offsetof(T, counter_); }
+
+ protected:
+  RefObject() { ref_ptr_add_ref(static_cast<T*>(this)); }  // starts at 1 ref
+  RefObject(const RefObject&) = default;
+  RefObject& operator=(const RefObject&) { return *this; }
+
+  // TODO(benvanik): replace this with just iree_vm_ref_object_t.
+  // That would allow us to remove a lot of these methods and reuse the C ones.
+  std::atomic<int32_t> counter_{0};
+};
+
+//===----------------------------------------------------------------------===//
+// iree::vm::ref<T> RAII equivalent of iree_vm_ref_t
+//===----------------------------------------------------------------------===//
+
+// Reference counted pointer container wrapping iree_vm_ref_t.
+// This is modeled on boost::intrusive_ptr in that it requires no
+// extra storage over the pointer type and should compile to almost
+// no additional code. It also allows us to round-trip object pointers
+// through regular pointers, which is critical when having to round-trip
+// them through JNI/etc where we can't use things like unique_ptr/shared_ptr.
+//
+// The ref wrapper calls the iree_vm_ref_* functions and uses the
+// iree_vm_ref_type_descriptor_t registered for the type T to manipulate the
+// reference counter and, when needed, destroy the object using
+// iree_vm_ref_destroy_t. Any iree_vm_ref_t can be used interchangeably with
+// ref<T> when RAII is needed.
+//
+// Example:
+// ref<Foo> p1(new Foo()); // ref count 1
+// ref<Foo> p2(p1); // ref count 2
+// p1.reset(); // ref count 1
+// p2.reset(); // ref count 0, deleted
+//
+// When round-tripping the pointer through external APIs, use release():
+// ref<Foo> p1(new Foo()); // ref count 1
+// Foo* raw_p = p1.release(); // ref count 1
+// // pass to API
+// ref<Foo> p2(raw_p); // ref count 1 (don't add ref)
+// p2.reset(); // ref count 0, deleted
+//
+// See the boost intrusive_ptr docs for details of behavior:
+// http://www.boost.org/doc/libs/1_55_0/libs/smart_ptr/intrusive_ptr.html
+//
+// The retain_ref and assign_ref helpers can be used to make it easier to
+// declare and use ref types:
+// ref<Foo> p = assign_ref(new Foo()); // ref count 1
+// PassRefWithRetain(retain_ref(p));
+// PassRefWithMove(std::move(p)); // ala unique_ptr/shared_ptr
+//
+// ref manages the target objects in a thread-safe way, though you'll want
+// to take care with objects that may have pinned threads for deallocation. If
+// you release the last reference to an object on a thread other than what it
+// was expecting you're gonna have a bad time.
+//
+// Compatible only with types that implement the following methods:
+// ref_type_retain(T*)
+// ref_type_release(T*)
+// ref_type_descriptor<T>::get()
+//
+// If you get link errors pertaining to ref_type_descriptor then ensure that you
+// have included the header file containing the IREE_VM_DECLARE_TYPE_ADAPTERS
+// for the given type.
+//
+// TODO(benvanik): reconcile RefObject, iree_vm_ref_t, and this.
+template <typename T>
+class ref {
+ private:
+  typedef ref this_type;
+  // Pre-C++11 "safe bool" idiom: a member-pointer type used for boolean
+  // conversion without allowing accidental arithmetic/comparison.
+  typedef T* this_type::*unspecified_bool_type;
+
+ public:
+  IREE_ATTRIBUTE_ALWAYS_INLINE iree_vm_ref_type_t type() const noexcept {
+    return ref_type_descriptor<T>::get()->type;
+  }
+
+  // All constructors populate offsetof_counter/type from the registered
+  // descriptor so the ref can be handed directly to C APIs.
+  IREE_ATTRIBUTE_ALWAYS_INLINE ref() noexcept
+      : ref_({
+            0,
+            ref_type_descriptor<T>::get()->offsetof_counter,
+            ref_type_descriptor<T>::get()->type,
+        }) {}
+  IREE_ATTRIBUTE_ALWAYS_INLINE ref(std::nullptr_t) noexcept  // NOLINT
+      : ref_({
+            0,
+            ref_type_descriptor<T>::get()->offsetof_counter,
+            ref_type_descriptor<T>::get()->type,
+        }) {}
+  // Takes ownership of |p| without incrementing its reference count.
+  IREE_ATTRIBUTE_ALWAYS_INLINE ref(T* p) noexcept  // NOLINT
+      : ref_({
+            p,
+            ref_type_descriptor<T>::get()->offsetof_counter,
+            ref_type_descriptor<T>::get()->type,
+        }) {}
+  IREE_ATTRIBUTE_ALWAYS_INLINE ~ref() noexcept { ref_type_release<T>(get()); }
+
+  // Don't use implicit ref copying; use retain_ref instead to make things more
+  // readable. We can't delete the ctor (or, I couldn't find a way not to)
+  // because the templated parameter packing magic needs it.
+  ref(const ref& rhs) noexcept : ref_(rhs.ref_) { ref_type_retain<T>(get()); }
+  ref& operator=(const ref&) noexcept = delete;
+
+  // Move support to transfer ownership from one ref to another.
+  ref(ref&& rhs) noexcept : ref_(rhs.ref_) { rhs.release(); }
+  ref& operator=(ref&& rhs) noexcept {
+    // Self-move (same pointer) is a no-op; otherwise release the current
+    // value before taking over rhs's.
+    if (get() != rhs.get()) {
+      ref_type_release<T>(get());
+      ref_ = rhs.ref_;
+      rhs.release();
+    }
+    return *this;
+  }
+
+  // Move support from another compatible type.
+  // NOTE(review): this reads ref<U>'s private ref_ from a different
+  // instantiation; confirm a cross-instantiation friend declaration exists
+  // elsewhere or this fails to compile when U != T.
+  template <typename U>
+  ref(ref<U>&& rhs) noexcept {  // NOLINT
+    ref_.ptr = static_cast<T*>(rhs.release());
+    ref_.offsetof_counter = rhs.ref_.offsetof_counter;
+    ref_.type = rhs.ref_.type;
+  }
+  template <typename U>
+  ref& operator=(ref<U>&& rhs) noexcept {
+    if (get() != rhs.get()) {
+      ref_type_release<T>(get());
+      ref_.ptr = static_cast<T*>(rhs.release());
+    }
+    return *this;
+  }
+
+  // Resets the object to nullptr and decrements the reference count, possibly
+  // deleting it.
+  void reset() noexcept {
+    ref_type_release<T>(get());
+    ref_.ptr = nullptr;
+  }
+
+  // Releases a pointer.
+  // Returns the current pointer held by this object without having
+  // its reference count decremented and resets the ref to empty.
+  // Returns nullptr if the ref holds no value.
+  // To re-wrap in a ref use either ref<T>(value) or assign().
+  IREE_ATTRIBUTE_ALWAYS_INLINE T* release() noexcept {
+    T* p = get();
+    ref_.ptr = nullptr;
+    return p;
+  }
+
+  // Assigns a pointer.
+  // The pointer will be accepted by the ref and its reference count will
+  // not be incremented.
+  IREE_ATTRIBUTE_ALWAYS_INLINE void assign(T* value) noexcept {
+    reset();
+    ref_.ptr = value;
+  }
+
+  // Gets the pointer referenced by this instance.
+  // operator* and operator-> will assert() if there is no current object.
+  constexpr T* get() const noexcept { return reinterpret_cast<T*>(ref_.ptr); }
+  constexpr T& operator*() const noexcept { return *get(); }
+  constexpr T* operator->() const noexcept { return get(); }
+
+  // Returns a pointer to the inner pointer storage.
+  // This allows passing a pointer to the ref as an output argument to C-style
+  // creation functions.
+  // NOTE: because operator& is overloaded, use std::addressof to get the
+  // address of the ref object itself.
+  constexpr T** operator&() noexcept {  // NOLINT
+    return reinterpret_cast<T**>(&ref_.ptr);
+  }
+
+  // Support boolean expression evaluation ala unique_ptr/shared_ptr:
+  // https://en.cppreference.com/w/cpp/memory/shared_ptr/operator_bool
+  constexpr operator unspecified_bool_type() const noexcept {  // NOLINT
+    return get() ? reinterpret_cast<unspecified_bool_type>(&this_type::ref_.ptr)
+                 : nullptr;
+  }
+  // Supports unary expression evaluation.
+  constexpr bool operator!() const noexcept { return !get(); }
+
+  // Swap support.
+  void swap(ref& rhs) { std::swap(ref_.ptr, rhs.ref_.ptr); }
+
+  // Allows directly passing the ref to a C-API function for creation.
+  // Example:
+  //  iree::vm::ref<my_type_t> value;
+  //  my_type_create(..., &value);
+  constexpr operator iree_vm_ref_t*() const noexcept {  // NOLINT
+    return &ref_;
+  }
+
+ private:
+  // Mutable so the const conversion operator above can expose the storage to
+  // C APIs taking a non-const iree_vm_ref_t*.
+  mutable iree_vm_ref_t ref_;
+};
+
+// Adds a reference to the given ref and returns the same ref.
+//
+// Usage:
+//  ref<MyType> a = AcquireRefFromSomewhere();
+//  ref<MyType> b = retain_ref(a);  // ref count + 1
+//  retain_ref(b);  // ref count + 1
+template <typename T>
+inline ref<T> retain_ref(const ref<T>& value) {
+  // Retain first; the returned ref<T>(T*) ctor takes ownership without
+  // incrementing, so the net effect is exactly +1.
+  ref_type_retain<T>(value.get());
+  return ref<T>(value.get());
+}
+
+// Adds a reference to the given raw pointer and returns it wrapped in a ref.
+//
+// Usage:
+//  MyType* raw_ptr = AcquirePointerFromSomewhere();
+//  ref<MyType> p = retain_ref(raw_ptr);  // ref count + 1
+template <typename T>
+inline ref<T> retain_ref(T* value) {
+  ref_type_retain<T>(value);
+  return ref<T>(value);
+}
+
+// Assigns a raw pointer to a ref without adding a reference.
+//
+// Usage:
+//  ref<MyType> p = assign_ref(new MyType());  // ref count untouched
+template <typename T>
+inline ref<T> assign_ref(T* value) {
+  return ref<T>(value);
+}
+
+// Equality/inequality between refs and raw pointers compares pointer
+// identity only (never the pointed-to values).
+template <class T, class U>
+inline bool operator==(ref<T> const& a, ref<U> const& b) {
+  return a.get() == b.get();
+}
+
+template <class T, class U>
+inline bool operator!=(ref<T> const& a, ref<U> const& b) {
+  return a.get() != b.get();
+}
+
+template <class T, class U>
+inline bool operator==(ref<T> const& a, U* b) {
+  return a.get() == b;
+}
+
+template <class T, class U>
+inline bool operator!=(ref<T> const& a, U* b) {
+  return a.get() != b;
+}
+
+template <class T, class U>
+inline bool operator==(T* a, ref<U> const& b) {
+  return a == b.get();
+}
+
+template <class T, class U>
+inline bool operator!=(T* a, ref<U> const& b) {
+  return a != b.get();
+}
+
+// Pointer-value ordering; allows refs to be used as keys in ordered
+// containers (the order is by address, not by contents).
+template <class T>
+inline bool operator<(ref<T> const& a, ref<T> const& b) {
+  return a.get() < b.get();
+}
+
+// Swaps the pointers of two refs.
+template <class T>
+void swap(ref<T>& lhs, ref<T>& rhs) {
+  lhs.swap(rhs);
+}
+
+//===----------------------------------------------------------------------===//
+// iree::opaque_ref utility for type-erased ref values
+//===----------------------------------------------------------------------===//
+
+// An opaque reference that does not make any assertions about the type of the
+// ref contained. This can be used to accept arbitrary ref objects that are then
+// dynamically handled based on type.
+class opaque_ref {
+ public:
+  opaque_ref() = default;
+  // Copying is disallowed; the only way to duplicate is via the C retain APIs.
+  opaque_ref(const opaque_ref&) = delete;
+  opaque_ref& operator=(const opaque_ref&) = delete;
+  opaque_ref(opaque_ref&& rhs) noexcept {
+    iree_vm_ref_move(&rhs.value_, &value_);
+  }
+  opaque_ref& operator=(opaque_ref&& rhs) noexcept {
+    // iree_vm_ref_move takes care of releasing any existing value_ contents.
+    iree_vm_ref_move(&rhs.value_, &value_);
+    return *this;
+  }
+  ~opaque_ref() { iree_vm_ref_release(&value_); }
+
+  constexpr iree_vm_ref_t* get() const noexcept { return &value_; }
+  constexpr operator iree_vm_ref_t*() const noexcept {  // NOLINT
+    return &value_;
+  }
+  constexpr bool operator!() const noexcept { return !value_.ptr; }
+
+  // Returns a pointer to the inner pointer storage.
+  // This allows passing a pointer to the ref as an output argument to C-style
+  // creation functions.
+  constexpr iree_vm_ref_t* operator&() noexcept { return &value_; }  // NOLINT
+
+ private:
+  // Mutable so the const accessors above can hand storage to C APIs taking a
+  // non-const iree_vm_ref_t*.
+  mutable iree_vm_ref_t value_ = {0};
+};
+
+}  // namespace vm
+}  // namespace iree
+
+#endif  // IREE_VM_REF_CC_H_
diff --git a/runtime/src/iree/vm/ref_test.cc b/runtime/src/iree/vm/ref_test.cc
new file mode 100644
index 0000000..d709d18
--- /dev/null
+++ b/runtime/src/iree/vm/ref_test.cc
@@ -0,0 +1,452 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/ref.h"
+
+#include <cstddef>
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+#include "iree/vm/ref_cc.h"
+
+namespace {
+
+// Two distinct RefObject test types so type-mismatch paths can be exercised.
+class A : public iree::vm::RefObject<A> {
+ public:
+  static iree_vm_ref_type_t kTypeID;
+
+  int data() const { return data_; }
+
+ private:
+  int data_ = 1;
+};
+iree_vm_ref_type_t A::kTypeID = IREE_VM_REF_TYPE_NULL;
+
+class B : public iree::vm::RefObject<B> {
+ public:
+  static iree_vm_ref_type_t kTypeID;
+
+  int data() const { return data_; }
+
+ private:
+  int data_ = 2;
+};
+iree_vm_ref_type_t B::kTypeID = IREE_VM_REF_TYPE_NULL;
+
+// Plain C-style struct with an embedded iree_vm_ref_object_t counter
+// (initialized to 1) used to test wrapping without C++ RefObject machinery.
+struct ref_object_c_t {
+  iree_vm_ref_object_t ref_object = {1};
+  int data = 1;
+};
+
+// Registers T's descriptor on first use and returns a new wrapped instance
+// with a reference count of 1.
+template <typename T>
+static iree_vm_ref_t MakeRef(const char* type_name) {
+  // Safe to do multiple times, so we do it to ensure the tests don't care what
+  // order they run in/don't need to preregister types.
+  static iree_vm_ref_type_descriptor_t descriptor = {0};
+  if (descriptor.type == IREE_VM_REF_TYPE_NULL) {
+    descriptor.type_name = iree_make_cstring_view(type_name);
+    descriptor.offsetof_counter = T::offsetof_counter();
+    descriptor.destroy = T::DirectDestroy;
+    IREE_CHECK_OK(iree_vm_ref_register_type(&descriptor));
+    T::kTypeID = descriptor.type;
+  }
+
+  iree_vm_ref_t ref = {0};
+  IREE_CHECK_OK(iree_vm_ref_wrap_assign(new T(), T::kTypeID, &ref));
+  return ref;
+}
+
+// Reads the current reference count of |ref| via its counter offset.
+static int32_t ReadCounter(iree_vm_ref_t* ref) {
+  return iree_atomic_load_int32(
+      (iree_atomic_ref_count_t*)(((uintptr_t)ref->ptr) + ref->offsetof_counter),
+      iree_memory_order_seq_cst);
+}
+
+// Registers the C-struct test type once; idempotent like MakeRef above.
+static iree_vm_ref_type_t kCTypeID = IREE_VM_REF_TYPE_NULL;
+static void RegisterTypeC() {
+  static iree_vm_ref_type_descriptor_t descriptor = {0};
+  if (descriptor.type == IREE_VM_REF_TYPE_NULL) {
+    descriptor.type_name = iree_make_cstring_view("CType");
+    descriptor.offsetof_counter = offsetof(ref_object_c_t, ref_object.counter);
+    descriptor.destroy =
+        +[](void* ptr) { delete reinterpret_cast<ref_object_c_t*>(ptr); };
+    IREE_CHECK_OK(iree_vm_ref_register_type(&descriptor));
+    kCTypeID = descriptor.type;
+  }
+}
+
+// Tests type registration and lookup.
+TEST(VMRefTest, TypeRegistration) {
+  RegisterTypeC();
+  ASSERT_NE(nullptr, iree_vm_ref_lookup_registered_type(
+                         iree_make_cstring_view("CType")));
+  ASSERT_EQ(nullptr, iree_vm_ref_lookup_registered_type(
+                         iree_make_cstring_view("asodjfaoisdjfaoisdfj")));
+}
+
+// Tests wrapping a simple C struct.
+TEST(VMRefTest, WrappingCStruct) {
+  RegisterTypeC();
+  iree_vm_ref_t ref = {0};
+  IREE_EXPECT_OK(iree_vm_ref_wrap_assign(new ref_object_c_t(), kCTypeID, &ref));
+  EXPECT_EQ(1, ReadCounter(&ref));
+  iree_vm_ref_release(&ref);
+}
+
+// Tests wrapping a C++ RefObject with a vtable.
+TEST(VMRefTest, WrappingSubclassedRefObject) {
+  struct BaseType : public iree::vm::RefObject<BaseType> {
+    virtual ~BaseType() = default;
+    virtual int DoSomething() = 0;
+  };
+  // Tracks live DerivedType instances to verify destruction on release.
+  static int allocated_derived_types = 0;
+  struct DerivedType : public BaseType {
+    DerivedType() { ++allocated_derived_types; }
+    ~DerivedType() override { --allocated_derived_types; }
+    int DoSomething() override { return 123 + allocated_derived_types; }
+  };
+
+  static iree_vm_ref_type_descriptor_t descriptor;
+  descriptor.type_name = iree_make_cstring_view("BaseType");
+  descriptor.offsetof_counter = BaseType::offsetof_counter();
+  descriptor.destroy = BaseType::DirectDestroy;
+  IREE_ASSERT_OK(iree_vm_ref_register_type(&descriptor));
+
+  allocated_derived_types = 0;
+
+  iree_vm_ref_t ref = {0};
+  IREE_EXPECT_OK(
+      iree_vm_ref_wrap_assign(new DerivedType(), descriptor.type, &ref));
+  EXPECT_EQ(1, ReadCounter(&ref));
+  EXPECT_EQ(1, allocated_derived_types);
+
+  EXPECT_EQ(123 + 1, reinterpret_cast<BaseType*>(ref.ptr)->DoSomething());
+
+  // Releasing the sole reference must destroy the derived instance.
+  iree_vm_ref_release(&ref);
+  EXPECT_EQ(0, allocated_derived_types);
+}
+
+// Tests that wrapping a type that has not been registered fails.
+TEST(VMRefTest, WrappingRequiresTypeRegistration) {
+  iree_vm_ref_t ref = {0};
+  int dummy = 0;
+  // Type 1234 has never been registered; the wrap must be rejected.
+  iree_status_t status = iree_vm_ref_wrap_assign(
+      &dummy, static_cast<iree_vm_ref_type_t>(1234), &ref);
+  IREE_EXPECT_STATUS_IS(IREE_STATUS_INVALID_ARGUMENT, status);
+  iree_status_free(status);
+}
+
+// Tests that wrapping releases any existing ref in out_ref.
+TEST(VMRefTest, WrappingReleasesExisting) {
+  RegisterTypeC();
+  iree_vm_ref_t ref = {0};
+  IREE_EXPECT_OK(iree_vm_ref_wrap_assign(new ref_object_c_t(), kCTypeID, &ref));
+  EXPECT_EQ(1, ReadCounter(&ref));
+  // Wrapping a second object into the same out_ref must release the first;
+  // the new object starts with a count of 1.
+  IREE_EXPECT_OK(iree_vm_ref_wrap_assign(new ref_object_c_t(), kCTypeID, &ref));
+  EXPECT_EQ(1, ReadCounter(&ref));
+  iree_vm_ref_release(&ref);
+}
+
+// Checking null refs is fine.
+TEST(VMRefTest, CheckNull) {
+  iree_vm_ref_t null_ref = {0};
+  IREE_EXPECT_OK(iree_vm_ref_check(null_ref, IREE_VM_REF_TYPE_NULL));
+  // Failing checks return a status that the caller owns and must free.
+  iree_status_t status =
+      iree_vm_ref_check(null_ref, static_cast<iree_vm_ref_type_t>(1234));
+  IREE_EXPECT_STATUS_IS(IREE_STATUS_INVALID_ARGUMENT, status);
+  iree_status_free(status);
+}
+
+// Tests type checks.
+TEST(VMRefTest, Check) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  IREE_EXPECT_OK(iree_vm_ref_check(a_ref, A::kTypeID));
+  iree_status_t status = iree_vm_ref_check(a_ref, B::kTypeID);
+  IREE_EXPECT_STATUS_IS(IREE_STATUS_INVALID_ARGUMENT, status);
+  iree_status_free(status);
+  iree_vm_ref_release(&a_ref);
+}
+
+// Tests retaining a null ref does nothing.
+TEST(VMRefTest, RetainNull) {
+  iree_vm_ref_t null_ref_0 = {0};
+  iree_vm_ref_t null_ref_1 = {0};
+  iree_vm_ref_retain(&null_ref_0, &null_ref_1);
+}
+
+// Tests that retaining into itself is a no-op.
+TEST(VMRefTest, RetainIntoSelf) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  EXPECT_EQ(1, ReadCounter(&a_ref));
+  iree_vm_ref_retain(&a_ref, &a_ref);
+  EXPECT_EQ(1, ReadCounter(&a_ref));
+  iree_vm_ref_release(&a_ref);
+}
+
+// Tests that retaining into out_ref releases the existing contents.
+TEST(VMRefTest, RetainReleasesExisting) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_t b_ref = MakeRef<B>("BType");
+  // b_ref's object is released; both refs now share a_ref's object (count 2).
+  iree_vm_ref_retain(&a_ref, &b_ref);
+  EXPECT_EQ(1, iree_vm_ref_equal(&a_ref, &b_ref));
+  EXPECT_EQ(2, ReadCounter(&a_ref));
+  iree_vm_ref_release(&a_ref);
+  iree_vm_ref_release(&b_ref);
+}
+
+// Tests that null refs are always fine.
+TEST(VMRefTest, RetainCheckedNull) {
+  iree_vm_ref_t null_ref_0 = {0};
+  iree_vm_ref_t null_ref_1 = {0};
+  IREE_EXPECT_OK(
+      iree_vm_ref_retain_checked(&null_ref_0, A::kTypeID, &null_ref_1));
+}
+
+// Tests that types are verified and retains fail if types don't match.
+TEST(VMRefTest, RetainChecked) {
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = {0};
+  IREE_EXPECT_OK(iree_vm_ref_retain_checked(&a_ref_0, A::kTypeID, &a_ref_1));
+  iree_vm_ref_release(&a_ref_0);
+  iree_vm_ref_release(&a_ref_1);
+}
+
+// Tests that working with null refs is fine.
+TEST(VMRefTest, RetainOrMoveNull) {
+  iree_vm_ref_t null_ref_0 = {0};
+  iree_vm_ref_t null_ref_1 = {0};
+  iree_vm_ref_retain_or_move(/*is_move=*/0, &null_ref_0, &null_ref_1);
+  iree_vm_ref_retain_or_move(/*is_move=*/1, &null_ref_0, &null_ref_1);
+}
+
+// Tests that is_move=false increments the ref count.
+TEST(VMRefTest, RetainOrMoveRetaining) {
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = {0};
+  iree_vm_ref_retain_or_move(/*is_move=*/0, &a_ref_0, &a_ref_1);
+  EXPECT_EQ(1, iree_vm_ref_equal(&a_ref_0, &a_ref_1));
+  EXPECT_EQ(2, ReadCounter(&a_ref_0));
+  iree_vm_ref_release(&a_ref_0);
+  iree_vm_ref_release(&a_ref_1);
+}
+
+// Tests that is_move=true does not increment the ref count.
+TEST(VMRefTest, RetainOrMoveMoving) {
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = {0};
+  iree_vm_ref_retain_or_move(/*is_move=*/1, &a_ref_0, &a_ref_1);
+  // The source must be reset to null after a move.
+  IREE_EXPECT_OK(iree_vm_ref_check(a_ref_0, IREE_VM_REF_TYPE_NULL));
+  iree_vm_ref_release(&a_ref_1);
+}
+
+// Tests that retaining into itself just increments the ref count.
+TEST(VMRefTest, RetainOrMoveRetainingIntoSelf) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  EXPECT_EQ(1, ReadCounter(&a_ref));
+  iree_vm_ref_retain_or_move(/*is_move=*/0, &a_ref, &a_ref);
+  EXPECT_EQ(1, ReadCounter(&a_ref));
+  iree_vm_ref_release(&a_ref);
+}
+
+// Tests that moving into itself is a no-op.
+TEST(VMRefTest, RetainOrMoveMovingIntoSelf) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_retain_or_move(/*is_move=*/1, &a_ref, &a_ref);
+  IREE_EXPECT_OK(iree_vm_ref_check(a_ref, A::kTypeID));
+  iree_vm_ref_release(&a_ref);
+}
+
+// Tests that retaining into out_ref releases the existing contents.
+TEST(VMRefTest, RetainOrMoveRetainingReleasesExisting) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_t b_ref = MakeRef<B>("BType");
+  iree_vm_ref_retain_or_move(/*is_move=*/0, &a_ref, &b_ref);
+  EXPECT_EQ(1, iree_vm_ref_equal(&a_ref, &b_ref));
+  EXPECT_EQ(2, ReadCounter(&a_ref));
+  iree_vm_ref_release(&a_ref);
+  iree_vm_ref_release(&b_ref);
+}
+
+// Tests that moving into out_ref releases the existing contents.
+TEST(VMRefTest, RetainOrMoveMovingReleasesExisting) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_t b_ref = MakeRef<B>("BType");
+  iree_vm_ref_retain_or_move(/*is_move=*/1, &a_ref, &b_ref);
+  EXPECT_EQ(0, iree_vm_ref_equal(&a_ref, &b_ref));
+  EXPECT_EQ(1, ReadCounter(&b_ref));
+  iree_vm_ref_release(&b_ref);
+}
+
+// Tests that null refs are always fine.
+TEST(VMRefTest, RetainOrMoveCheckedNull) {
+  iree_vm_ref_t null_ref_0 = {0};
+  iree_vm_ref_t null_ref_1 = {0};
+  IREE_EXPECT_OK(iree_vm_ref_retain_or_move_checked(
+      /*is_move=*/0, &null_ref_0, A::kTypeID, &null_ref_1));
+  IREE_EXPECT_OK(iree_vm_ref_retain_or_move_checked(
+      /*is_move=*/1, &null_ref_0, A::kTypeID, &null_ref_1));
+}
+
+// Tests that retains/moves work when types match.
+TEST(VMRefTest, RetainOrMoveCheckedMatch) {
+  // Retain.
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = {0};
+  IREE_EXPECT_OK(iree_vm_ref_retain_or_move_checked(
+      /*is_move=*/0, &a_ref_0, A::kTypeID, &a_ref_1));
+  EXPECT_EQ(1, iree_vm_ref_equal(&a_ref_0, &a_ref_1));
+  EXPECT_EQ(2, ReadCounter(&a_ref_0));
+  iree_vm_ref_release(&a_ref_0);
+  iree_vm_ref_release(&a_ref_1);
+
+  // Move.
+  iree_vm_ref_t b_ref_0 = MakeRef<B>("BType");
+  iree_vm_ref_t b_ref_1 = {0};
+  IREE_EXPECT_OK(iree_vm_ref_retain_or_move_checked(
+      /*is_move=*/1, &b_ref_0, B::kTypeID, &b_ref_1));
+  EXPECT_EQ(0, iree_vm_ref_equal(&b_ref_0, &b_ref_1));
+  EXPECT_EQ(1, ReadCounter(&b_ref_1));
+  iree_vm_ref_release(&b_ref_1);
+}
+
+// Tests that types are verified and retains/moves fail if types don't match.
+TEST(VMRefTest, RetainOrMoveCheckedMismatch) {
+  // Retain.
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = {0};
+  iree_status_t status = iree_vm_ref_retain_or_move_checked(
+      /*is_move=*/0, &a_ref_0, B::kTypeID, &a_ref_1);
+  IREE_EXPECT_STATUS_IS(IREE_STATUS_INVALID_ARGUMENT, status);
+  iree_status_free(status);
+  // On failure the source is untouched and the target remains null.
+  EXPECT_EQ(0, iree_vm_ref_equal(&a_ref_0, &a_ref_1));
+  EXPECT_EQ(1, ReadCounter(&a_ref_0));
+  iree_vm_ref_release(&a_ref_0);
+
+  // Move.
+  iree_vm_ref_t b_ref_0 = MakeRef<B>("BType");
+  iree_vm_ref_t b_ref_1 = {0};
+  status = iree_vm_ref_retain_or_move_checked(
+      /*is_move=*/1, &b_ref_0, A::kTypeID, &b_ref_1);
+  IREE_EXPECT_STATUS_IS(IREE_STATUS_INVALID_ARGUMENT, status);
+  iree_status_free(status);
+  EXPECT_EQ(1, ReadCounter(&b_ref_0));
+  iree_vm_ref_release(&b_ref_0);
+}
+
+// Tests that existing references are released when being overwritten.
+TEST(VMRefTest, RetainOrMoveCheckedReleasesExistingNull) {
+  iree_vm_ref_t null_ref = {0};
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  // Retaining null into a_ref releases a_ref's object and leaves it empty,
+  // so no explicit release is needed afterward.
+  IREE_EXPECT_OK(iree_vm_ref_retain_or_move_checked(
+      /*is_move=*/0, &null_ref, A::kTypeID, &a_ref));
+}
+
+// Tests that existing references are released when being overwritten.
+TEST(VMRefTest, RetainOrMoveCheckedReleasesExisting) {
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = MakeRef<A>("AType");
+  IREE_EXPECT_OK(iree_vm_ref_retain_or_move_checked(
+      /*is_move=*/1, &a_ref_0, A::kTypeID, &a_ref_1));
+  iree_vm_ref_release(&a_ref_1);
+}
+
+// Checks that assigning null refs is fine.
+TEST(VMRefTest, AssignNull) {
+  iree_vm_ref_t null_ref_0 = {0};
+  iree_vm_ref_t null_ref_1 = {0};
+  iree_vm_ref_assign(&null_ref_0, &null_ref_1);
+}
+
+// Tests that assigning does not reset the source ref nor inc the ref count.
+TEST(VMRefTest, Assign) {
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = {0};
+  iree_vm_ref_assign(&a_ref_0, &a_ref_1);
+  EXPECT_EQ(1, iree_vm_ref_equal(&a_ref_0, &a_ref_1));
+  EXPECT_EQ(1, ReadCounter(&a_ref_0));
+  // Only one release: a_ref_1 is an unowned alias of the same object.
+  iree_vm_ref_release(&a_ref_0);
+}
+
+// Tests that assigning into itself is a no-op.
+TEST(VMRefTest, AssignSelf) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_assign(&a_ref, &a_ref);
+  EXPECT_EQ(1, ReadCounter(&a_ref));
+  iree_vm_ref_release(&a_ref);
+}
+
+// Tests that assigning into out_ref releases the existing contents.
+TEST(VMRefTest, AssignReleasesExisting) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_t b_ref = MakeRef<B>("BType");
+  iree_vm_ref_assign(&a_ref, &b_ref);
+  EXPECT_EQ(1, iree_vm_ref_equal(&a_ref, &b_ref));
+  EXPECT_EQ(1, ReadCounter(&a_ref));
+  iree_vm_ref_release(&a_ref);
+  // NOTE: do not release b - it was just assigned!
+}
+
+// Checks that moving null refs is fine.
+TEST(VMRefTest, MovingNull) {
+  iree_vm_ref_t null_ref_0 = {0};
+  iree_vm_ref_t null_ref_1 = {0};
+  iree_vm_ref_move(&null_ref_0, &null_ref_1);
+}
+
+// Tests that moving resets the source ref.
+TEST(VMRefTest, MovingResetsSource) {
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = {0};
+  iree_vm_ref_move(&a_ref_0, &a_ref_1);
+  IREE_EXPECT_OK(iree_vm_ref_check(a_ref_0, IREE_VM_REF_TYPE_NULL));
+  iree_vm_ref_release(&a_ref_1);
+}
+
+// Tests that moving into itself is a no-op.
+TEST(VMRefTest, MovingIntoSelf) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_move(&a_ref, &a_ref);
+  IREE_EXPECT_OK(iree_vm_ref_check(a_ref, A::kTypeID));
+  iree_vm_ref_release(&a_ref);
+}
+
+// Tests that moving into out_ref releases the existing contents.
+TEST(VMRefTest, MovingReleasesExisting) {
+  iree_vm_ref_t a_ref_0 = MakeRef<A>("AType");
+  iree_vm_ref_t a_ref_1 = MakeRef<A>("AType");
+  iree_vm_ref_move(&a_ref_0, &a_ref_1);
+  iree_vm_ref_release(&a_ref_1);
+}
+
+// Null references should always be equal.
+TEST(VMRefTest, EqualityNull) {
+  iree_vm_ref_t null_ref_0 = {0};
+  iree_vm_ref_t null_ref_1 = {0};
+  EXPECT_EQ(1, iree_vm_ref_equal(&null_ref_0, &null_ref_0));
+  EXPECT_EQ(1, iree_vm_ref_equal(&null_ref_0, &null_ref_1));
+  EXPECT_EQ(1, iree_vm_ref_equal(&null_ref_1, &null_ref_0));
+}
+
+// Tests comparing with self and against null.
+TEST(VMRefTest, EqualitySelfOrNull) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_t null_ref = {0};
+  EXPECT_EQ(1, iree_vm_ref_equal(&a_ref, &a_ref));
+  EXPECT_EQ(0, iree_vm_ref_equal(&a_ref, &null_ref));
+  EXPECT_EQ(0, iree_vm_ref_equal(&null_ref, &a_ref));
+  iree_vm_ref_release(&a_ref);
+}
+
+// Tests comparing between different types.
+TEST(VMRefTest, EqualityDifferentTypes) {
+  iree_vm_ref_t a_ref = MakeRef<A>("AType");
+  iree_vm_ref_t b_ref = MakeRef<B>("BType");
+  EXPECT_EQ(0, iree_vm_ref_equal(&a_ref, &b_ref));
+  EXPECT_EQ(0, iree_vm_ref_equal(&b_ref, &a_ref));
+  iree_vm_ref_release(&b_ref);
+  iree_vm_ref_release(&a_ref);
+}
+
+}  // namespace
diff --git a/runtime/src/iree/vm/shims.c b/runtime/src/iree/vm/shims.c
new file mode 100644
index 0000000..7d79fa1
--- /dev/null
+++ b/runtime/src/iree/vm/shims.c
@@ -0,0 +1,52 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/shims.h"
+
+// One shim definition per unique (argument, result) signature; each expands to
+// an iree_vm_shim_<args>_<rets> function via IREE_VM_ABI_DEFINE_SHIM in
+// shims.h. Every signature here must have matching iree_vm_abi_*_t structs
+// declared in shims.h. The list appears roughly sorted by argument signature;
+// keep new entries in that order.
+IREE_VM_ABI_DEFINE_SHIM(irii, v);
+IREE_VM_ABI_DEFINE_SHIM(r, i);
+IREE_VM_ABI_DEFINE_SHIM(r, ii);
+IREE_VM_ABI_DEFINE_SHIM(r, iii);
+IREE_VM_ABI_DEFINE_SHIM(r, iiii);
+IREE_VM_ABI_DEFINE_SHIM(r, r);
+IREE_VM_ABI_DEFINE_SHIM(r, v);
+IREE_VM_ABI_DEFINE_SHIM(rCiD, i);
+IREE_VM_ABI_DEFINE_SHIM(rCrD, v);
+IREE_VM_ABI_DEFINE_SHIM(ri, i);
+IREE_VM_ABI_DEFINE_SHIM(ri, f);
+IREE_VM_ABI_DEFINE_SHIM(ri, r);
+IREE_VM_ABI_DEFINE_SHIM(ri, v);
+IREE_VM_ABI_DEFINE_SHIM(riCiD, r);
+IREE_VM_ABI_DEFINE_SHIM(riiCiD, r);
+IREE_VM_ABI_DEFINE_SHIM(riCiiD, r);
+IREE_VM_ABI_DEFINE_SHIM(riCrD, r);
+IREE_VM_ABI_DEFINE_SHIM(rii, i);
+IREE_VM_ABI_DEFINE_SHIM(rii, r);
+IREE_VM_ABI_DEFINE_SHIM(rii, v);
+IREE_VM_ABI_DEFINE_SHIM(rif, v);
+IREE_VM_ABI_DEFINE_SHIM(riii, r);
+IREE_VM_ABI_DEFINE_SHIM(riii, v);
+IREE_VM_ABI_DEFINE_SHIM(riirii, r);
+IREE_VM_ABI_DEFINE_SHIM(riiirii, r);
+IREE_VM_ABI_DEFINE_SHIM(rrrrCrD, r);
+IREE_VM_ABI_DEFINE_SHIM(ririi, v);
+IREE_VM_ABI_DEFINE_SHIM(rr, i);
+IREE_VM_ABI_DEFINE_SHIM(rr, r);
+IREE_VM_ABI_DEFINE_SHIM(rr, v);
+IREE_VM_ABI_DEFINE_SHIM(rr, ii);
+IREE_VM_ABI_DEFINE_SHIM(rrr, ii);
+IREE_VM_ABI_DEFINE_SHIM(rrCiriiD, r);
+IREE_VM_ABI_DEFINE_SHIM(rriCiD, v);
+IREE_VM_ABI_DEFINE_SHIM(rriiCiD, v);
+IREE_VM_ABI_DEFINE_SHIM(rriCiriiD, v);
+IREE_VM_ABI_DEFINE_SHIM(rriiii, v);
+IREE_VM_ABI_DEFINE_SHIM(rrirCiD, v);
+IREE_VM_ABI_DEFINE_SHIM(rriri, v);
+IREE_VM_ABI_DEFINE_SHIM(rririi, v);
+IREE_VM_ABI_DEFINE_SHIM(rrriii, v);
+IREE_VM_ABI_DEFINE_SHIM(v, i);
+IREE_VM_ABI_DEFINE_SHIM(v, r);
+IREE_VM_ABI_DEFINE_SHIM(v, v);
diff --git a/runtime/src/iree/vm/shims.h b/runtime/src/iree/vm/shims.h
new file mode 100644
index 0000000..0524452
--- /dev/null
+++ b/runtime/src/iree/vm/shims.h
@@ -0,0 +1,453 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_SHIMS_H_
+#define IREE_VM_SHIMS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "iree/base/api.h"
+#include "iree/base/attributes.h"
+#include "iree/base/target_platform.h"
+#include "iree/vm/module.h"
+#include "iree/vm/ref.h"
+#include "iree/vm/stack.h"
+#include "iree/vm/value.h"
+
+//===----------------------------------------------------------------------===//
+// Argument/result struct utilities
+//===----------------------------------------------------------------------===//
+
+// Expands to the canonical typedef name for a signature, e.g.
+// IREE_VM_ABI_TYPE_NAME(ri) -> iree_vm_abi_ri_t.
+#define IREE_VM_ABI_TYPE_NAME(types) iree_vm_abi_##types##_t
+
+// Declares a fixed-size packed argument/result struct plus its
+// checked_deref/reset helpers (see the _IMPL macro below).
+#define IREE_VM_ABI_FIXED_STRUCT(types, body) \
+  IREE_VM_ABI_FIXED_STRUCT_IMPL(types, IREE_VM_ABI_TYPE_NAME(types), body)
+
+// Declares a packed struct whose last field is a variable-length array sized
+// by a preceding count field. NOTE: unlike the fixed variant no _reset helper
+// is generated for VLA structs.
+#define IREE_VM_ABI_VLA_STRUCT(types, vla_count, vla_field, body) \
+  IREE_VM_ABI_VLA_STRUCT_IMPL(types, vla_count, vla_field,        \
+                              IREE_VM_ABI_TYPE_NAME(types), body)
+
+// Generates: the packed typedef, a checked_deref that validates the buffer
+// length matches the struct size exactly (returning NULL on mismatch), and a
+// reset that zeroes the struct.
+#define IREE_VM_ABI_FIXED_STRUCT_IMPL(types, struct_type, body)             \
+  typedef struct iree_vm_abi_##types##_t body IREE_ATTRIBUTE_PACKED         \
+      struct_type;                                                          \
+  static inline struct_type* iree_vm_abi_##types##_checked_deref(           \
+      iree_byte_span_t buffer) {                                            \
+    return IREE_LIKELY(buffer.data_length == sizeof(struct_type))           \
+               ? (struct_type*)buffer.data                                  \
+               : NULL;                                                      \
+  }                                                                         \
+  static inline void iree_vm_abi_##types##_reset(struct_type* value) {      \
+    memset(value, 0, sizeof(struct_type));                                  \
+  }
+
+// sizeof of a single struct member without needing an instance.
+#define IREE_VM_ABI_FIELD_SIZE(type, member) sizeof(((type*)NULL)->member)
+// Generates the packed typedef and a checked_deref that first verifies the
+// buffer can hold the fixed prefix and then that the total length equals
+// prefix + count * element size (the count is read from the buffer itself).
+#define IREE_VM_ABI_VLA_STRUCT_IMPL(types, vla_count, vla_field, struct_type, \
+                                    body)                                     \
+  typedef struct iree_vm_abi_##types##_t body IREE_ATTRIBUTE_PACKED           \
+      struct_type;                                                            \
+  static inline struct_type* iree_vm_abi_##types##_checked_deref(             \
+      iree_byte_span_t buffer) {                                              \
+    return IREE_LIKELY(buffer.data_length >= sizeof(struct_type)) &&          \
+                   IREE_LIKELY(                                               \
+                       buffer.data_length ==                                  \
+                       sizeof(struct_type) +                                  \
+                           ((const struct_type*)buffer.data)->vla_count *     \
+                               IREE_VM_ABI_FIELD_SIZE(struct_type,            \
+                                                      vla_field[0]))          \
+               ? (struct_type*)buffer.data                                    \
+               : NULL;                                                        \
+  }
+
+//===----------------------------------------------------------------------===//
+// Shim function declaration/definition and accessor utilities
+//===----------------------------------------------------------------------===//
+
+// Typed native function target invoked by a shim after it has validated and
+// cast the raw argument/result buffers.
+typedef iree_status_t(IREE_API_PTR* iree_vm_native_function_target2_t)(
+    iree_vm_stack_t* IREE_RESTRICT stack, void* IREE_RESTRICT module,
+    void* IREE_RESTRICT module_state, const void* IREE_RESTRICT args,
+    void* IREE_RESTRICT rets);
+
+// Declares the shim function for a given (args, rets) signature pair.
+#define IREE_VM_ABI_DECLARE_SHIM(arg_types, ret_types)                        \
+  iree_status_t iree_vm_shim_##arg_types##_##ret_types(                       \
+      iree_vm_stack_t* IREE_RESTRICT stack,                                   \
+      const iree_vm_function_call_t* IREE_RESTRICT call,                      \
+      iree_vm_native_function_target2_t target_fn, void* IREE_RESTRICT module, \
+      void* IREE_RESTRICT module_state,                                       \
+      iree_vm_execution_result_t* IREE_RESTRICT out_result);
+
+// Defines the shim body: validates that the call's argument/result buffers
+// exactly match the expected packed struct layouts (failing with
+// INVALID_ARGUMENT otherwise), zeroes the results, and tail-calls target_fn.
+#define IREE_VM_ABI_DEFINE_SHIM(arg_types, ret_types)                         \
+  iree_status_t iree_vm_shim_##arg_types##_##ret_types(                       \
+      iree_vm_stack_t* IREE_RESTRICT stack,                                   \
+      const iree_vm_function_call_t* IREE_RESTRICT call,                      \
+      iree_vm_native_function_target2_t target_fn, void* IREE_RESTRICT module, \
+      void* IREE_RESTRICT module_state,                                       \
+      iree_vm_execution_result_t* IREE_RESTRICT out_result) {                 \
+    const IREE_VM_ABI_TYPE_NAME(arg_types)* args =                            \
+        iree_vm_abi_##arg_types##_checked_deref(call->arguments);             \
+    IREE_VM_ABI_TYPE_NAME(ret_types)* rets =                                  \
+        iree_vm_abi_##ret_types##_checked_deref(call->results);               \
+    if (IREE_UNLIKELY(!args || !rets)) {                                      \
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,                   \
+                              "argument/result signature mismatch");          \
+    }                                                                         \
+    iree_vm_abi_##ret_types##_reset(rets);                                    \
+    return target_fn(stack, module, module_state, args, rets);                \
+  }
+
+// Declares a static module export function with typed args/rets structs.
+#define IREE_VM_ABI_EXPORT(function_name, module_state, arg_types, ret_types) \
+  static iree_status_t function_name(                                         \
+      iree_vm_stack_t* IREE_RESTRICT stack, void* IREE_RESTRICT module,       \
+      module_state* IREE_RESTRICT state,                                      \
+      IREE_VM_ABI_TYPE_NAME(arg_types) * IREE_RESTRICT args,                  \
+      IREE_VM_ABI_TYPE_NAME(ret_types) * IREE_RESTRICT rets)
+
+// TODO(benvanik): special case when source type and target type match.
+// Copies a variable-length i32 list into a stack (alloca) array of
+// target_type. Fails with OUT_OF_RANGE when the count exceeds max_count.
+// NOTE: *out_ptrs points at alloca storage valid only within the calling
+// function's scope.
+#define IREE_VM_ABI_VLA_STACK_CAST(args, vla_count, vla_field, target_type,  \
+                                   max_count, out_count, out_ptrs)           \
+  *(out_count) = (args)->vla_count;                                          \
+  if (IREE_UNLIKELY((args)->vla_count > (max_count))) {                      \
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE, "count %u > %u",       \
+                            (args)->vla_count, (uint32_t)(max_count));       \
+  }                                                                          \
+  *(out_ptrs) =                                                              \
+      (target_type*)iree_alloca((args)->vla_count * sizeof(target_type));    \
+  for (iree_host_size_t i = 0; i < (args)->vla_count; ++i) {                 \
+    (*(out_ptrs))[i] = (target_type)((args)->vla_field[i].i0);               \
+  }
+
+// Dereferences a variable-length ref list into a stack (alloca) array of
+// typed pointers, type-checking each element via ref_type##_check_deref.
+// Same stack-lifetime caveat as IREE_VM_ABI_VLA_STACK_CAST above.
+#define IREE_VM_ABI_VLA_STACK_DEREF(args, vla_count, vla_field, ref_type,     \
+                                    max_count, out_count, out_ptrs)           \
+  *(out_count) = (args)->vla_count;                                           \
+  if (IREE_UNLIKELY((args)->vla_count > (max_count))) {                       \
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,                         \
+                            "count %u of " #ref_type " > %u",                 \
+                            (args)->vla_count, (uint32_t)(max_count));        \
+  }                                                                           \
+  *(out_ptrs) =                                                               \
+      (ref_type##_t**)iree_alloca((args)->vla_count * sizeof(ref_type##_t*)); \
+  for (iree_host_size_t i = 0; i < (args)->vla_count; ++i) {                  \
+    IREE_RETURN_IF_ERROR(                                                     \
+        ref_type##_check_deref((args)->vla_field[i].r0, &(*(out_ptrs))[i]));  \
+  }
+
+// Dereferences a variable-length ref list into a heap-allocated array of
+// typed pointers (for counts too large for the stack variants above).
+// The caller owns *out_ptrs and must free it with the same |host_allocator|;
+// if a check_deref fails the partially-filled allocation is still returned
+// via *out_ptrs so the caller's cleanup path can free it.
+#define IREE_VM_ABI_VLA_HEAP_DEREF(args, vla_count, vla_field, ref_type,      \
+                                   host_allocator, out_count, out_ptrs)       \
+  *(out_count) = (args)->vla_count;                                           \
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc(                                 \
+      (host_allocator), (args)->vla_count * sizeof(ref_type##_t*),            \
+      (void**)(out_ptrs)));                                                   \
+  for (iree_host_size_t i = 0; i < (args)->vla_count; ++i) {                  \
+    IREE_RETURN_IF_ERROR(                                                     \
+        ref_type##_check_deref((args)->vla_field[i].r0, &(*(out_ptrs))[i]));  \
+  }
+
+//===----------------------------------------------------------------------===//
+// Structures used for arguments and results.
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_COMPILER_MSVC)
+#pragma pack(push, 1)
+#endif  // IREE_COMPILER_MSVC
+
+// All structs below are 1-byte packed so they overlay the raw VM call buffers
+// exactly; field names encode the signature characters in order
+// (i=int32, f=float, r=ref).
+
+// Special case for void (empty args/rets) as C structs can't have a 0 length.
+typedef struct iree_vm_abi_v_t {
+  int unused;
+} iree_vm_abi_v_t;
+static inline iree_vm_abi_v_t* iree_vm_abi_v_checked_deref(
+    iree_byte_span_t buffer) {
+  // Any buffer (including empty) is acceptable for the void signature.
+  return (iree_vm_abi_v_t*)buffer.data;
+}
+static inline void iree_vm_abi_v_reset(iree_vm_abi_v_t* value) {}
+
+IREE_VM_ABI_FIXED_STRUCT(i, { int32_t i0; });
+
+IREE_VM_ABI_FIXED_STRUCT(f, { float f0; });
+
+IREE_VM_ABI_FIXED_STRUCT(ii, {
+  int32_t i0;
+  int32_t i1;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(iii, {
+  int32_t i0;
+  int32_t i1;
+  int32_t i2;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(iiii, {
+  int32_t i0;
+  int32_t i1;
+  int32_t i2;
+  int32_t i3;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(irii, {
+  int32_t i0;
+  iree_vm_ref_t r1;
+  int32_t i2;
+  int32_t i3;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(r, { iree_vm_ref_t r0; });
+
+IREE_VM_ABI_FIXED_STRUCT(rr, {
+  iree_vm_ref_t r0;
+  iree_vm_ref_t r1;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(rrr, {
+  iree_vm_ref_t r0;
+  iree_vm_ref_t r1;
+  iree_vm_ref_t r2;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(ri, {
+  iree_vm_ref_t r0;
+  int32_t i1;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(ririi, {
+  iree_vm_ref_t r0;
+  int32_t i1;
+  iree_vm_ref_t r2;
+  int32_t i3;
+  int32_t i4;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(rii, {
+  iree_vm_ref_t r0;
+  int32_t i1;
+  int32_t i2;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(rif, {
+  iree_vm_ref_t r0;
+  int32_t i1;
+  float f2;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(riii, {
+  iree_vm_ref_t r0;
+  int32_t i1;
+  int32_t i2;
+  int32_t i3;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(riirii, {
+  iree_vm_ref_t r0;
+  int32_t i1;
+  int32_t i2;
+  iree_vm_ref_t r3;
+  int32_t i4;
+  int32_t i5;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(riiirii, {
+  iree_vm_ref_t r0;
+  int32_t i1;
+  int32_t i2;
+  int32_t i3;
+  iree_vm_ref_t r4;
+  int32_t i5;
+  int32_t i6;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(rriiii, {
+  iree_vm_ref_t r0;
+  iree_vm_ref_t r1;
+  int32_t i2;
+  int32_t i3;
+  int32_t i4;
+  int32_t i5;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(rriri, {
+  iree_vm_ref_t r0;
+  iree_vm_ref_t r1;
+  int32_t i2;
+  iree_vm_ref_t r3;
+  int32_t i4;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(rririi, {
+  iree_vm_ref_t r0;
+  iree_vm_ref_t r1;
+  int32_t i2;
+  iree_vm_ref_t r3;
+  int32_t i4;
+  int32_t i5;
+});
+
+IREE_VM_ABI_FIXED_STRUCT(rrriii, {
+  iree_vm_ref_t r0;
+  iree_vm_ref_t r1;
+  iree_vm_ref_t r2;
+  int32_t i3;
+  int32_t i4;
+  int32_t i5;
+});
+
// Variable-length argument structs.
//
// A 'C...D' segment in the name denotes a trailing variadic span: an
// iree_vm_size_t element count followed by a flexible array of the element
// struct (e.g. rCiD = one ref followed by a counted span of 'i' structs).
// The [0]-length arrays rely on the zero-length array extension so the
// structs can overlay variable-sized marshaled buffers.

IREE_VM_ABI_VLA_STRUCT(rCiD, a1_count, a1, {
  iree_vm_ref_t r0;
  iree_vm_size_t a1_count;
  iree_vm_abi_i_t a1[0];
});

IREE_VM_ABI_VLA_STRUCT(rCrD, a1_count, a1, {
  iree_vm_ref_t r0;
  iree_vm_size_t a1_count;
  iree_vm_abi_r_t a1[0];
});

IREE_VM_ABI_VLA_STRUCT(riCiD, a2_count, a2, {
  iree_vm_ref_t r0;
  int32_t i1;
  iree_vm_size_t a2_count;
  iree_vm_abi_i_t a2[0];
});

IREE_VM_ABI_VLA_STRUCT(riiCiD, a3_count, a3, {
  iree_vm_ref_t r0;
  int32_t i1;
  int32_t i2;
  iree_vm_size_t a3_count;
  iree_vm_abi_i_t a3[0];
});

IREE_VM_ABI_VLA_STRUCT(rriiCiD, a4_count, a4, {
  iree_vm_ref_t r0;
  iree_vm_ref_t r1;
  int32_t i2;
  int32_t i3;
  iree_vm_size_t a4_count;
  iree_vm_abi_i_t a4[0];
});

IREE_VM_ABI_VLA_STRUCT(riCrD, a2_count, a2, {
  iree_vm_ref_t r0;
  int32_t i1;
  iree_vm_size_t a2_count;
  iree_vm_abi_r_t a2[0];
});

IREE_VM_ABI_VLA_STRUCT(riiCriD, a3_count, a3, {
  iree_vm_ref_t r0;
  int32_t i1;
  int32_t i2;
  iree_vm_size_t a3_count;
  iree_vm_abi_ri_t a3[0];
});

IREE_VM_ABI_VLA_STRUCT(rirCrD, a3_count, a3, {
  iree_vm_ref_t r0;
  int32_t i1;
  iree_vm_ref_t r2;
  iree_vm_size_t a3_count;
  iree_vm_abi_r_t a3[0];
});

IREE_VM_ABI_VLA_STRUCT(rrrrCrD, a4_count, a4, {
  iree_vm_ref_t r0;
  iree_vm_ref_t r1;
  iree_vm_ref_t r2;
  iree_vm_ref_t r3;
  iree_vm_size_t a4_count;
  iree_vm_abi_r_t a4[0];
});

IREE_VM_ABI_VLA_STRUCT(rriCiD, a3_count, a3, {
  iree_vm_ref_t r0;
  iree_vm_ref_t r1;
  int32_t i2;
  iree_vm_size_t a3_count;
  iree_vm_abi_i_t a3[0];
});

IREE_VM_ABI_VLA_STRUCT(rrirCiD, a4_count, a4, {
  iree_vm_ref_t r0;
  iree_vm_ref_t r1;
  int32_t i2;
  iree_vm_ref_t r3;
  iree_vm_size_t a4_count;
  iree_vm_abi_i_t a4[0];
});

IREE_VM_ABI_VLA_STRUCT(riCiiD, a2_count, a2, {
  iree_vm_ref_t r0;
  int32_t i1;
  iree_vm_size_t a2_count;
  iree_vm_abi_ii_t a2[0];
});

IREE_VM_ABI_VLA_STRUCT(rrCiriiD, a2_count, a2, {
  iree_vm_ref_t r0;
  iree_vm_ref_t r1;
  iree_vm_size_t a2_count;
  iree_vm_abi_irii_t a2[0];
});

IREE_VM_ABI_VLA_STRUCT(rriCiriiD, a3_count, a3, {
  iree_vm_ref_t r0;
  iree_vm_ref_t r1;
  int32_t i2;
  iree_vm_size_t a3_count;
  iree_vm_abi_irii_t a3[0];
});
+
+#if defined(IREE_COMPILER_MSVC)
+#pragma pack(pop)
+#endif // IREE_COMPILER_MSVC
+
+//===----------------------------------------------------------------------===//
+// Shims for marshaling arguments and results
+//===----------------------------------------------------------------------===//
+
// Declares one marshaling shim per (arguments, results) signature pair:
// the first token names the argument struct and the second the result
// struct ('v' = the empty v struct declared above).
IREE_VM_ABI_DECLARE_SHIM(irii, v);
IREE_VM_ABI_DECLARE_SHIM(r, i);
IREE_VM_ABI_DECLARE_SHIM(r, ii);
IREE_VM_ABI_DECLARE_SHIM(r, iii);
IREE_VM_ABI_DECLARE_SHIM(r, iiii);
IREE_VM_ABI_DECLARE_SHIM(r, r);
IREE_VM_ABI_DECLARE_SHIM(r, v);
IREE_VM_ABI_DECLARE_SHIM(rCiD, i);
IREE_VM_ABI_DECLARE_SHIM(rCrD, v);
IREE_VM_ABI_DECLARE_SHIM(ri, i);
IREE_VM_ABI_DECLARE_SHIM(ri, f);
IREE_VM_ABI_DECLARE_SHIM(ri, r);
IREE_VM_ABI_DECLARE_SHIM(ri, v);
IREE_VM_ABI_DECLARE_SHIM(riCiD, r);
IREE_VM_ABI_DECLARE_SHIM(riiCiD, r);
IREE_VM_ABI_DECLARE_SHIM(riCiiD, r);
IREE_VM_ABI_DECLARE_SHIM(riCrD, r);
IREE_VM_ABI_DECLARE_SHIM(rii, i);
IREE_VM_ABI_DECLARE_SHIM(rii, r);
IREE_VM_ABI_DECLARE_SHIM(rii, v);
IREE_VM_ABI_DECLARE_SHIM(rif, v);
IREE_VM_ABI_DECLARE_SHIM(riii, r);
IREE_VM_ABI_DECLARE_SHIM(riii, v);
IREE_VM_ABI_DECLARE_SHIM(riirii, r);
IREE_VM_ABI_DECLARE_SHIM(riiirii, r);
IREE_VM_ABI_DECLARE_SHIM(rrrrCrD, r);
IREE_VM_ABI_DECLARE_SHIM(ririi, v);
IREE_VM_ABI_DECLARE_SHIM(rr, i);
IREE_VM_ABI_DECLARE_SHIM(rr, r);
IREE_VM_ABI_DECLARE_SHIM(rr, v);
IREE_VM_ABI_DECLARE_SHIM(rr, ii);
IREE_VM_ABI_DECLARE_SHIM(rrr, ii);
IREE_VM_ABI_DECLARE_SHIM(rrCiriiD, r);
IREE_VM_ABI_DECLARE_SHIM(rriCiD, v);
IREE_VM_ABI_DECLARE_SHIM(rriiCiD, v);
IREE_VM_ABI_DECLARE_SHIM(rriCiriiD, v);
IREE_VM_ABI_DECLARE_SHIM(rriiii, v);
IREE_VM_ABI_DECLARE_SHIM(rrirCiD, v);
IREE_VM_ABI_DECLARE_SHIM(rriri, v);
IREE_VM_ABI_DECLARE_SHIM(rririi, v);
IREE_VM_ABI_DECLARE_SHIM(rrriii, v);
IREE_VM_ABI_DECLARE_SHIM(v, i);
IREE_VM_ABI_DECLARE_SHIM(v, r);
IREE_VM_ABI_DECLARE_SHIM(v, v);
+
+#endif // IREE_VM_SHIMS_H_
diff --git a/runtime/src/iree/vm/shims_emitc.h b/runtime/src/iree/vm/shims_emitc.h
new file mode 100644
index 0000000..76d76d2
--- /dev/null
+++ b/runtime/src/iree/vm/shims_emitc.h
@@ -0,0 +1,29 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_SHIMS_EMITC_H_
+#define IREE_VM_SHIMS_EMITC_H_
+
+#include "iree/base/attributes.h"
+#include "iree/vm/module.h"
+#include "iree/vm/stack.h"
+
// Signature of a native function target produced by the EmitC backend.
// Matches the parameter list forwarded by iree_emitc_shim below.
typedef iree_status_t (*iree_vm_native_function_target_emitc)(
    iree_vm_stack_t* IREE_RESTRICT stack,
    iree_vm_function_call_t* IREE_RESTRICT call, void* IREE_RESTRICT module,
    void* IREE_RESTRICT module_state,
    iree_vm_execution_result_t* IREE_RESTRICT);

// Pass-through shim: EmitC-generated functions already take the VM call
// signature so no argument/result marshaling is performed here.
static iree_status_t iree_emitc_shim(
    iree_vm_stack_t* IREE_RESTRICT stack,
    /*const*/ iree_vm_function_call_t* IREE_RESTRICT call,
    iree_vm_native_function_target_emitc target_fn, void* IREE_RESTRICT module,
    void* IREE_RESTRICT module_state,
    iree_vm_execution_result_t* IREE_RESTRICT out_result) {
  return target_fn(stack, call, module, module_state, out_result);
}
+
+#endif // IREE_VM_SHIMS_EMITC_H_
diff --git a/runtime/src/iree/vm/stack.c b/runtime/src/iree/vm/stack.c
new file mode 100644
index 0000000..5ea5b9c
--- /dev/null
+++ b/runtime/src/iree/vm/stack.c
@@ -0,0 +1,541 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/stack.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "iree/base/alignment.h"
+#include "iree/base/api.h"
+#include "iree/base/tracing.h"
+#include "iree/vm/module.h"
+
// Debug-only assertion macro: expands to assert() in debug builds and to
// nothing when NDEBUG is defined so checks carry no release-mode cost.
#ifndef NDEBUG
#define VMCHECK(expr) assert(expr)
#else
#define VMCHECK(expr)
#endif  // NDEBUG
+
+//===----------------------------------------------------------------------===//
+// Stack implementation
+//===----------------------------------------------------------------------===//
+//
+// The stack is (currently) designed to contain enough information to allow us
+// to build some nice debugging tools. This means that we try hard to preserve
+// all information needed for complete and precise stack dumps as well as
+// allowing inspection of both current and previous stack frame registers.
+// In the future we may want to toggle these modes such that registers, for
+// example, are hidden by the module implementations to allow for more
// optimization opportunity but as a whole we trade off minimal memory
+// consumption for flexibility and debugging. Given that a single activation
+// tensor will usually dwarf the entire size of the stack used for an invocation
+// it's generally acceptable :)
+//
+// Stack frames and storage
+// ------------------------
+// Frames are stored as a linked list of iree_vm_stack_frame_header_t's
+// containing the API-visible stack frame information (such as which function
// the frame is in and its program counter) and the storage for registers used
+// by the frame. As all operations including stack dumps only ever need to
+// enumerate the frames in storage order there's no need to be able to randomly
+// index into them and the linked list combined with dynamic stack growth gives
+// us (practically) unlimited stack depth.
+//
+// [iree_vm_stack_t]
+// +- top -------> [frame 3 header] [registers] ---+
+// |
+// +--- [frame 2 header] [registers] <--+
+// |
+// +--> [frame 1 header] [registers] ---+
+// |
+// NULL <--- [frame 0 header] [registers] <--+
+//
+// To allow for static stack allocation and make allocating the VM stack on the
+// host stack or within an existing data structure the entire stack, including
+// all frame storage, can be placed into an existing allocation. This is similar
+// to inlined vectors/etc where some storage is available directly in the object
+// and only when exceeded will it switch to a dynamic allocation.
+//
+// Dynamic stack growth
+// --------------------
+// Though most of the stacks we deal with are rather shallow due to aggressive
+// inlining in the compiler it's still possible to spill any reasonably-sized
+// static storage allocation. This can be especially true in modules compiled
+// with optimizations disabled; for example the debug register allocator may
+// expand the required register count for a function from 30 to 3000.
+//
+// To support these cases the stack can optionally be provided an allocator to
+// enable it to grow the stack when the initial storage is exhausted. As we
+// store pointers to the stack storage within the storage itself (such as the
+// iree_vm_registers_t pointers) this means we need to perform a fixup step
+// during reallocation to ensure they are all updated. This also means that the
+// pointers to the stack frames are possibly invalidated on every function
+// entry and that users of the stack cannot rely on pointer stability during
+// execution.
+//
+// Calling convention
+// ------------------
+// Callers provide an arguments buffer and results buffer sized appropriately
+// for the call and with the arguments buffer populated. Callees will push
+// their new stack frame, copy or move the arguments from the caller buffer into
+// the callee frame, and then begin execution. Upon return the callee function
+// will move return values to the result buffer and pop their stack frame.
+//
+// By making the actual stack frame setup and teardown callee-controlled we can
+// have optimized implementations that treat register storage differently across
+// various frames. For example, native modules that store their registers in
+// host-machine specific registers can marshal the caller registers in/out of
+// the host registers (or stack/etc) without exposing the actual implementation
+// to the caller.
+//
+// Calling into the VM
+// -------------------
+// Calls from external code into the VM such as via iree_vm_invoke reuse the
+// same calling convention as internal-to-internal calls: callees load arguments
+// from the caller frame and store results into the caller frame.
+//
+// Marshaling arguments is easy given that the caller controls these and we can
+// trivially map the ordered set of argument types into the VM calling
+// convention buffers.
+//
+// A side-effect (beyond code reuse) is that ref types are retained by the VM
+// for the entire lifetime they may be accessible by VM routines. This lets us
+// get rich stack traces without needing to hook into external code and lets us
+// timeshift via coroutines where we may otherwise not know when the external
+// caller will resume a yielded call and actually read back the results.
+//
+// The overhead of this marshaling is minimal as external functions can always
+// use move semantics on the ref objects. Since we are reusing the normal VM
+// code paths which are likely still in instruction cache the bulk of the work
+// amounts to some small memcpys.
+
// Multiplier on the capacity of the stack frame storage when growing.
// Since we never shrink stacks it's nice to keep this relatively low. If we
// measure a lot of growth happening in normal models we should increase this
// but otherwise leave it as small as we can to avoid overallocation.
#define IREE_VM_STACK_GROWTH_FACTOR 2
+
+// A private stack frame header that allows us to walk the linked list of
+// frames without exposing their exact structure through the API. This makes it
+// easier for us to add/version additional information or hide implementation
+// details.
// A private stack frame header that allows us to walk the linked list of
// frames without exposing their exact structure through the API. This makes it
// easier for us to add/version additional information or hide implementation
// details.
typedef struct iree_vm_stack_frame_header_t {
  // Size, in bytes, of the frame header and frame payload including registers.
  // Adding this value to the base header pointer will yield the next available
  // memory location. Ensure that it does not exceed the total
  // frame_storage_capacity.
  iree_host_size_t frame_size;

  // Pointer to the parent stack frame, usually immediately preceding this one
  // in the frame storage. May be NULL.
  // NOTE(review): these parent links are rebased on stack growth (see
  // iree_vm_stack_grow) as they point into the relocatable frame storage.
  struct iree_vm_stack_frame_header_t* parent;

  // Stack frame type used to determine which fields are valid.
  iree_vm_stack_frame_type_t type;

  // Size, in bytes, of the additional stack frame data that follows the frame.
  iree_host_size_t data_size;

  // Function called when the stack frame is left.
  // May be NULL if the frame owner requires no cleanup.
  iree_vm_stack_frame_cleanup_fn_t frame_cleanup_fn;

  // Actual stack frame as visible through the API.
  // The registers within the frame will (likely) point to addresses immediately
  // following this header in memory.
  iree_vm_stack_frame_t frame;
} iree_vm_stack_frame_header_t;
+
+// Core stack storage. This will be mapped either into dynamic memory allocated
+// by the member allocator or static memory allocated externally. Static stacks
+// cannot grow when storage runs out while dynamic ones will resize their stack.
// Core stack storage. This will be mapped either into dynamic memory allocated
// by the member allocator or static memory allocated externally. Static stacks
// cannot grow when storage runs out while dynamic ones will resize their stack.
struct iree_vm_stack_t {
  // NOTE: to get better cache hit rates we put the most frequently accessed
  // members first.

  // Pointer to the current top of the stack.
  // This can be used to walk the stack from top to bottom by following the
  // |parent| pointers. Note that these pointers are invalidated each time the
  // stack grows (if dynamic growth is enabled) and all of the frames will need
  // updating.
  iree_vm_stack_frame_header_t* top;

  // Base pointer to stack storage.
  // For statically-allocated stacks this will (likely) point to immediately
  // after the iree_vm_stack_t in memory. For dynamically-allocated stacks this
  // will (likely) point to heap memory.
  // frame_storage_size is the current bump-pointer high-water mark: the byte
  // offset just past the topmost frame within frame_storage.
  iree_host_size_t frame_storage_capacity;
  iree_host_size_t frame_storage_size;
  void* frame_storage;

  // Flags controlling the behavior of the invocation owning this stack.
  iree_vm_invocation_flags_t flags;

  // True if the stack owns the frame_storage and should free it when it is no
  // longer required. Host stack-allocated stacks don't own their storage but
  // may transition to owning it on dynamic growth.
  bool owns_frame_storage;

  // Resolves a module to a module state within a context.
  // This will be called on function entry whenever module transitions occur.
  iree_vm_state_resolver_t state_resolver;

  // Allocator used for dynamic stack allocations. May be the null allocator
  // if growth is prohibited.
  iree_allocator_t allocator;
};
+
+//===----------------------------------------------------------------------===//
+// Stack implementation
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT iree_status_t iree_vm_stack_initialize(
+ iree_byte_span_t storage, iree_vm_invocation_flags_t flags,
+ iree_vm_state_resolver_t state_resolver, iree_allocator_t allocator,
+ iree_vm_stack_t** out_stack) {
+ IREE_ASSERT_ARGUMENT(out_stack);
+ *out_stack = NULL;
+ if (storage.data_length < IREE_VM_STACK_MIN_SIZE) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "stack storage under minimum required amount: %zu < %d",
+ storage.data_length, IREE_VM_STACK_MIN_SIZE);
+ }
+
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_vm_stack_t* stack = (iree_vm_stack_t*)storage.data;
+ memset(stack, 0, sizeof(iree_vm_stack_t));
+ stack->owns_frame_storage = false;
+ stack->flags = flags;
+ stack->state_resolver = state_resolver;
+ stack->allocator = allocator;
+
+ iree_host_size_t storage_offset =
+ iree_host_align(sizeof(iree_vm_stack_t), 16);
+ stack->frame_storage_capacity = storage.data_length - storage_offset;
+ stack->frame_storage_size = 0;
+ stack->frame_storage = storage.data + storage_offset;
+
+ stack->top = NULL;
+
+ *out_stack = stack;
+
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_vm_stack_deinitialize(iree_vm_stack_t* stack) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ while (stack->top) {
+ iree_status_ignore(iree_vm_stack_function_leave(stack));
+ }
+
+ if (stack->owns_frame_storage) {
+ iree_allocator_free(stack->allocator, stack->frame_storage);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_stack_allocate(
+ iree_vm_invocation_flags_t flags, iree_vm_state_resolver_t state_resolver,
+ iree_allocator_t allocator, iree_vm_stack_t** out_stack) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ *out_stack = NULL;
+
+ iree_host_size_t storage_size = IREE_VM_STACK_DEFAULT_SIZE;
+ void* storage = NULL;
+ iree_status_t status =
+ iree_allocator_malloc(allocator, storage_size, &storage);
+ iree_vm_stack_t* stack = NULL;
+ if (iree_status_is_ok(status)) {
+ iree_byte_span_t storage_span = iree_make_byte_span(storage, storage_size);
+ status = iree_vm_stack_initialize(storage_span, flags, state_resolver,
+ allocator, &stack);
+ }
+
+ *out_stack = stack;
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+}
+
+IREE_API_EXPORT void iree_vm_stack_free(iree_vm_stack_t* stack) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
+ iree_allocator_t allocator = stack->allocator;
+ void* storage = (void*)stack;
+ iree_vm_stack_deinitialize(stack);
+ iree_allocator_free(allocator, storage);
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
// Returns the invocation flags the stack was initialized with.
IREE_API_EXPORT iree_vm_invocation_flags_t
iree_vm_stack_invocation_flags(const iree_vm_stack_t* stack) {
  return stack->flags;
}
+
+IREE_API_EXPORT iree_vm_stack_frame_t* iree_vm_stack_current_frame(
+ iree_vm_stack_t* stack) {
+ return stack->top ? &stack->top->frame : NULL;
+}
+
+IREE_API_EXPORT iree_vm_stack_frame_t* iree_vm_stack_parent_frame(
+ iree_vm_stack_t* stack) {
+ if (!stack->top) return NULL;
+ iree_vm_stack_frame_header_t* parent_header = stack->top->parent;
+ return parent_header ? &parent_header->frame : NULL;
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_stack_query_module_state(
+ iree_vm_stack_t* stack, iree_vm_module_t* module,
+ iree_vm_module_state_t** out_module_state) {
+ return stack->state_resolver.query_module_state(stack->state_resolver.self,
+ module, out_module_state);
+}
+
// Attempts to grow the stack store to hold at least |minimum_capacity|.
// Pointers to existing stack frames will be invalidated and any pointers
// embedded in the stack frame data structures will be updated.
// Fails if dynamic stack growth is disabled or the allocator is OOM.
static iree_status_t iree_vm_stack_grow(iree_vm_stack_t* stack,
                                        iree_host_size_t minimum_capacity) {
  // A NULL allocator control function means no allocator was provided and
  // the stack must live entirely within its original storage.
  if (IREE_UNLIKELY(stack->allocator.ctl == NULL)) {
    return iree_make_status(
        IREE_STATUS_RESOURCE_EXHAUSTED,
        "stack initialized on the host stack and cannot grow");
  }

  // Ensure we grow at least as much as required, scaling geometrically by
  // the growth factor; capped below so a bad request can't balloon memory.
  iree_host_size_t new_capacity = stack->frame_storage_capacity;
  do {
    new_capacity *= IREE_VM_STACK_GROWTH_FACTOR;
  } while (new_capacity < minimum_capacity);
  if (new_capacity > IREE_VM_STACK_MAX_SIZE) {
    return iree_make_status(
        IREE_STATUS_RESOURCE_EXHAUSTED,
        "new stack size would exceed maximum size: %zu > %d", new_capacity,
        IREE_VM_STACK_MAX_SIZE);
  }

  IREE_TRACE_ZONE_BEGIN(z0);

  // Reallocate the frame storage. 99.9999% chance the new storage pointer will
  // differ and we'll need to fix up pointers so we just always do that.
  void* old_storage = stack->frame_storage;
  void* new_storage = stack->frame_storage;
  iree_status_t status;
  if (stack->owns_frame_storage) {
    // We own the storage already likely from a previous growth operation.
    status =
        iree_allocator_realloc(stack->allocator, new_capacity, &new_storage);
  } else {
    // We don't own the original storage so we are going to switch to our own
    // newly-allocated storage instead. We need to make sure we copy over the
    // existing stack contents.
    status =
        iree_allocator_malloc(stack->allocator, new_capacity, &new_storage);
    if (iree_status_is_ok(status)) {
      // Copying the full old capacity is a safe upper bound on the bytes in
      // use (frame_storage_size <= frame_storage_capacity).
      memcpy(new_storage, old_storage, stack->frame_storage_capacity);
    }
  }
  if (!iree_status_is_ok(status)) {
    IREE_TRACE_ZONE_END(z0);
    return status;
  }
  stack->frame_storage = new_storage;
  stack->frame_storage_capacity = new_capacity;
  stack->owns_frame_storage = true;

// Rewrites |ptr| so its offset relative to |old_base| is preserved relative
// to |new_base|; NULL pointers are left untouched.
#define REBASE_POINTER(type, ptr, old_base, new_base)           \
  if (ptr) {                                                    \
    (ptr) = (type)(((uintptr_t)(ptr) - (uintptr_t)(old_base)) + \
                   (uintptr_t)(new_base));                      \
  }

  // Fixup embedded stack frame pointers: the top pointer and every frame's
  // parent link all point into the relocated frame storage.
  REBASE_POINTER(iree_vm_stack_frame_header_t*, stack->top, old_storage,
                 new_storage);
  iree_vm_stack_frame_header_t* frame_header = stack->top;
  while (frame_header != NULL) {
    REBASE_POINTER(iree_vm_stack_frame_header_t*, frame_header->parent,
                   old_storage, new_storage);
    frame_header = frame_header->parent;
  }

  IREE_TRACE_ZONE_END(z0);
  return iree_ok_status();
}
+
// Pushes a new frame for |function| with |frame_size| bytes of per-frame
// data; on success |out_callee_frame| (if provided) receives the new frame.
IREE_API_EXPORT iree_status_t iree_vm_stack_function_enter(
    iree_vm_stack_t* stack, const iree_vm_function_t* function,
    iree_vm_stack_frame_type_t frame_type, iree_host_size_t frame_size,
    iree_vm_stack_frame_cleanup_fn_t frame_cleanup_fn,
    iree_vm_stack_frame_t** out_callee_frame) {
  if (out_callee_frame) *out_callee_frame = NULL;

  // Allocate stack space and grow stack, if required.
  // NOTE: growth may relocate storage and invalidate any frame pointers the
  // caller holds; stack->top is only read after this point.
  iree_host_size_t header_size = sizeof(iree_vm_stack_frame_header_t);
  iree_host_size_t new_top =
      stack->frame_storage_size + header_size + frame_size;
  if (IREE_UNLIKELY(new_top > stack->frame_storage_capacity)) {
    IREE_RETURN_IF_ERROR(iree_vm_stack_grow(stack, new_top));
  }

  // Try to reuse the same module state if the caller and callee are from the
  // same module. Otherwise, query the state from the registered handler.
  // A NULL callee module leaves module_state NULL.
  iree_vm_stack_frame_header_t* caller_frame_header = stack->top;
  iree_vm_stack_frame_t* caller_frame =
      caller_frame_header ? &caller_frame_header->frame : NULL;
  iree_vm_module_state_t* module_state = NULL;
  if (caller_frame && caller_frame->function.module == function->module) {
    module_state = caller_frame->module_state;
  } else if (function->module != NULL) {
    IREE_RETURN_IF_ERROR(stack->state_resolver.query_module_state(
        stack->state_resolver.self, function->module, &module_state));
  }

  // Bump pointer and get real stack pointer offsets.
  // The new header (and its trailing frame data) is zero-initialized.
  iree_vm_stack_frame_header_t* frame_header =
      (iree_vm_stack_frame_header_t*)((uintptr_t)stack->frame_storage +
                                      stack->frame_storage_size);
  memset(frame_header, 0, header_size + frame_size);

  frame_header->frame_size = header_size + frame_size;
  frame_header->parent = stack->top;
  frame_header->type = frame_type;
  frame_header->data_size = frame_size;
  frame_header->frame_cleanup_fn = frame_cleanup_fn;

  iree_vm_stack_frame_t* callee_frame = &frame_header->frame;
  callee_frame->function = *function;
  callee_frame->module_state = module_state;
  callee_frame->pc = 0;
  callee_frame->depth = caller_frame ? caller_frame->depth + 1 : 0;

  stack->frame_storage_size = new_top;
  stack->top = frame_header;

  // Tracing-only: open a zone named after the function so profiles show the
  // VM call tree; compiled out when tracing is disabled.
  IREE_TRACE({
    if (frame_type != IREE_VM_STACK_FRAME_NATIVE) {
      // TODO(benvanik): cache source location and query from module.
      iree_string_view_t function_name = iree_vm_function_name(function);
      IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, function_name.data,
                                          function_name.size);
      callee_frame->trace_zone = z0;
      if (frame_size) {
        IREE_TRACE_ZONE_APPEND_VALUE(z0, frame_size);
      }
    }
  });

  if (out_callee_frame) *out_callee_frame = callee_frame;
  return iree_ok_status();
}
+
+IREE_API_EXPORT iree_status_t
+iree_vm_stack_function_leave(iree_vm_stack_t* stack) {
+ if (IREE_UNLIKELY(!stack->top)) {
+ return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+ "unbalanced stack leave");
+ }
+
+ // Call (optional) frame storage cleanup function.
+ if (stack->top->frame_cleanup_fn) {
+ stack->top->frame_cleanup_fn(&stack->top->frame);
+ }
+
+ IREE_TRACE({
+ if (stack->top->frame.trace_zone) {
+ IREE_TRACE_ZONE_END(stack->top->frame.trace_zone);
+ }
+ });
+
+ // Restore the frame pointer to the caller.
+ stack->frame_storage_size -= stack->top->frame_size;
+ stack->top = stack->top->parent;
+
+ return iree_ok_status();
+}
+
// Appends a human-readable backtrace of all live frames (top-down) to
// |builder|. Each line carries the frame depth, frame type, module/function
// identification, PC, and (when resolvable) a source location.
IREE_API_EXPORT iree_status_t iree_vm_stack_format_backtrace(
    iree_vm_stack_t* stack, iree_string_builder_t* builder) {
  for (iree_vm_stack_frame_header_t* frame = stack->top; frame != NULL;
       frame = frame->parent) {
    // Stack frame prefix.
    const char* type_str;
    switch (frame->type) {
      default:
        type_str = "??";
        break;
      case IREE_VM_STACK_FRAME_EXTERNAL:
        type_str = "external";
        break;
      case IREE_VM_STACK_FRAME_NATIVE:
        type_str = "native";
        break;
      case IREE_VM_STACK_FRAME_BYTECODE:
        type_str = "bytecode";
        break;
    }
    IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
        builder, "\n[%*" PRId32 "] %*s ", 2, frame->frame.depth, 8, type_str));

    // Common module/function name and PC.
    // Unnamed functions fall back to module@ordinal form.
    iree_string_view_t module_name =
        iree_vm_module_name(frame->frame.function.module);
    iree_string_view_t function_name =
        iree_vm_function_name(&frame->frame.function);
    if (iree_string_view_is_empty(function_name)) {
      IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
          builder, "%.*s@%d", (int)module_name.size, module_name.data,
          (int)frame->frame.function.ordinal));
    } else {
      IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
          builder, "%.*s.%.*s", (int)module_name.size, module_name.data,
          (int)function_name.size, function_name.data));
    }
    IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
        builder, ":%" PRIu64 " ", (uint64_t)frame->frame.pc));

    // Best-effort source location: modules without debug info report
    // unavailable, which we render as "-" instead of failing the backtrace.
    iree_vm_module_t* module = frame->frame.function.module;
    iree_vm_source_location_t source_location;
    iree_status_t status = iree_vm_module_resolve_source_location(
        module, &frame->frame, &source_location);
    if (iree_status_is_ok(status)) {
      status = iree_vm_source_location_format(
          &source_location, IREE_VM_SOURCE_LOCATION_FORMAT_FLAG_NONE, builder);
    }
    if (iree_status_is_unavailable(status)) {
      // TODO(benvanik): if this is an import/export we can get that name.
      IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "-"));
    } else if (!iree_status_is_ok(status)) {
      return status;
    }
  }
  return iree_ok_status();
}
+
+IREE_API_EXPORT iree_status_t iree_vm_stack_annotate_backtrace(
+ iree_vm_stack_t* stack, iree_status_t base_status) {
+ iree_string_builder_t builder;
+ iree_string_builder_initialize(stack->allocator, &builder);
+ iree_status_t status = iree_vm_stack_format_backtrace(stack, &builder);
+ if (iree_status_is_ok(status)) {
+ // TODO(benvanik): don't duplicate the buffer here - we should be attaching
+ // a payload but that requires additional plumbing.
+ status = iree_status_annotate_f(base_status, "%.*s",
+ (int)iree_string_builder_size(&builder),
+ iree_string_builder_buffer(&builder));
+ }
+ iree_string_builder_deinitialize(&builder);
+ return status;
+}
diff --git a/runtime/src/iree/vm/stack.h b/runtime/src/iree/vm/stack.h
new file mode 100644
index 0000000..abd5b75
--- /dev/null
+++ b/runtime/src/iree/vm/stack.h
@@ -0,0 +1,248 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_STACK_H_
+#define IREE_VM_STACK_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "iree/base/alignment.h"
+#include "iree/base/api.h"
+#include "iree/base/attributes.h"
+#include "iree/base/string_builder.h"
+#include "iree/base/tracing.h"
+#include "iree/vm/module.h"
+#include "iree/vm/ref.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// A reasonable default stack storage size, in bytes.
+// This will allow most (reasonable) programs to run. If running
+// unverified/untested programs then prefer to use a dynamically growable stack
+// until the expectations of the programs are checked; for example, hopefully
+// in a year or two we have much more complex models with much deeper call
+// stacks and we may want to re-evaluate the host-stack allocation size.
+//
+// The value was chosen to fit quite a few i32 registers and a reasonable amount
+// of ref registers (that are 2 * sizeof(void*)). For many invocations this will
+// be more than enough to perform the work without needing an additional dynamic
+// allocation/resize.
+#define IREE_VM_STACK_DEFAULT_SIZE (8 * 1024)
+
+// The minimum size of VM stack storage.
+#define IREE_VM_STACK_MIN_SIZE (1 * 1024)
+
+// The maximum size of VM stack storage; anything larger is probably a bug.
+#define IREE_VM_STACK_MAX_SIZE (1 * 1024 * 1024)
+
+enum iree_vm_invocation_flag_bits_t {
+ IREE_VM_INVOCATION_FLAG_NONE = 0u,
+
+ // Enables tracing of execution to stderr (when available) for the invocation.
+ // See iree/base/config.h for the flags that control whether this
+ // functionality is available; specifically:
+ // -DIREE_VM_EXECUTION_TRACING_ENABLE=1
+ IREE_VM_INVOCATION_FLAG_TRACE_EXECUTION = 1u << 0,
+};
+typedef uint32_t iree_vm_invocation_flags_t;
+
+typedef enum iree_vm_stack_frame_type_e {
+ // Represents an `[external]` frame that needs to marshal args/results.
+ // These frames have no source location and are tracked so that we know when
+ // transitions occur into/out-of external code.
+ IREE_VM_STACK_FRAME_EXTERNAL = 0,
+ // Represents a `[native]` frame that has no persistent register storage.
+ // These frames may have source location information provided by the
+ // implementation.
+ IREE_VM_STACK_FRAME_NATIVE = 1,
+ // VM stack frame in bytecode using internal register storage.
+ IREE_VM_STACK_FRAME_BYTECODE = 2,
+} iree_vm_stack_frame_type_t;
+
+// A single stack frame within the VM.
+//
+// NOTE: to (try to) get better cache hit rates we put the most frequently
+// accessed members **LAST**. This is because the custom frame storage data
+// immediately follows this struct in memory and is highly likely to be touched
+// by the callee immediately and repeatedly.
+typedef struct iree_vm_stack_frame_t {
+ // Function that the stack frame is within.
+ iree_vm_function_t function;
+
+ // Cached module state pointer for the module containing |function|.
+ // This removes the need to lookup the module state when control returns to
+ // the function during continuation or from a return instruction.
+ iree_vm_module_state_t* module_state;
+
+ // Current program counter within the function.
+ // Implementations may treat this offset differently, treating it as a byte
+ // offset (such as in the case of VM bytecode), a block identifier (compiled
+ // code), etc.
+ iree_vm_source_offset_t pc;
+
+ // Depth of the frame within the stack.
+ // As stack frame pointers are not stable this can be used instead to detect
+ // stack enter/leave balance issues.
+ int32_t depth;
+
+ IREE_TRACE(iree_zone_id_t trace_zone;)
+} iree_vm_stack_frame_t;
+
+// Returns the implementation-defined frame storage associated with |frame|.
+// The pointer will contain at least as many bytes as requested by frame_size.
+static inline void* iree_vm_stack_frame_storage(iree_vm_stack_frame_t* frame) {
+ return (void*)((uintptr_t)frame + sizeof(iree_vm_stack_frame_t));
+}
+
+// Callback for cleaning up stack frame storage before a frame is left or the
+// stack is destroyed.
+typedef void(IREE_API_PTR* iree_vm_stack_frame_cleanup_fn_t)(
+ iree_vm_stack_frame_t* frame);
+
+// A state resolver that can allocate or lookup module state.
+typedef struct iree_vm_state_resolver_t {
+ void* self;
+ iree_status_t(IREE_API_PTR* query_module_state)(
+ void* state_resolver, iree_vm_module_t* module,
+ iree_vm_module_state_t** out_module_state);
+} iree_vm_state_resolver_t;
+
+// A fiber stack used for storing stack frame state during execution.
+// All required state is stored within the stack and no host thread-local state
+// is used allowing us to execute multiple fibers on the same host thread.
+typedef struct iree_vm_stack_t iree_vm_stack_t;
+
+// Defines and initializes an inline VM stack.
+// The stack will be ready for use and must be deinitialized with
+// iree_vm_stack_deinitialize when no longer required.
+//
+// Example:
+// IREE_VM_INLINE_STACK_INITIALIZE(
+// stack,
+// IREE_VM_INVOCATION_FLAG_NONE,
+// iree_vm_context_state_resolver(context),
+// iree_allocator_system());
+// ...
+// iree_vm_stack_deinitialize(stack);
+#define IREE_VM_INLINE_STACK_INITIALIZE(stack, flags, state_resolver, \
+ allocator) \
+ uint8_t __stack_storage[IREE_VM_STACK_DEFAULT_SIZE]; \
+ iree_byte_span_t __stack_storage_span = \
+ iree_make_byte_span(__stack_storage, sizeof(__stack_storage)); \
+ iree_vm_stack_t* stack = NULL; \
+ IREE_IGNORE_ERROR(iree_vm_stack_initialize( \
+ __stack_storage_span, (flags), (state_resolver), (allocator), &stack));
+
+// Initializes a statically-allocated stack in |storage|.
+// The contents of the |storage| can be anything upon initialization and the
+// stack must be deinitialized with iree_vm_stack_deinitialize before the
+// storage is freed. The provided |allocator| is only used for stack growth
+// beyond the initial storage capacity and may be iree_allocator_null() to
+// prevent growth. Use IREE_VM_STACK_DEFAULT_SIZE for a reasonable default or
+// use iree_vm_stack_allocate if the input programs may exceed reason.
+//
+// The provided |state_resolver| will be used to resolve a module to a module
+// state within a context. This will be called on function entry whenever module
+// transitions occur.
+//
+// Example:
+// uint8_t stack_storage[IREE_VM_STACK_DEFAULT_SIZE];
+// iree_vm_stack_t* stack = NULL;
+// iree_vm_stack_initialize(stack_storage, ..., &stack);
+// ...
+// iree_vm_stack_deinitialize(stack);
+// // stack_storage can now be reused/freed/etc
+IREE_API_EXPORT iree_status_t iree_vm_stack_initialize(
+ iree_byte_span_t storage, iree_vm_invocation_flags_t flags,
+ iree_vm_state_resolver_t state_resolver, iree_allocator_t allocator,
+ iree_vm_stack_t** out_stack);
+
+// Deinitializes a statically-allocated |stack| previously initialized with
+// iree_vm_stack_initialize.
+IREE_API_EXPORT void iree_vm_stack_deinitialize(iree_vm_stack_t* stack);
+
+// Allocates a dynamically-growable stack.
+//
+// The provided |state_resolver| will be used to resolve a module to a module
+// state within a context. This will be called on function entry whenever module
+// transitions occur.
+//
+// The stack will be allocated from |allocator| and returned in |out_stack|.
+// It must be freed with iree_vm_stack_free.
+//
+// Example:
+// iree_vm_stack_t* stack = NULL;
+// iree_vm_stack_allocate(..., iree_allocator_system(), &stack);
+// ...
+// iree_vm_stack_free(stack);
+IREE_API_EXPORT iree_status_t iree_vm_stack_allocate(
+ iree_vm_invocation_flags_t flags, iree_vm_state_resolver_t state_resolver,
+ iree_allocator_t allocator, iree_vm_stack_t** out_stack);
+
+// Frees a dynamically-allocated |stack| from iree_vm_stack_allocate.
+IREE_API_EXPORT void iree_vm_stack_free(iree_vm_stack_t* stack);
+
+// Returns the flags controlling the invocation this stack is used with.
+IREE_API_EXPORT iree_vm_invocation_flags_t
+iree_vm_stack_invocation_flags(const iree_vm_stack_t* stack);
+
+// Returns the current stack frame or NULL if the stack is empty.
+IREE_API_EXPORT iree_vm_stack_frame_t* iree_vm_stack_current_frame(
+ iree_vm_stack_t* stack);
+
+// Returns the parent stack frame or NULL if the stack is empty.
+IREE_API_EXPORT iree_vm_stack_frame_t* iree_vm_stack_parent_frame(
+ iree_vm_stack_t* stack);
+
+// Queries the context-specific module state for the given module.
+IREE_API_EXPORT iree_status_t iree_vm_stack_query_module_state(
+ iree_vm_stack_t* stack, iree_vm_module_t* module,
+ iree_vm_module_state_t** out_module_state);
+
+// Enters into the given |function| and returns the callee stack frame.
+// May invalidate any pointers to stack frames and the only pointer that can be
+// assumed valid after return is the one in |out_callee_frame|.
+//
+// |frame_size| can optionally be used to allocate storage within the stack for
+// callee data. |frame_cleanup_fn| will be called when the frame is left either
+// normally via an iree_vm_stack_function_leave call or if an error occurs and
+// the stack needs to be torn down.
+IREE_API_EXPORT iree_status_t iree_vm_stack_function_enter(
+ iree_vm_stack_t* stack, const iree_vm_function_t* function,
+ iree_vm_stack_frame_type_t frame_type, iree_host_size_t frame_size,
+ iree_vm_stack_frame_cleanup_fn_t frame_cleanup_fn,
+ iree_vm_stack_frame_t** out_callee_frame);
+
+// Leaves the current stack frame.
+IREE_API_EXPORT iree_status_t
+iree_vm_stack_function_leave(iree_vm_stack_t* stack);
+
+// Formats a backtrace of the current stack to the given string |builder|.
+IREE_API_EXPORT iree_status_t iree_vm_stack_format_backtrace(
+ iree_vm_stack_t* stack, iree_string_builder_t* builder);
+
+// Annotates |base_status| with the backtrace of |stack| and returns it.
+IREE_API_EXPORT IREE_MUST_USE_RESULT iree_status_t
+iree_vm_stack_annotate_backtrace(iree_vm_stack_t* stack,
+ iree_status_t base_status);
+
+#if IREE_VM_BACKTRACE_ENABLE && \
+ (IREE_STATUS_FEATURES & IREE_STATUS_FEATURE_ANNOTATIONS)
+#define IREE_VM_STACK_ANNOTATE_BACKTRACE_IF_ENABLED(stack, base_status) \
+ iree_vm_stack_annotate_backtrace(stack, base_status)
+#else
+#define IREE_VM_STACK_ANNOTATE_BACKTRACE_IF_ENABLED(stack, base_status) \
+ (base_status)
+#endif // IREE_VM_BACKTRACE_ENABLE && IREE_STATUS_FEATURE_ANNOTATIONS
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_STACK_H_
diff --git a/runtime/src/iree/vm/stack_test.cc b/runtime/src/iree/vm/stack_test.cc
new file mode 100644
index 0000000..80303df
--- /dev/null
+++ b/runtime/src/iree/vm/stack_test.cc
@@ -0,0 +1,202 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/stack.h"
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+#define MODULE_A_SENTINEL reinterpret_cast<iree_vm_module_t*>(1)
+#define MODULE_B_SENTINEL reinterpret_cast<iree_vm_module_t*>(2)
+#define MODULE_A_STATE_SENTINEL reinterpret_cast<iree_vm_module_state_t*>(101)
+#define MODULE_B_STATE_SENTINEL reinterpret_cast<iree_vm_module_state_t*>(102)
+
+static int module_a_state_resolve_count = 0;
+static int module_b_state_resolve_count = 0;
+static iree_status_t SentinelStateResolver(
+ void* state_resolver, iree_vm_module_t* module,
+ iree_vm_module_state_t** out_module_state) {
+ if (module == MODULE_A_SENTINEL) {
+ ++module_a_state_resolve_count;
+ *out_module_state = MODULE_A_STATE_SENTINEL;
+ return iree_ok_status();
+ } else if (module == MODULE_B_SENTINEL) {
+ ++module_b_state_resolve_count;
+ *out_module_state = MODULE_B_STATE_SENTINEL;
+ return iree_ok_status();
+ }
+ return iree_make_status(IREE_STATUS_NOT_FOUND);
+}
+
+// Tests simple stack usage, mainly just for demonstration.
+TEST(VMStackTest, Usage) {
+ iree_vm_state_resolver_t state_resolver = {nullptr, SentinelStateResolver};
+ IREE_VM_INLINE_STACK_INITIALIZE(stack, IREE_VM_INVOCATION_FLAG_NONE,
+ state_resolver, iree_allocator_system());
+
+ EXPECT_EQ(nullptr, iree_vm_stack_current_frame(stack));
+ EXPECT_EQ(nullptr, iree_vm_stack_parent_frame(stack));
+
+ iree_vm_function_t function_a = {MODULE_A_SENTINEL,
+ IREE_VM_FUNCTION_LINKAGE_INTERNAL, 0};
+ iree_vm_stack_frame_t* frame_a = nullptr;
+ IREE_EXPECT_OK(iree_vm_stack_function_enter(
+ stack, &function_a, IREE_VM_STACK_FRAME_NATIVE, 0, NULL, &frame_a));
+ EXPECT_EQ(0, frame_a->function.ordinal);
+ EXPECT_EQ(frame_a, iree_vm_stack_current_frame(stack));
+ EXPECT_EQ(nullptr, iree_vm_stack_parent_frame(stack));
+
+ iree_vm_function_t function_b = {MODULE_B_SENTINEL,
+ IREE_VM_FUNCTION_LINKAGE_INTERNAL, 1};
+ iree_vm_stack_frame_t* frame_b = nullptr;
+ IREE_EXPECT_OK(iree_vm_stack_function_enter(
+ stack, &function_b, IREE_VM_STACK_FRAME_NATIVE, 0, NULL, &frame_b));
+ EXPECT_EQ(1, frame_b->function.ordinal);
+ EXPECT_EQ(frame_b, iree_vm_stack_current_frame(stack));
+ EXPECT_EQ(frame_a, iree_vm_stack_parent_frame(stack));
+
+ IREE_EXPECT_OK(iree_vm_stack_function_leave(stack));
+ EXPECT_EQ(frame_a, iree_vm_stack_current_frame(stack));
+ EXPECT_EQ(nullptr, iree_vm_stack_parent_frame(stack));
+ IREE_EXPECT_OK(iree_vm_stack_function_leave(stack));
+ EXPECT_EQ(nullptr, iree_vm_stack_current_frame(stack));
+ EXPECT_EQ(nullptr, iree_vm_stack_parent_frame(stack));
+
+ iree_vm_stack_deinitialize(stack);
+}
+
+// Tests stack cleanup with unpopped frames (like during failure teardown).
+TEST(VMStackTest, DeinitWithRemainingFrames) {
+ iree_vm_state_resolver_t state_resolver = {nullptr, SentinelStateResolver};
+ IREE_VM_INLINE_STACK_INITIALIZE(stack, IREE_VM_INVOCATION_FLAG_NONE,
+ state_resolver, iree_allocator_system());
+
+ iree_vm_function_t function_a = {MODULE_A_SENTINEL,
+ IREE_VM_FUNCTION_LINKAGE_INTERNAL, 0};
+ iree_vm_stack_frame_t* frame_a = nullptr;
+ IREE_EXPECT_OK(iree_vm_stack_function_enter(
+ stack, &function_a, IREE_VM_STACK_FRAME_NATIVE, 0, NULL, &frame_a));
+ EXPECT_EQ(0, frame_a->function.ordinal);
+ EXPECT_EQ(frame_a, iree_vm_stack_current_frame(stack));
+ EXPECT_EQ(nullptr, iree_vm_stack_parent_frame(stack));
+
+ // Don't pop the last frame before deinit; it should handle it.
+ iree_vm_stack_deinitialize(stack);
+}
+
+// Tests stack overflow detection.
+TEST(VMStackTest, StackOverflow) {
+ iree_vm_state_resolver_t state_resolver = {nullptr, SentinelStateResolver};
+ IREE_VM_INLINE_STACK_INITIALIZE(stack, IREE_VM_INVOCATION_FLAG_NONE,
+ state_resolver, iree_allocator_system());
+
+ EXPECT_EQ(nullptr, iree_vm_stack_current_frame(stack));
+ EXPECT_EQ(nullptr, iree_vm_stack_parent_frame(stack));
+
+ // Fill the entire stack up to the max.
+ iree_vm_function_t function_a = {MODULE_A_SENTINEL,
+ IREE_VM_FUNCTION_LINKAGE_INTERNAL, 0};
+ bool did_overflow = false;
+ for (int i = 0; i < 99999; ++i) {
+ iree_vm_stack_frame_t* frame_a = nullptr;
+ iree_status_t status = iree_vm_stack_function_enter(
+ stack, &function_a, IREE_VM_STACK_FRAME_NATIVE, 0, NULL, &frame_a);
+ if (iree_status_is_resource_exhausted(status)) {
+ // Hit the stack overflow, as expected.
+ did_overflow = true;
+ IREE_IGNORE_ERROR(status);
+ break;
+ }
+ IREE_EXPECT_OK(status);
+ }
+ ASSERT_TRUE(did_overflow);
+
+ iree_vm_stack_deinitialize(stack);
+}
+
+// Tests unbalanced stack popping.
+TEST(VMStackTest, UnbalancedPop) {
+ iree_vm_state_resolver_t state_resolver = {nullptr, SentinelStateResolver};
+ IREE_VM_INLINE_STACK_INITIALIZE(stack, IREE_VM_INVOCATION_FLAG_NONE,
+ state_resolver, iree_allocator_system());
+
+ iree_status_t status = iree_vm_stack_function_leave(stack);
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_FAILED_PRECONDITION, status);
+ iree_status_free(status);
+
+ iree_vm_stack_deinitialize(stack);
+}
+
+// Tests module state reuse and querying.
+TEST(VMStackTest, ModuleStateQueries) {
+ iree_vm_state_resolver_t state_resolver = {nullptr, SentinelStateResolver};
+ IREE_VM_INLINE_STACK_INITIALIZE(stack, IREE_VM_INVOCATION_FLAG_NONE,
+ state_resolver, iree_allocator_system());
+
+ EXPECT_EQ(nullptr, iree_vm_stack_current_frame(stack));
+ EXPECT_EQ(nullptr, iree_vm_stack_parent_frame(stack));
+
+ module_a_state_resolve_count = 0;
+ module_b_state_resolve_count = 0;
+
+ // [A (queried)]
+ iree_vm_function_t function_a = {MODULE_A_SENTINEL,
+ IREE_VM_FUNCTION_LINKAGE_INTERNAL, 0};
+ iree_vm_stack_frame_t* frame_a = nullptr;
+ IREE_EXPECT_OK(iree_vm_stack_function_enter(
+ stack, &function_a, IREE_VM_STACK_FRAME_NATIVE, 0, NULL, &frame_a));
+ EXPECT_EQ(MODULE_A_STATE_SENTINEL, frame_a->module_state);
+ EXPECT_EQ(1, module_a_state_resolve_count);
+
+ // [A, B (queried)]
+ iree_vm_function_t function_b = {MODULE_B_SENTINEL,
+ IREE_VM_FUNCTION_LINKAGE_INTERNAL, 1};
+ iree_vm_stack_frame_t* frame_b = nullptr;
+ IREE_EXPECT_OK(iree_vm_stack_function_enter(
+ stack, &function_b, IREE_VM_STACK_FRAME_NATIVE, 0, NULL, &frame_b));
+ EXPECT_EQ(MODULE_B_STATE_SENTINEL, frame_b->module_state);
+ EXPECT_EQ(1, module_b_state_resolve_count);
+
+ // [A, B, B (reuse)]
+ IREE_EXPECT_OK(iree_vm_stack_function_enter(
+ stack, &function_b, IREE_VM_STACK_FRAME_NATIVE, 0, NULL, &frame_b));
+ EXPECT_EQ(MODULE_B_STATE_SENTINEL, frame_b->module_state);
+ EXPECT_EQ(1, module_b_state_resolve_count);
+
+ IREE_EXPECT_OK(iree_vm_stack_function_leave(stack));
+ IREE_EXPECT_OK(iree_vm_stack_function_leave(stack));
+ IREE_EXPECT_OK(iree_vm_stack_function_leave(stack));
+
+ iree_vm_stack_deinitialize(stack);
+}
+
+// Tests that module state query failures propagate to callers correctly.
+TEST(VMStackTest, ModuleStateQueryFailure) {
+ iree_vm_state_resolver_t state_resolver = {
+ nullptr,
+ +[](void* state_resolver, iree_vm_module_t* module,
+ iree_vm_module_state_t** out_module_state) -> iree_status_t {
+ // NOTE: always failing.
+ return iree_make_status(IREE_STATUS_INTERNAL);
+ }};
+ IREE_VM_INLINE_STACK_INITIALIZE(stack, IREE_VM_INVOCATION_FLAG_NONE,
+ state_resolver, iree_allocator_system());
+
+ // Push should fail if we can't query state, status should propagate.
+ iree_vm_function_t function_a = {MODULE_A_SENTINEL,
+ IREE_VM_FUNCTION_LINKAGE_INTERNAL, 0};
+ iree_vm_stack_frame_t* frame_a = nullptr;
+ iree_status_t status = iree_vm_stack_function_enter(
+ stack, &function_a, IREE_VM_STACK_FRAME_NATIVE, 0, NULL, &frame_a);
+ IREE_EXPECT_STATUS_IS(IREE_STATUS_INTERNAL, status);
+ iree_status_free(status);
+ iree_vm_stack_deinitialize(stack);
+}
+
+} // namespace
diff --git a/runtime/src/iree/vm/test/BUILD b/runtime/src/iree/vm/test/BUILD
new file mode 100644
index 0000000..c0f5ba5
--- /dev/null
+++ b/runtime/src/iree/vm/test/BUILD
@@ -0,0 +1,225 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content")
+load("//build_tools/bazel:iree_bytecode_module.bzl", "iree_bytecode_module")
+load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = ["layering_check"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+iree_cmake_extra_content(
+ content = """
+if (NOT ${IREE_BUILD_COMPILER} OR NOT ${IREE_BUILD_TESTS})
+ return()
+endif()
+""",
+ inline = True,
+)
+
+c_embed_data(
+ name = "all_bytecode_modules_c",
+ srcs = [
+ ":arithmetic_ops.vmfb",
+ ":arithmetic_ops_f32.vmfb",
+ ":arithmetic_ops_i64.vmfb",
+ ":assignment_ops.vmfb",
+ ":assignment_ops_f32.vmfb",
+ ":assignment_ops_i64.vmfb",
+ ":buffer_ops.vmfb",
+ ":call_ops.vmfb",
+ ":comparison_ops.vmfb",
+ ":comparison_ops_f32.vmfb",
+ ":comparison_ops_i64.vmfb",
+ ":control_flow_ops.vmfb",
+ ":conversion_ops.vmfb",
+ ":conversion_ops_f32.vmfb",
+ ":conversion_ops_i64.vmfb",
+ ":global_ops.vmfb",
+ ":global_ops_f32.vmfb",
+ ":global_ops_i64.vmfb",
+ ":list_ops.vmfb",
+ ":list_ops_i64.vmfb",
+ ":list_variant_ops.vmfb",
+ ":ref_ops.vmfb",
+ ":shift_ops.vmfb",
+ ":shift_ops_i64.vmfb",
+ ],
+ c_file_output = "all_bytecode_modules.c",
+ flatten = True,
+ h_file_output = "all_bytecode_modules.h",
+)
+
+iree_bytecode_module(
+ name = "arithmetic_ops",
+ src = "arithmetic_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "arithmetic_ops_f32",
+ src = "arithmetic_ops_f32.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "arithmetic_ops_i64",
+ src = "arithmetic_ops_i64.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "assignment_ops",
+ src = "assignment_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "assignment_ops_f32",
+ src = "assignment_ops_f32.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "assignment_ops_i64",
+ src = "assignment_ops_i64.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "buffer_ops",
+ src = "buffer_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "call_ops",
+ src = "call_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "comparison_ops",
+ src = "comparison_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "comparison_ops_f32",
+ src = "comparison_ops_f32.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "comparison_ops_i64",
+ src = "comparison_ops_i64.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "control_flow_ops",
+ src = "control_flow_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "conversion_ops",
+ src = "conversion_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "conversion_ops_f32",
+ src = "conversion_ops_f32.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "conversion_ops_i64",
+ src = "conversion_ops_i64.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "global_ops",
+ src = "global_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "global_ops_f32",
+ src = "global_ops_f32.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "global_ops_i64",
+ src = "global_ops_i64.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "list_ops",
+ src = "list_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "list_ops_i64",
+ src = "list_ops_i64.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "list_variant_ops",
+ src = "list_variant_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "ref_ops",
+ src = "ref_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "shift_ops",
+ src = "shift_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
+
+iree_bytecode_module(
+ name = "shift_ops_i64",
+ src = "shift_ops_i64.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+ translate_tool = "//iree/tools:iree-translate",
+)
diff --git a/runtime/src/iree/vm/test/CMakeLists.txt b/runtime/src/iree/vm/test/CMakeLists.txt
new file mode 100644
index 0000000..80e96c0
--- /dev/null
+++ b/runtime/src/iree/vm/test/CMakeLists.txt
@@ -0,0 +1,341 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/vm/test/BUILD #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+if (NOT ${IREE_BUILD_COMPILER} OR NOT ${IREE_BUILD_TESTS})
+ return()
+endif()
+
+iree_c_embed_data(
+ NAME
+ all_bytecode_modules_c
+ GENERATED_SRCS
+ "arithmetic_ops.vmfb"
+ "arithmetic_ops_f32.vmfb"
+ "arithmetic_ops_i64.vmfb"
+ "assignment_ops.vmfb"
+ "assignment_ops_f32.vmfb"
+ "assignment_ops_i64.vmfb"
+ "buffer_ops.vmfb"
+ "call_ops.vmfb"
+ "comparison_ops.vmfb"
+ "comparison_ops_f32.vmfb"
+ "comparison_ops_i64.vmfb"
+ "control_flow_ops.vmfb"
+ "conversion_ops.vmfb"
+ "conversion_ops_f32.vmfb"
+ "conversion_ops_i64.vmfb"
+ "global_ops.vmfb"
+ "global_ops_f32.vmfb"
+ "global_ops_i64.vmfb"
+ "list_ops.vmfb"
+ "list_ops_i64.vmfb"
+ "list_variant_ops.vmfb"
+ "ref_ops.vmfb"
+ "shift_ops.vmfb"
+ "shift_ops_i64.vmfb"
+ C_FILE_OUTPUT
+ "all_bytecode_modules.c"
+ H_FILE_OUTPUT
+ "all_bytecode_modules.h"
+ FLATTEN
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ arithmetic_ops
+ SRC
+ "arithmetic_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ arithmetic_ops_f32
+ SRC
+ "arithmetic_ops_f32.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ arithmetic_ops_i64
+ SRC
+ "arithmetic_ops_i64.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ assignment_ops
+ SRC
+ "assignment_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ assignment_ops_f32
+ SRC
+ "assignment_ops_f32.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ assignment_ops_i64
+ SRC
+ "assignment_ops_i64.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ buffer_ops
+ SRC
+ "buffer_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ call_ops
+ SRC
+ "call_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ comparison_ops
+ SRC
+ "comparison_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ comparison_ops_f32
+ SRC
+ "comparison_ops_f32.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ comparison_ops_i64
+ SRC
+ "comparison_ops_i64.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ control_flow_ops
+ SRC
+ "control_flow_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ conversion_ops
+ SRC
+ "conversion_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ conversion_ops_f32
+ SRC
+ "conversion_ops_f32.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ conversion_ops_i64
+ SRC
+ "conversion_ops_i64.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ global_ops
+ SRC
+ "global_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ global_ops_f32
+ SRC
+ "global_ops_f32.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ global_ops_i64
+ SRC
+ "global_ops_i64.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ list_ops
+ SRC
+ "list_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ list_ops_i64
+ SRC
+ "list_ops_i64.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ list_variant_ops
+ SRC
+ "list_variant_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ ref_ops
+ SRC
+ "ref_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ shift_ops
+ SRC
+ "shift_ops.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
+ shift_ops_i64
+ SRC
+ "shift_ops_i64.mlir"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/vm/test/arithmetic_ops.mlir b/runtime/src/iree/vm/test/arithmetic_ops.mlir
new file mode 100644
index 0000000..65b82dc
--- /dev/null
+++ b/runtime/src/iree/vm/test/arithmetic_ops.mlir
@@ -0,0 +1,146 @@
+vm.module @arithmetic_ops {
+
+ //===--------------------------------------------------------------------===//
+ // Native integer arithmetic
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_add_i32
+ vm.func @test_add_i32() {
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.add.i32 %c1dno, %c1dno : i32
+ %c2 = vm.const.i32 2
+ vm.check.eq %v, %c2, "1+1=2" : i32
+ vm.return
+ }
+
+ vm.export @test_sub_i32
+ vm.func @test_sub_i32() {
+ %c1 = vm.const.i32 3
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 2
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %v = vm.sub.i32 %c1dno, %c2dno : i32
+ %c3 = vm.const.i32 1
+ vm.check.eq %v, %c3, "3-2=1" : i32
+ vm.return
+ }
+
+ vm.export @test_mul_i32
+ vm.func @test_mul_i32() {
+ %c1 = vm.const.i32 2
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.mul.i32 %c1dno, %c1dno : i32
+ %c2 = vm.const.i32 4
+ vm.check.eq %v, %c2, "2*2=4" : i32
+ vm.return
+ }
+
+ vm.export @test_div_i32s
+ vm.func @test_div_i32s() {
+ %c1 = vm.const.i32 4
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 -2
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %v = vm.div.i32.s %c1dno, %c2dno : i32
+ %c3 = vm.const.i32 -2
+ vm.check.eq %v, %c3, "4/-2=-2" : i32
+ vm.return
+ }
+
+ vm.export @test_div_i32u
+ vm.func @test_div_i32u() {
+ %c1 = vm.const.i32 4
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 2
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %v = vm.div.i32.u %c1dno, %c2dno : i32
+ %c3 = vm.const.i32 2
+ vm.check.eq %v, %c3, "4/2=2" : i32
+ vm.return
+ }
+
+ vm.export @test_rem_i32s
+ vm.func @test_rem_i32s() {
+ %c1 = vm.const.i32 -3
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 -2
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %v = vm.rem.i32.s %c1dno, %c2dno : i32
+ %c3 = vm.const.i32 -1
+ vm.check.eq %v, %c3, "-3%-2=-1" : i32
+ vm.return
+ }
+
+ vm.export @test_rem_i32u
+ vm.func @test_rem_i32u() {
+ %c1 = vm.const.i32 3
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 2
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %v = vm.rem.i32.u %c1dno, %c2dno : i32
+ %c3 = vm.const.i32 1
+ vm.check.eq %v, %c3, "3%2=1" : i32
+ vm.return
+ }
+
+ vm.export @test_fma_i32
+ vm.func @test_fma_i32() {
+ %c2 = vm.const.i32 2
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %c3 = vm.const.i32 3
+ %c3dno = util.do_not_optimize(%c3) : i32
+ %c5 = vm.const.i32 5
+ %c5dno = util.do_not_optimize(%c5) : i32
+ %v = vm.fma.i32 %c2dno, %c3dno, %c5dno : i32
+ %c11 = vm.const.i32 11
+ vm.check.eq %v, %c11, "2*3+5=11" : i32
+ vm.return
+ }
+
+ vm.export @test_not_i32
+ vm.func @test_not_i32() {
+ %c1 = vm.const.i32 0
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.not.i32 %c1dno : i32
+ %c2 = vm.const.i32 -1
+ vm.check.eq %v, %c2, "~0=-1" : i32
+ vm.return
+ }
+
+ vm.export @test_and_i32
+ vm.func @test_and_i32() {
+ %c1 = vm.const.i32 5
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 3
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %v = vm.and.i32 %c1dno, %c2dno : i32
+ %c3 = vm.const.i32 1
+ vm.check.eq %v, %c3, "5&3=1" : i32
+ vm.return
+ }
+
+ vm.export @test_or_i32
+ vm.func @test_or_i32() {
+ %c1 = vm.const.i32 5
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 3
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %v = vm.or.i32 %c1dno, %c2dno : i32
+ %c3 = vm.const.i32 7
+ vm.check.eq %v, %c3, "5|3=7" : i32
+ vm.return
+ }
+
+ vm.export @test_xor_i32
+ vm.func @test_xor_i32() {
+ %c1 = vm.const.i32 5
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 3
+ %c2dno = util.do_not_optimize(%c2) : i32
+ %v = vm.xor.i32 %c1dno, %c2dno : i32
+ %c3 = vm.const.i32 6
+ vm.check.eq %v, %c3, "5^3=6" : i32
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/arithmetic_ops_f32.mlir b/runtime/src/iree/vm/test/arithmetic_ops_f32.mlir
new file mode 100644
index 0000000..f23cf94
--- /dev/null
+++ b/runtime/src/iree/vm/test/arithmetic_ops_f32.mlir
@@ -0,0 +1,281 @@
+vm.module @arithmetic_ops_f32 {
+
+ //===--------------------------------------------------------------------===//
+ // ExtF32: Native floating-point arithmetic
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_add_f32
+ vm.func @test_add_f32() {
+ %c1 = vm.const.f32 1.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.add.f32 %c1dno, %c1dno : f32
+ %c2 = vm.const.f32 3.0
+ vm.check.eq %v, %c2, "1.5+1.5=3" : f32
+ vm.return
+ }
+
+ vm.export @test_sub_f32
+ vm.func @test_sub_f32() {
+ %c1 = vm.const.f32 3.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %c2 = vm.const.f32 2.5
+ %c2dno = util.do_not_optimize(%c2) : f32
+ %v = vm.sub.f32 %c1dno, %c2dno : f32
+ %c3 = vm.const.f32 0.5
+ vm.check.eq %v, %c3, "3.0-2.5=0.5" : f32
+ vm.return
+ }
+
+ vm.export @test_mul_f32
+ vm.func @test_mul_f32() {
+ %c1 = vm.const.f32 2.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.mul.f32 %c1dno, %c1dno : f32
+ %c2 = vm.const.f32 6.25
+ vm.check.eq %v, %c2, "2.5*2.5=6.25" : f32
+ vm.return
+ }
+
+ vm.export @test_div_f32
+ vm.func @test_div_f32() {
+ %c1 = vm.const.f32 4.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %c2 = vm.const.f32 -2.0
+ %c2dno = util.do_not_optimize(%c2) : f32
+ %v = vm.div.f32 %c1dno, %c2dno : f32
+ %c3 = vm.const.f32 -2.0
+ vm.check.eq %v, %c3, "4.0/-2.0=-2.0" : f32
+ vm.return
+ }
+
+ vm.export @test_rem_f32
+ vm.func @test_rem_f32() {
+ %c1 = vm.const.f32 -3.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %c2 = vm.const.f32 -2.0
+ %c2dno = util.do_not_optimize(%c2) : f32
+ %v = vm.rem.f32 %c1dno, %c2dno : f32
+ %c3 = vm.const.f32 1.0
+ vm.check.eq %v, %c3, "-3.0%-2.0=1.0" : f32
+ vm.return
+ }
+
+ vm.export @test_fma_f32
+ vm.func @test_fma_f32() {
+ %c2 = vm.const.f32 2.0
+ %c2dno = util.do_not_optimize(%c2) : f32
+ %c3 = vm.const.f32 3.0
+ %c3dno = util.do_not_optimize(%c3) : f32
+ %c5 = vm.const.f32 5.0
+ %c5dno = util.do_not_optimize(%c5) : f32
+ %v = vm.fma.f32 %c2dno, %c3dno, %c5dno : f32
+ %c11 = vm.const.f32 11.0
+ vm.check.eq %v, %c11, "2.0*3.0+5.0=11.0" : f32
+ vm.return
+ }
+
+ vm.export @test_abs_f32
+ vm.func @test_abs_f32() {
+ %c1 = vm.const.f32 -1.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.abs.f32 %c1dno : f32
+ %c2 = vm.const.f32 1.0
+ vm.check.eq %v, %c2, "abs(-1.0)=1.0" : f32
+ vm.return
+ }
+
+ vm.export @test_neg_f32
+ vm.func @test_neg_f32() {
+ %c1 = vm.const.f32 -1.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.neg.f32 %c1dno : f32
+ %c2 = vm.const.f32 1.0
+ vm.check.eq %v, %c2, "neg(-1.0)=1.0" : f32
+ vm.return
+ }
+
+ vm.export @test_ceil_f32
+ vm.func @test_ceil_f32() {
+ %c1 = vm.const.f32 1.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.ceil.f32 %c1dno : f32
+ %c2 = vm.const.f32 2.0
+ vm.check.eq %v, %c2, "ceil(1.5)=2.0" : f32
+ vm.return
+ }
+
+ vm.export @test_floor_f32
+ vm.func @test_floor_f32() {
+ %c1 = vm.const.f32 1.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.floor.f32 %c1dno : f32
+ %c2 = vm.const.f32 1.0
+ vm.check.eq %v, %c2, "floor(1.5)=1.0" : f32
+ vm.return
+ }
+
+ vm.export @test_atan_f32
+ vm.func @test_atan_f32() {
+ %c1 = vm.const.f32 1.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.atan.f32 %c1dno : f32
+ %c2 = vm.const.f32 0.7853981633974483
+ vm.check.eq %v, %c2, "atan(1.0)=0.7853981633974483" : f32
+ vm.return
+ }
+
+ vm.export @test_atan2_f32
+ vm.func @test_atan2_f32() {
+ %c1 = vm.const.f32 1.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %c2 = vm.const.f32 0.0
+ %c2dno = util.do_not_optimize(%c2) : f32
+ %v = vm.atan2.f32 %c1dno, %c2dno : f32
+ %c3 = vm.const.f32 1.5707963267948966
+ vm.check.eq %v, %c3, "atan2(1.0,0.0)=1.5707963267948966" : f32
+ vm.return
+ }
+
+ vm.export @test_cos_f32
+ vm.func @test_cos_f32() {
+ %c1 = vm.const.f32 0.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.cos.f32 %c1dno : f32
+ %c2 = vm.const.f32 0.8775825618903728
+ vm.check.eq %v, %c2, "cos(0.5)=0.8775825618903728" : f32
+ vm.return
+ }
+
+ vm.export @test_sin_f32
+ vm.func @test_sin_f32() {
+ %c1 = vm.const.f32 0.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.sin.f32 %c1dno : f32
+ %c2 = vm.const.f32 0.479425538604203
+ vm.check.eq %v, %c2, "sin(0.5)=0.479425538604203" : f32
+ vm.return
+ }
+
+ vm.export @test_exp_f32
+ vm.func @test_exp_f32() {
+ %c1 = vm.const.f32 1.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.exp.f32 %c1dno : f32
+ %c2 = vm.const.f32 2.718281828459045
+ vm.check.eq %v, %c2, "exp(1.0)=2.718281828459045" : f32
+ vm.return
+ }
+
+ vm.export @test_exp2_f32
+ vm.func @test_exp2_f32() {
+ %c1 = vm.const.f32 2.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.exp2.f32 %c1dno : f32
+ %c2 = vm.const.f32 4.0
+ vm.check.eq %v, %c2, "exp(2.0)=4.0" : f32
+ vm.return
+ }
+
+ vm.export @test_expm1_f32
+ vm.func @test_expm1_f32() {
+ %c1 = vm.const.f32 2.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.expm1.f32 %c1dno : f32
+ %c2 = vm.const.f32 6.38905609893065
+ vm.check.eq %v, %c2, "expm1(2.0)=6.38905609893065" : f32
+ vm.return
+ }
+
+ vm.export @test_log_f32
+ vm.func @test_log_f32() {
+ %c1 = vm.const.f32 10.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.log.f32 %c1dno : f32
+ %c2 = vm.const.f32 2.302585092994046
+ vm.check.eq %v, %c2, "log(10.0)=2.302585092994046" : f32
+ vm.return
+ }
+
+ vm.export @test_log10_f32
+ vm.func @test_log10_f32() {
+ %c1 = vm.const.f32 10.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.log10.f32 %c1dno : f32
+ %c2 = vm.const.f32 1.0
+ vm.check.eq %v, %c2, "log10(10.0)=1.0" : f32
+ vm.return
+ }
+
+ vm.export @test_log1p_f32
+ vm.func @test_log1p_f32() {
+ %c1 = vm.const.f32 10.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.log1p.f32 %c1dno : f32
+ %c2 = vm.const.f32 2.3978952727983707
+ vm.check.eq %v, %c2, "log1p(10.0)=2.3978952727983707" : f32
+ vm.return
+ }
+
+ vm.export @test_log2_f32
+ vm.func @test_log2_f32() {
+ %c1 = vm.const.f32 10.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.log2.f32 %c1dno : f32
+ %c2 = vm.const.f32 3.321928094887362
+ vm.check.eq %v, %c2, "log2(10.0)=3.321928094887362" : f32
+ vm.return
+ }
+
+ vm.export @test_pow_f32
+ vm.func @test_pow_f32() {
+ %c1 = vm.const.f32 3.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %c2 = vm.const.f32 2.0
+ %c2dno = util.do_not_optimize(%c2) : f32
+ %v = vm.pow.f32 %c1dno, %c2dno : f32
+ %c3 = vm.const.f32 9.0
+ vm.check.eq %v, %c3, "pow(3.0,2.0)=9.0" : f32
+ vm.return
+ }
+
+ vm.export @test_rsqrt_f32
+ vm.func @test_rsqrt_f32() {
+ %c1 = vm.const.f32 4.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.rsqrt.f32 %c1dno : f32
+ %c2 = vm.const.f32 0.5
+ vm.check.eq %v, %c2, "rsqrt(4.0)=0.5" : f32
+ vm.return
+ }
+
+ vm.export @test_sqrt_f32
+ vm.func @test_sqrt_f32() {
+ %c1 = vm.const.f32 4.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.sqrt.f32 %c1dno : f32
+ %c2 = vm.const.f32 2.0
+ vm.check.eq %v, %c2, "sqrt(4.0)=2.0" : f32
+ vm.return
+ }
+
+ vm.export @test_tanh_f32
+ vm.func @test_tanh_f32() {
+ %c1 = vm.const.f32 0.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.tanh.f32 %c1dno : f32
+ %c2 = vm.const.f32 0.46211715726000974
+ vm.check.eq %v, %c2, "tanh(0.5)=0.46211715726000974" : f32
+ vm.return
+ }
+
+ // TODO(#5854): vm.check.nearly_eq; this can differ across libm impls.
+ // vm.export @test_erf_f32
+ // vm.func @test_erf_f32() {
+ // %c1 = vm.const.f32 0.5
+ // %c1dno = util.do_not_optimize(%c1) : f32
+ // %v = vm.erf.f32 %c1dno : f32
+ // %c2 = vm.const.f32 0.520499945
+ // vm.check.eq %v, %c2, "erf(0.5)=0.520499945" : f32
+ // vm.return
+ // }
+}
diff --git a/runtime/src/iree/vm/test/arithmetic_ops_i64.mlir b/runtime/src/iree/vm/test/arithmetic_ops_i64.mlir
new file mode 100644
index 0000000..65f2c7d
--- /dev/null
+++ b/runtime/src/iree/vm/test/arithmetic_ops_i64.mlir
@@ -0,0 +1,146 @@
+vm.module @arithmetic_ops_i64 {
+
+ //===--------------------------------------------------------------------===//
+ // ExtI64: Native integer arithmetic
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_add_i64
+ vm.func @test_add_i64() {
+ %c1 = vm.const.i64 1
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %v = vm.add.i64 %c1dno, %c1dno : i64
+ %c2 = vm.const.i64 2
+ vm.check.eq %v, %c2, "1+1=2" : i64
+ vm.return
+ }
+
+ vm.export @test_sub_i64
+ vm.func @test_sub_i64() {
+ %c1 = vm.const.i64 3
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %c2 = vm.const.i64 2
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %v = vm.sub.i64 %c1dno, %c2dno : i64
+ %c3 = vm.const.i64 1
+ vm.check.eq %v, %c3, "3-2=1" : i64
+ vm.return
+ }
+
+ vm.export @test_mul_i64
+ vm.func @test_mul_i64() {
+ %c1 = vm.const.i64 2
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %v = vm.mul.i64 %c1dno, %c1dno : i64
+ %c2 = vm.const.i64 4
+ vm.check.eq %v, %c2, "2*2=4" : i64
+ vm.return
+ }
+
+ vm.export @test_div_i64s
+ vm.func @test_div_i64s() {
+ %c1 = vm.const.i64 4
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %c2 = vm.const.i64 -2
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %v = vm.div.i64.s %c1dno, %c2dno : i64
+ %c3 = vm.const.i64 -2
+ vm.check.eq %v, %c3, "4/-2=-2" : i64
+ vm.return
+ }
+
+ vm.export @test_div_i64u
+ vm.func @test_div_i64u() {
+ %c1 = vm.const.i64 4
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %c2 = vm.const.i64 2
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %v = vm.div.i64.u %c1dno, %c2dno : i64
+ %c3 = vm.const.i64 2
+ vm.check.eq %v, %c3, "4/2=2" : i64
+ vm.return
+ }
+
+ vm.export @test_rem_i64s
+ vm.func @test_rem_i64s() {
+ %c1 = vm.const.i64 -3
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %c2 = vm.const.i64 -2
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %v = vm.rem.i64.s %c1dno, %c2dno : i64
+ %c3 = vm.const.i64 -1
+ vm.check.eq %v, %c3, "-3%-2=-1" : i64
+ vm.return
+ }
+
+ vm.export @test_rem_i64u
+ vm.func @test_rem_i64u() {
+ %c1 = vm.const.i64 3
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %c2 = vm.const.i64 2
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %v = vm.rem.i64.u %c1dno, %c2dno : i64
+ %c3 = vm.const.i64 1
+ vm.check.eq %v, %c3, "3%2=1" : i64
+ vm.return
+ }
+
+ vm.export @test_fma_i64
+ vm.func @test_fma_i64() {
+ %c2 = vm.const.i64 2
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %c3 = vm.const.i64 3
+ %c3dno = util.do_not_optimize(%c3) : i64
+ %c5 = vm.const.i64 5
+ %c5dno = util.do_not_optimize(%c5) : i64
+ %v = vm.fma.i64 %c2dno, %c3dno, %c5dno : i64
+ %c11 = vm.const.i64 11
+ vm.check.eq %v, %c11, "2*3+5=11" : i64
+ vm.return
+ }
+
+ vm.export @test_not_i64
+ vm.func @test_not_i64() {
+ %c1 = vm.const.i64 0
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %v = vm.not.i64 %c1dno : i64
+ %c2 = vm.const.i64 -1
+ vm.check.eq %v, %c2, "~0=-1" : i64
+ vm.return
+ }
+
+ vm.export @test_and_i64
+ vm.func @test_and_i64() {
+ %c1 = vm.const.i64 5
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %c2 = vm.const.i64 3
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %v = vm.and.i64 %c1dno, %c2dno : i64
+ %c3 = vm.const.i64 1
+ vm.check.eq %v, %c3, "5&3=1" : i64
+ vm.return
+ }
+
+ vm.export @test_or_i64
+ vm.func @test_or_i64() {
+ %c1 = vm.const.i64 5
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %c2 = vm.const.i64 3
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %v = vm.or.i64 %c1dno, %c2dno : i64
+ %c3 = vm.const.i64 7
+ vm.check.eq %v, %c3, "5|3=7" : i64
+ vm.return
+ }
+
+ vm.export @test_xor_i64
+ vm.func @test_xor_i64() {
+ %c1 = vm.const.i64 5
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %c2 = vm.const.i64 3
+ %c2dno = util.do_not_optimize(%c2) : i64
+ %v = vm.xor.i64 %c1dno, %c2dno : i64
+ %c3 = vm.const.i64 6
+ vm.check.eq %v, %c3, "5^3=6" : i64
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/assignment_ops.mlir b/runtime/src/iree/vm/test/assignment_ops.mlir
new file mode 100644
index 0000000..a5b77c7
--- /dev/null
+++ b/runtime/src/iree/vm/test/assignment_ops.mlir
@@ -0,0 +1,32 @@
+vm.module @assignment_ops {
+
+ //===--------------------------------------------------------------------===//
+ // Conditional assignment
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_select_i32
+ vm.func @test_select_i32() {
+ %c0 = vm.const.i32 0
+ %c0dno = util.do_not_optimize(%c0) : i32
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v1 = vm.select.i32 %c0dno, %c0dno, %c1dno : i32
+ vm.check.eq %v1, %c1, "0 ? 0 : 1 = 1" : i32
+ %v2 = vm.select.i32 %c1dno, %c0dno, %c1dno : i32
+ vm.check.eq %v2, %c0, "1 ? 0 : 1 = 0" : i32
+ vm.return
+ }
+
+ vm.export @test_select_ref attributes {emitc.exclude}
+ vm.func private @test_select_ref() {
+ %c0 = vm.const.i32 0
+ %list0 = vm.list.alloc %c0 : (i32) -> !vm.list<i8>
+ %c1 = vm.const.i32 1
+ %list1 = vm.list.alloc %c1 : (i32) -> !vm.list<i8>
+ %cond = vm.const.i32 0
+ %cond_dno = util.do_not_optimize(%cond) : i32
+ %list = vm.select.ref %cond_dno, %list0, %list1 : !vm.list<i8>
+ vm.check.eq %list, %list1, "0 ? list0 : list1 = list1" : !vm.list<i8>
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/assignment_ops_f32.mlir b/runtime/src/iree/vm/test/assignment_ops_f32.mlir
new file mode 100644
index 0000000..1a88bd0
--- /dev/null
+++ b/runtime/src/iree/vm/test/assignment_ops_f32.mlir
@@ -0,0 +1,21 @@
+vm.module @assignment_ops_f32 {
+
+ //===--------------------------------------------------------------------===//
+ // ExtF32: Conditional assignment
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_select_f32
+ vm.func @test_select_f32() {
+ %c0 = vm.const.i32 0
+ %c0dno = util.do_not_optimize(%c0) : i32
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.f32 0.0
+ %c3 = vm.const.f32 1.0
+ %v1 = vm.select.f32 %c0dno, %c2, %c3 : f32
+ vm.check.eq %v1, %c3, "0 ? 0.0 : 1.0 = 1.0" : f32
+ %v2 = vm.select.f32 %c1dno, %c2, %c3 : f32
+ vm.check.eq %v2, %c2, "1 ? 0.0 : 1.0 = 0.0" : f32
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/assignment_ops_i64.mlir b/runtime/src/iree/vm/test/assignment_ops_i64.mlir
new file mode 100644
index 0000000..72429f3
--- /dev/null
+++ b/runtime/src/iree/vm/test/assignment_ops_i64.mlir
@@ -0,0 +1,21 @@
+vm.module @assignment_ops_i64 {
+
+ //===--------------------------------------------------------------------===//
+ // ExtI64: Conditional assignment
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_select_i64
+ vm.func @test_select_i64() {
+ %c0 = vm.const.i32 0
+ %c0dno = util.do_not_optimize(%c0) : i32
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i64 0
+ %c3 = vm.const.i64 1
+ %v1 = vm.select.i64 %c0dno, %c2, %c3 : i64
+ vm.check.eq %v1, %c3, "0 ? 0 : 1 = 1" : i64
+ %v2 = vm.select.i64 %c1dno, %c2, %c3 : i64
+ vm.check.eq %v2, %c2, "1 ? 0 : 1 = 0" : i64
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/buffer_ops.mlir b/runtime/src/iree/vm/test/buffer_ops.mlir
new file mode 100644
index 0000000..754b97b
--- /dev/null
+++ b/runtime/src/iree/vm/test/buffer_ops.mlir
@@ -0,0 +1,635 @@
+vm.module @buffer_ops {
+
+ vm.rodata private @rodata_3xi32 dense<[1, 2, 3]> : tensor<3xi32>
+
+ //===--------------------------------------------------------------------===//
+ // Compare
+ //===--------------------------------------------------------------------===//
+ // NOTE: we test this first because all of the other tests rely on it and we
+ // can do it with rodata.
+
+ vm.rodata private @rodata_cmp_3xi32_a dense<[100, 200, 300]> : tensor<3xi32>
+ vm.rodata private @rodata_cmp_3xi32_b dense<[100, 201, 300]> : tensor<3xi32>
+
+ // Compares some multi-element buffers. Note that comparisons are bytewise.
+ vm.export @test_compare attributes {emitc.exclude}
+ vm.func private @test_compare() {
+ %rodata_a = vm.const.ref.rodata @rodata_cmp_3xi32_a : !vm.buffer
+ %rodata_b = vm.const.ref.rodata @rodata_cmp_3xi32_b : !vm.buffer
+ %rodata_a_dno = util.do_not_optimize(%rodata_a) : !vm.buffer
+ %rodata_b_dno = util.do_not_optimize(%rodata_b) : !vm.buffer
+
+ %c0 = vm.const.i32 0
+ %length = vm.buffer.length %rodata_a_dno : !vm.buffer -> i32
+
+ %cmp0 = vm.buffer.compare %rodata_a_dno, %c0, %rodata_a_dno, %c0, %length : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp0, "buffer a == a" : i32
+
+ %cmp1 = vm.buffer.compare %rodata_a_dno, %c0, %rodata_b_dno, %c0, %length : !vm.buffer, !vm.buffer
+ vm.check.eq %cmp1, %c0, "buffer a != b" : i32
+
+ vm.return
+ }
+
+ // Tests comparing an empty range, which should always be equal.
+ vm.export @test_compare_empty attributes {emitc.exclude}
+ vm.func private @test_compare_empty() {
+ %rodata_a = vm.const.ref.rodata @rodata_cmp_3xi32_a : !vm.buffer
+ %rodata_b = vm.const.ref.rodata @rodata_cmp_3xi32_b : !vm.buffer
+ %rodata_a_dno = util.do_not_optimize(%rodata_a) : !vm.buffer
+ %rodata_b_dno = util.do_not_optimize(%rodata_b) : !vm.buffer
+
+ %c0 = vm.const.i32 0
+ %c2 = vm.const.i32 2
+
+ %cmp = vm.buffer.compare %rodata_a_dno, %c2, %rodata_a_dno, %c2, %c0 : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "empty buffer ranges are always equal" : i32
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Allocation
+ //===--------------------------------------------------------------------===//
+
+ // Tests allocating a buffer.
+ vm.export @test_alloc attributes {emitc.exclude}
+ vm.func private @test_alloc() {
+ %c128 = vm.const.i32 128
+ %buf = vm.buffer.alloc %c128 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ %buf_length = vm.buffer.length %buf_dno : !vm.buffer -> i32
+ vm.check.eq %c128, %buf_length, "buffer length == 128" : i32
+
+ vm.return
+ }
+
+ // Tests that zero-length buffers can be allocated.
+ vm.export @test_alloc_empty attributes {emitc.exclude}
+ vm.func private @test_alloc_empty() {
+ %c0 = vm.const.i32 0
+ %buf = vm.buffer.alloc %c0 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ %buf_length = vm.buffer.length %buf_dno : !vm.buffer -> i32
+ vm.check.eq %c0, %buf_length, "buffer length == 0" : i32
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Cloning
+ //===--------------------------------------------------------------------===//
+
+ // Tests cloning a subrange of a buffer.
+ vm.export @test_clone attributes {emitc.exclude}
+ vm.func private @test_clone() {
+ // Fetch source .rodata blob.
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+
+ // Clone the last two 32-bit elements.
+ %c4 = vm.const.i32 4
+ %c8 = vm.const.i32 8
+ %buf = vm.buffer.clone %rodata, %c4, %c8 : !vm.buffer -> !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ // Compare the cloned range to the original.
+ %c0 = vm.const.i32 0
+ %cmp = vm.buffer.compare %rodata, %c4, %buf_dno, %c0, %c8 : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "buffer subspans are equal" : i32
+
+ vm.return
+ }
+
+ // Tests cloning a zero-length buffer.
+ vm.export @test_clone_empty attributes {emitc.exclude}
+ vm.func private @test_clone_empty() {
+ // Allocate source zero-length buffer.
+ %c0 = vm.const.i32 0
+ %buf0 = vm.buffer.alloc %c0 : !vm.buffer
+ %buf0_dno = util.do_not_optimize(%buf0) : !vm.buffer
+ vm.check.nz %buf0_dno, "!null" : !vm.buffer
+ %buf0_length = vm.buffer.length %buf0_dno : !vm.buffer -> i32
+ vm.check.eq %c0, %buf0_length, "buffer length == 0" : i32
+
+ // Clone it all (or, clone nothing?).
+ %buf1 = vm.buffer.clone %buf0_dno, %c0, %c0 : !vm.buffer -> !vm.buffer
+ %buf1_dno = util.do_not_optimize(%buf1) : !vm.buffer
+ vm.check.nz %buf1_dno, "!null" : !vm.buffer
+ %buf1_length = vm.buffer.length %buf1_dno : !vm.buffer -> i32
+ vm.check.eq %c0, %buf1_length, "buffer length == 0" : i32
+
+ vm.return
+ }
+
+ // Tests an out-of-bounds cloning subrange.
+ vm.export @fail_clone_out_of_range attributes {emitc.exclude}
+ vm.func private @fail_clone_out_of_range() {
+ // Fetch source .rodata blob.
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+ %rodata_dno = util.do_not_optimize(%rodata) : !vm.buffer
+ vm.check.nz %rodata_dno, "!null" : !vm.buffer
+
+ // Try to clone off the end of the buffer.
+ %c8 = vm.const.i32 8
+ %buf = vm.buffer.clone %rodata, %c8, %c8 : !vm.buffer -> !vm.buffer
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Copy
+ //===--------------------------------------------------------------------===//
+
+ // Tests copying an entire buffer from one buffer to another.
+ vm.export @test_copy_full attributes {emitc.exclude}
+ vm.func private @test_copy_full() {
+ // Fetch source .rodata blob.
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+ %rodata_length = vm.buffer.length %rodata : !vm.buffer -> i32
+ vm.check.nz %rodata, "!null" : !vm.buffer
+
+ // Allocate target buffer.
+ %buf = vm.buffer.alloc %rodata_length : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ // Copy the entire contents.
+ %c0 = vm.const.i32 0
+ vm.buffer.copy %rodata, %c0, %buf_dno, %c0, %rodata_length : !vm.buffer -> !vm.buffer
+
+ // Compare to source.
+ %cmp = vm.buffer.compare %rodata, %c0, %buf_dno, %c0, %rodata_length : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "source and target match" : i32
+
+ vm.return
+ }
+
+ vm.rodata private @test_copy_partial_ref dense<[2]> : tensor<1xi32>
+
+ // Tests copying a range of bytes from one buffer to another.
+ vm.export @test_copy_partial attributes {emitc.exclude}
+ vm.func private @test_copy_partial() {
+ // Allocate target buffer.
+ %c4 = vm.const.i32 4
+ %buf = vm.buffer.alloc %c4 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ // Copy the middle 4-byte element.
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+ %c0 = vm.const.i32 0
+ vm.buffer.copy %rodata, %c4, %buf, %c0, %c4 : !vm.buffer -> !vm.buffer
+
+ // Compare to reference.
+ %ref = vm.const.ref.rodata @test_copy_partial_ref : !vm.buffer
+ %cmp = vm.buffer.compare %ref, %c0, %buf, %c0, %c4 : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "source and target match" : i32
+
+ vm.return
+ }
+
+ // Tests an out-of-bounds copy source.
+ vm.export @fail_copy_out_of_range_source_offset attributes {emitc.exclude}
+ vm.func private @fail_copy_out_of_range_source_offset() {
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+ %c128 = vm.const.i32 128
+ %buf = vm.buffer.alloc %c128 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ // Try to clone off the end of the source buffer.
+ %c0 = vm.const.i32 0
+ vm.buffer.copy %rodata, %c0, %buf_dno, %c0, %c128 : !vm.buffer -> !vm.buffer
+
+ vm.return
+ }
+
+ // Tests an out-of-bounds copy source.
+ vm.export @fail_copy_out_of_range_source_length attributes {emitc.exclude}
+ vm.func private @fail_copy_out_of_range_source_length() {
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+ %c128 = vm.const.i32 128
+ %buf = vm.buffer.alloc %c128 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ // Try to clone off the end of the source buffer.
+ %c0 = vm.const.i32 0
+ %c8 = vm.const.i32 8
+ vm.buffer.copy %rodata, %c8, %buf_dno, %c0, %c8 : !vm.buffer -> !vm.buffer
+
+ vm.return
+ }
+
+ // Tests an out-of-bounds copy target.
+ vm.export @fail_copy_out_of_range_target_offset attributes {emitc.exclude}
+ vm.func private @fail_copy_out_of_range_target_offset() {
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+ %rodata_length = vm.buffer.length %rodata : !vm.buffer -> i32
+ %c8 = vm.const.i32 8
+ %buf = vm.buffer.alloc %c8 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ // Try to clone off the end of the target buffer.
+ %c0 = vm.const.i32 0
+ vm.buffer.copy %rodata, %c0, %buf_dno, %c0, %rodata_length : !vm.buffer -> !vm.buffer
+
+ vm.return
+ }
+
+ // Tests an out-of-bounds copy target.
+ vm.export @fail_copy_out_of_range_target_length attributes {emitc.exclude}
+ vm.func private @fail_copy_out_of_range_target_length() {
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+ %c8 = vm.const.i32 8
+ %buf = vm.buffer.alloc %c8 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ // Try to clone off the end of the target buffer.
+ %c0 = vm.const.i32 0
+ vm.buffer.copy %rodata, %c0, %buf_dno, %c8, %c8 : !vm.buffer -> !vm.buffer
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Fill
+ //===--------------------------------------------------------------------===//
+
+ vm.rodata private @test_fill_i16_ref dense<[0, 51966, 51966, 0]> : tensor<4xi16>
+
+ // Tests filling a buffer with 16-bit values.
+ vm.export @test_fill_i16 attributes {emitc.exclude}
+ vm.func private @test_fill_i16() {
+ // Allocate zeroed buffer.
+ %c8 = vm.const.i32 8
+ %buf = vm.buffer.alloc %c8 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+ vm.check.nz %buf_dno, "!null" : !vm.buffer
+
+ // Fill the middle two elements.
+ %c2 = vm.const.i32 2
+ %c4 = vm.const.i32 4
+ %cafe = vm.const.i32 0xCAFE
+ vm.buffer.fill.i16 %buf_dno, %c2, %c4, %cafe : i32 -> !vm.buffer
+
+ // Compare to reference.
+ %c0 = vm.const.i32 0
+ %rodata_ref = vm.const.ref.rodata @test_fill_i16_ref : !vm.buffer
+ %cmp = vm.buffer.compare %rodata_ref, %c0, %buf_dno, %c0, %c8 : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "buffer should match reference" : i32
+
+ vm.return
+ }
+
+ vm.rodata private @test_fill_i16_misaligned_offset_ref dense<[0xCAFE, 0xCAFE, 0, 0]> : tensor<4xi16>
+
+ // Tests that misaligned fill offsets will succeed but round down.
+ vm.export @test_fill_i16_misaligned_offset attributes {emitc.exclude}
+ vm.func private @test_fill_i16_misaligned_offset() {
+ // Allocate zeroed buffer.
+ %c8 = vm.const.i32 8
+ %buf = vm.buffer.alloc %c8 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+
+ // Try filling from offset 1, which is not i16-aligned.
+ %c1 = vm.const.i32 1
+ %c4 = vm.const.i32 4
+ %cafe = vm.const.i32 0xCAFE
+ vm.buffer.fill.i16 %buf_dno, %c1, %c4, %cafe : i32 -> !vm.buffer
+
+ // Compare to reference - should have written at offset 0.
+ %c0 = vm.const.i32 0
+ %rodata_ref = vm.const.ref.rodata @test_fill_i16_misaligned_offset_ref : !vm.buffer
+ %cmp = vm.buffer.compare %rodata_ref, %c0, %buf_dno, %c0, %c8 : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "buffer should match reference" : i32
+
+
+ vm.return
+ }
+
+ vm.rodata private @test_fill_i16_misaligned_length_ref dense<[0, 0, 0, 0]> : tensor<4xi16>
+
+ // Tests that misaligned fill lengths will succeed but round down.
+ vm.export @test_fill_i16_misaligned_length attributes {emitc.exclude}
+ vm.func private @test_fill_i16_misaligned_length() {
+ // Allocate zeroed buffer.
+ %c8 = vm.const.i32 8
+ %buf = vm.buffer.alloc %c8 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+
+ // Try filling for length 1, which is not i16-aligned.
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+ %cafe = vm.const.i32 0xCAFE
+ vm.buffer.fill.i16 %buf_dno, %c0, %c1, %cafe : i32 -> !vm.buffer
+
+ // Compare to reference - should have written 0 bytes.
+ %rodata_ref = vm.const.ref.rodata @test_fill_i16_misaligned_length_ref : !vm.buffer
+ %cmp = vm.buffer.compare %rodata_ref, %c0, %buf_dno, %c0, %c8 : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "buffer should match reference" : i32
+
+ vm.return
+ }
+
+ // Tests that trying to fill .rodata will fail.
+ vm.export @fail_fill_i16_rodata attributes {emitc.exclude}
+ vm.func private @fail_fill_i16_rodata() {
+ %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
+
+ // Permission denied:
+ %c0 = vm.const.i32 0
+ %c2 = vm.const.i32 2
+ %cafe = vm.const.i32 0xCAFE
+ vm.buffer.fill.i16 %rodata, %c0, %c2, %cafe : i32 -> !vm.buffer
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Load
+ //===--------------------------------------------------------------------===//
+
+ vm.rodata private @test_load_i8_data dense<[0x00, 0x01, 0x7F, 0x80, 0xFF]> : tensor<5xui8>
+
+ vm.export @test_load_i8u attributes {emitc.exclude}
+ vm.func private @test_load_i8u() {
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+ %c2 = vm.const.i32 2
+ %c3 = vm.const.i32 3
+ %c4 = vm.const.i32 4
+ %rodata = vm.const.ref.rodata @test_load_i8_data : !vm.buffer
+ %v0 = vm.buffer.load.i8.u %rodata[%c0] : !vm.buffer -> i32
+ %e0 = vm.const.i32 0
+ vm.check.eq %v0, %e0, "0" : i32
+ %v1 = vm.buffer.load.i8.u %rodata[%c1] : !vm.buffer -> i32
+ %e1 = vm.const.i32 1
+ vm.check.eq %v1, %e1, "1" : i32
+ %v2 = vm.buffer.load.i8.u %rodata[%c2] : !vm.buffer -> i32
+ %e2 = vm.const.i32 0x7F
+ vm.check.eq %v2, %e2, "0x7F" : i32
+ %v3 = vm.buffer.load.i8.u %rodata[%c3] : !vm.buffer -> i32
+ %e3 = vm.const.i32 0x80
+ vm.check.eq %v3, %e3, "0x80" : i32
+ %v4 = vm.buffer.load.i8.u %rodata[%c4] : !vm.buffer -> i32
+ %e4 = vm.const.i32 0xFF
+ vm.check.eq %v4, %e4, "0xFF" : i32
+ vm.return
+ }
+
+ vm.export @test_load_i8s attributes {emitc.exclude}
+ vm.func private @test_load_i8s() {
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+ %c2 = vm.const.i32 2
+ %c3 = vm.const.i32 3
+ %c4 = vm.const.i32 4
+ %rodata = vm.const.ref.rodata @test_load_i8_data : !vm.buffer
+ %v0 = vm.buffer.load.i8.s %rodata[%c0] : !vm.buffer -> i32
+ %e0 = vm.const.i32 0
+ vm.check.eq %v0, %e0, "0" : i32
+ %v1 = vm.buffer.load.i8.s %rodata[%c1] : !vm.buffer -> i32
+ %e1 = vm.const.i32 1
+ vm.check.eq %v1, %e1, "1" : i32
+ %v2 = vm.buffer.load.i8.s %rodata[%c2] : !vm.buffer -> i32
+ %e2 = vm.const.i32 0x7F
+ vm.check.eq %v2, %e2, "0x7F" : i32
+ %v3 = vm.buffer.load.i8.s %rodata[%c3] : !vm.buffer -> i32
+ %e3 = vm.const.i32 -128
+ vm.check.eq %v3, %e3, "-128" : i32
+ %v4 = vm.buffer.load.i8.s %rodata[%c4] : !vm.buffer -> i32
+ %e4 = vm.const.i32 -1
+ vm.check.eq %v4, %e4, "-1" : i32
+ vm.return
+ }
+
+ vm.rodata private @test_load_i16_data dense<[0x0000, 0x0001, 0x7FFF, 0x8000, 0xFFFF]> : tensor<5xui16>
+
+ vm.export @test_load_i16u attributes {emitc.exclude}
+ vm.func private @test_load_i16u() {
+ %c0 = vm.const.i32 0
+ %c2 = vm.const.i32 2
+ %c4 = vm.const.i32 4
+ %c6 = vm.const.i32 6
+ %c8 = vm.const.i32 8
+ %rodata = vm.const.ref.rodata @test_load_i16_data : !vm.buffer
+ %v0 = vm.buffer.load.i16.u %rodata[%c0] : !vm.buffer -> i32
+ %e0 = vm.const.i32 0
+ vm.check.eq %v0, %e0, "0" : i32
+ %v1 = vm.buffer.load.i16.u %rodata[%c2] : !vm.buffer -> i32
+ %e1 = vm.const.i32 1
+ vm.check.eq %v1, %e1, "1" : i32
+ %v2 = vm.buffer.load.i16.u %rodata[%c4] : !vm.buffer -> i32
+ %e2 = vm.const.i32 0x7FFF
+ vm.check.eq %v2, %e2, "0x7FFF" : i32
+ %v3 = vm.buffer.load.i16.u %rodata[%c6] : !vm.buffer -> i32
+ %e3 = vm.const.i32 0x8000
+ vm.check.eq %v3, %e3, "0x8000" : i32
+ %v4 = vm.buffer.load.i16.u %rodata[%c8] : !vm.buffer -> i32
+ %e4 = vm.const.i32 0xFFFF
+ vm.check.eq %v4, %e4, "0xFFFF" : i32
+ vm.return
+ }
+
+ vm.export @test_load_i16s attributes {emitc.exclude}
+ vm.func private @test_load_i16s() {
+ %c0 = vm.const.i32 0
+ %c2 = vm.const.i32 2
+ %c4 = vm.const.i32 4
+ %c6 = vm.const.i32 6
+ %c8 = vm.const.i32 8
+ %rodata = vm.const.ref.rodata @test_load_i16_data : !vm.buffer
+ %v0 = vm.buffer.load.i16.s %rodata[%c0] : !vm.buffer -> i32
+ %e0 = vm.const.i32 0
+ vm.check.eq %v0, %e0, "0" : i32
+ %v1 = vm.buffer.load.i16.s %rodata[%c2] : !vm.buffer -> i32
+ %e1 = vm.const.i32 1
+ vm.check.eq %v1, %e1, "1" : i32
+ %v2 = vm.buffer.load.i16.s %rodata[%c4] : !vm.buffer -> i32
+ %e2 = vm.const.i32 0x7FFF
+ vm.check.eq %v2, %e2, "0x7FFF" : i32
+ %v3 = vm.buffer.load.i16.s %rodata[%c6] : !vm.buffer -> i32
+ %e3 = vm.const.i32 -32768
+ vm.check.eq %v3, %e3, "-32768" : i32
+ %v4 = vm.buffer.load.i16.s %rodata[%c8] : !vm.buffer -> i32
+ %e4 = vm.const.i32 -1
+ vm.check.eq %v4, %e4, "-1" : i32
+ vm.return
+ }
+
+ vm.rodata private @test_load_i32_data dense<[0x00000000, 0x00000001, 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF]> : tensor<5xui32>
+
+ vm.export @test_load_i32 attributes {emitc.exclude}
+ vm.func private @test_load_i32() {
+ %c0 = vm.const.i32 0
+ %c4 = vm.const.i32 4
+ %c8 = vm.const.i32 8
+ %c12 = vm.const.i32 12
+ %c16 = vm.const.i32 16
+ %rodata = vm.const.ref.rodata @test_load_i32_data : !vm.buffer
+ %v0 = vm.buffer.load.i32 %rodata[%c0] : !vm.buffer -> i32
+ %e0 = vm.const.i32 0
+ vm.check.eq %v0, %e0, "0" : i32
+ %v1 = vm.buffer.load.i32 %rodata[%c4] : !vm.buffer -> i32
+ %e1 = vm.const.i32 1
+ vm.check.eq %v1, %e1, "1" : i32
+ %v2 = vm.buffer.load.i32 %rodata[%c8] : !vm.buffer -> i32
+ %e2 = vm.const.i32 0x7FFFFFFF
+ vm.check.eq %v2, %e2, "0x7FFFFFFF" : i32
+ %v3 = vm.buffer.load.i32 %rodata[%c12] : !vm.buffer -> i32
+ %e3 = vm.const.i32 0x80000000
+ vm.check.eq %v3, %e3, "0x80000000" : i32
+ %v4 = vm.buffer.load.i32 %rodata[%c16] : !vm.buffer -> i32
+ %e4 = vm.const.i32 0xFFFFFFFF
+ vm.check.eq %v4, %e4, "0xFFFFFFFF" : i32
+ vm.return
+ }
+
+ vm.rodata private @test_load_i32_unaligned_data dense<[0x00112233, 0x44556677, 0x8899AABB, 0xCCDDEEFF]> : tensor<4xui32>
+
+ // Unaligned loads are not supported and offsets will be rounded down.
+ vm.export @test_load_i32_unaligned attributes {emitc.exclude}
+ vm.func private @test_load_i32_unaligned() {
+ %rodata = vm.const.ref.rodata @test_load_i32_unaligned_data : !vm.buffer
+
+ // Byte offset 5 rounded to byte offset 4 (element 1).
+ %c5 = vm.const.i32 5
+ %v1 = vm.buffer.load.i32 %rodata[%c5] : !vm.buffer -> i32
+ %e1 = vm.const.i32 0x44556677
+ vm.check.eq %v1, %e1, "0x44556677" : i32
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Store
+ //===--------------------------------------------------------------------===//
+
+ vm.rodata private @test_store_i8_ref dense<[0x00, 0x01, 0x7F, 0x80, 0xFF]> : tensor<5xui8>
+
+ vm.export @test_store_i8 attributes {emitc.exclude}
+ vm.func private @test_store_i8() {
+ %ref = vm.const.ref.rodata @test_store_i8_ref : !vm.buffer
+ %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+ %ref_length = vm.buffer.length %ref_dno : !vm.buffer -> i32
+
+ %buf = vm.buffer.alloc %ref_length : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+
+ %c0 = vm.const.i32 0
+ %e0 = vm.const.i32 0
+ vm.buffer.store.i8 %e0, %buf_dno[%c0] : i32 -> !vm.buffer
+ %c1 = vm.const.i32 1
+ %e1 = vm.const.i32 1
+ vm.buffer.store.i8 %e1, %buf_dno[%c1] : i32 -> !vm.buffer
+ %c2 = vm.const.i32 2
+ %e2 = vm.const.i32 0x7F
+ vm.buffer.store.i8 %e2, %buf_dno[%c2] : i32 -> !vm.buffer
+ %c3 = vm.const.i32 3
+ %e3 = vm.const.i32 0x80
+ vm.buffer.store.i8 %e3, %buf_dno[%c3] : i32 -> !vm.buffer
+ %c4 = vm.const.i32 4
+ %e4 = vm.const.i32 0xFF
+ vm.buffer.store.i8 %e4, %buf_dno[%c4] : i32 -> !vm.buffer
+
+ %cmp = vm.buffer.compare %ref_dno, %c0, %buf_dno, %c0, %ref_length : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "source and target match" : i32
+
+ vm.return
+ }
+
+ vm.rodata private @test_store_i16_ref dense<[0x0000, 0x0001, 0x7FFF, 0x8000, 0xFFFF]> : tensor<5xui16>
+
+ vm.export @test_store_i16 attributes {emitc.exclude}
+ vm.func private @test_store_i16() {
+ %ref = vm.const.ref.rodata @test_store_i16_ref : !vm.buffer
+ %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+ %ref_length = vm.buffer.length %ref_dno : !vm.buffer -> i32
+
+ %buf = vm.buffer.alloc %ref_length : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+
+ %c0 = vm.const.i32 0
+ %e0 = vm.const.i32 0
+ vm.buffer.store.i16 %e0, %buf_dno[%c0] : i32 -> !vm.buffer
+ %c2 = vm.const.i32 2
+ %e1 = vm.const.i32 1
+ vm.buffer.store.i16 %e1, %buf_dno[%c2] : i32 -> !vm.buffer
+ %c4 = vm.const.i32 4
+ %e2 = vm.const.i32 0x7FFF
+ vm.buffer.store.i16 %e2, %buf_dno[%c4] : i32 -> !vm.buffer
+ %c6 = vm.const.i32 6
+ %e3 = vm.const.i32 0x8000
+ vm.buffer.store.i16 %e3, %buf_dno[%c6] : i32 -> !vm.buffer
+ %c8 = vm.const.i32 8
+ %e4 = vm.const.i32 0xFFFF
+ vm.buffer.store.i16 %e4, %buf_dno[%c8] : i32 -> !vm.buffer
+
+ %cmp = vm.buffer.compare %ref_dno, %c0, %buf_dno, %c0, %ref_length : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "source and target match" : i32
+
+ vm.return
+ }
+
+ vm.rodata private @test_store_i32_ref dense<[0x00000000, 0x00000001, 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF]> : tensor<5xui32>
+
+ vm.export @test_store_i32 attributes {emitc.exclude}
+ vm.func private @test_store_i32() {
+ %ref = vm.const.ref.rodata @test_store_i32_ref : !vm.buffer
+ %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+ %ref_length = vm.buffer.length %ref_dno : !vm.buffer -> i32
+
+ %buf = vm.buffer.alloc %ref_length : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+
+ %c0 = vm.const.i32 0
+ %e0 = vm.const.i32 0
+ vm.buffer.store.i32 %e0, %buf_dno[%c0] : i32 -> !vm.buffer
+ %c4 = vm.const.i32 4
+ %e1 = vm.const.i32 1
+ vm.buffer.store.i32 %e1, %buf_dno[%c4] : i32 -> !vm.buffer
+ %c8 = vm.const.i32 8
+ %e2 = vm.const.i32 0x7FFFFFFF
+ vm.buffer.store.i32 %e2, %buf_dno[%c8] : i32 -> !vm.buffer
+ %c12 = vm.const.i32 12
+ %e3 = vm.const.i32 0x80000000
+ vm.buffer.store.i32 %e3, %buf_dno[%c12] : i32 -> !vm.buffer
+ %c16 = vm.const.i32 16
+ %e4 = vm.const.i32 0xFFFFFFFF
+ vm.buffer.store.i32 %e4, %buf_dno[%c16] : i32 -> !vm.buffer
+
+ %cmp = vm.buffer.compare %ref_dno, %c0, %buf_dno, %c0, %ref_length : !vm.buffer, !vm.buffer
+ vm.check.nz %cmp, "source and target match" : i32
+
+ vm.return
+ }
+
+ // Unaligned stores are not supported and offsets will be rounded down.
+ vm.export @test_store_i32_unaligned attributes {emitc.exclude}
+ vm.func private @test_store_i32_unaligned() {
+ %c12 = vm.const.i32 12
+ %buf = vm.buffer.alloc %c12 : !vm.buffer
+ %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+
+ // Byte offset 5 rounded to byte offset 4 (element 1).
+ %c5 = vm.const.i32 5
+ %e1 = vm.const.i32 0x44556677
+ vm.buffer.store.i32 %e1, %buf_dno[%c5] : i32 -> !vm.buffer
+
+ // Read back at offset 4 (where the data should be).
+ %c4 = vm.const.i32 4
+ %a1 = vm.buffer.load.i32 %buf_dno[%c4] : !vm.buffer -> i32
+ vm.check.eq %a1, %e1, "0x44556677" : i32
+
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/call_ops.mlir b/runtime/src/iree/vm/test/call_ops.mlir
new file mode 100644
index 0000000..6f5072d
--- /dev/null
+++ b/runtime/src/iree/vm/test/call_ops.mlir
@@ -0,0 +1,139 @@
+vm.module @call_ops {
+
+ vm.rodata private @buffer dense<[1, 2, 3]> : tensor<3xi8>
+
+ vm.export @fail_call_v_v
+ vm.func @fail_call_v_v() {
+ vm.call @_v_v_fail() : () -> ()
+ vm.return
+ }
+
+ vm.export @test_call_i_v
+ vm.func @test_call_i_v() {
+ %c1 = vm.const.i32 1
+ vm.call @_i_v(%c1) : (i32) -> ()
+ vm.return
+ }
+
+ vm.export @test_call_r_v
+ vm.func @test_call_r_v() {
+ %ref = vm.const.ref.zero : !vm.ref<?>
+ vm.call @_r_v(%ref) : (!vm.ref<?>) -> ()
+ vm.return
+ }
+
+ // Check that reused ref argument slots are handled properly
+ vm.export @test_call_r_v_reuse_reg
+ vm.func @test_call_r_v_reuse_reg() {
+ %ref = vm.const.ref.zero : !vm.buffer
+ %unused = vm.const.ref.zero : !vm.buffer
+ vm.call @_r_v_reuse_reg(%ref, %unused) : (!vm.buffer, !vm.buffer) -> ()
+ vm.return
+ }
+
+ // Check passing refs as arguments doesn't alter values on the call site
+ // TODO(simon-camp): In the C target we run the DropCompilerHintsPass after
+ // ordinal allocation and vm to EmitC conversion to prevent constant folding
+  // of the tests during the latter. This means we would need to add a pattern
+ // that inserts calls to `iree_vm_ref_retain` for operand/result pairs of the
+ // do_not_optimize op.
+ // TODO(simon-camp): Enable the test for emitc.
+ vm.export @test_call_r_v_preserve_ref attributes {emitc.exclude}
+ vm.func private @test_call_r_v_preserve_ref() {
+ %ref = vm.const.ref.zero : !vm.buffer
+ %unused = vm.const.ref.rodata @buffer : !vm.buffer
+ %unusued_dno_1 = util.do_not_optimize(%unused) : !vm.buffer
+ vm.check.nz %unused : !vm.buffer
+ vm.call @_r_v_preserve_reg(%ref, %unused) : (!vm.buffer, !vm.buffer) -> ()
+ %unusued_dno_2 = util.do_not_optimize(%unused) : !vm.buffer
+ vm.check.nz %unusued_dno_2 : !vm.buffer
+ vm.return
+ }
+
+ vm.export @test_call_v_i
+ vm.func @test_call_v_i() {
+ %c1 = vm.const.i32 1
+ %0 = vm.call @_v_i() : () -> (i32)
+ vm.check.eq %0, %c1, "_v_i()=1" : i32
+ vm.return
+ }
+
+ vm.export @test_call_v_r
+ vm.func @test_call_v_r() {
+ %ref = vm.const.ref.zero : !vm.ref<?>
+ %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+ %res = vm.call @_v_r() : () -> (!vm.ref<?>)
+ vm.check.eq %ref_dno, %res, "_v_r()=NULL" : !vm.ref<?>
+ vm.return
+ }
+
+ vm.export @test_call_v_ii
+ vm.func @test_call_v_ii() {
+ %c1 = vm.const.i32 1
+ %c2 = vm.const.i32 2
+ %0:2 = vm.call @_v_ii() : () -> (i32, i32)
+ vm.check.eq %0#0, %c1, "_v_ii()#0=1" : i32
+ vm.check.eq %0#1, %c2, "_v_ii()#1=2" : i32
+ vm.return
+ }
+
+ vm.export @test_call_v_v
+ vm.func @test_call_v_v() {
+ vm.call @_v_v() : () -> ()
+ vm.return
+ }
+
+ vm.func @_i_v(%arg : i32) attributes {noinline} {
+ %c1 = vm.const.i32 1
+ vm.check.eq %arg, %c1, "Expected %arg to be 1" : i32
+ vm.return
+ }
+
+ vm.func @_r_v(%arg : !vm.ref<?>) attributes {noinline} {
+ %ref = vm.const.ref.zero : !vm.ref<?>
+ %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+ vm.check.eq %arg, %ref_dno, "Expected %arg to be NULL" : !vm.ref<?>
+ vm.return
+ }
+
+ vm.func @_r_v_reuse_reg(%arg : !vm.ref<?>, %unused : !vm.ref<?>) attributes {noinline} {
+ %ref = vm.const.ref.zero : !vm.ref<?>
+ %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+ vm.check.eq %arg, %ref_dno, "Expected %arg to be NULL" : !vm.ref<?>
+ vm.return
+ }
+
+ vm.func @_r_v_preserve_reg(%arg1 : !vm.ref<?>, %arg2 : !vm.ref<?>) attributes {noinline} {
+ %ref = vm.const.ref.zero : !vm.ref<?>
+ %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+ vm.check.eq %arg1, %ref_dno, "Expected %arg1 to be NULL" : !vm.ref<?>
+ vm.check.nz %arg2, "Expected %arg2 to be not NULL" : !vm.ref<?>
+ vm.return
+ }
+
+ vm.func @_v_i() -> i32 attributes {noinline} {
+ %c1 = vm.const.i32 1
+ vm.return %c1 : i32
+ }
+
+ vm.func @_v_r() -> !vm.ref<?> attributes {noinline} {
+ %ref = vm.const.ref.zero : !vm.ref<?>
+ vm.return %ref : !vm.ref<?>
+ }
+
+ vm.func @_v_ii() -> (i32, i32) attributes {noinline} {
+ %c1 = vm.const.i32 1
+ %c2 = vm.const.i32 2
+ vm.return %c1, %c2 : i32, i32
+ }
+
+ vm.func @_v_v() attributes {noinline} {
+ vm.return
+ }
+
+ vm.func @_v_v_fail() attributes {noinline} {
+ %c2 = vm.const.i32 2
+ vm.fail %c2
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/comparison_ops.mlir b/runtime/src/iree/vm/test/comparison_ops.mlir
new file mode 100644
index 0000000..56067b8
--- /dev/null
+++ b/runtime/src/iree/vm/test/comparison_ops.mlir
@@ -0,0 +1,172 @@
+vm.module @comparison_ops {
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.lt.i32.s
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_cmp_lt_s_0
+ vm.func @test_cmp_lt_s_0() {
+ %lhs = vm.const.i32 2
+ %lhs_dno = util.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 -2
+ %rhs_dno = util.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 0
+ vm.check.eq %actual, %expected, "2 < -2" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_s_1
+ vm.func @test_cmp_lt_s_1() {
+ %lhs = vm.const.i32 -2
+ %lhs_dno = util.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 2
+ %rhs_dno = util.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 1
+ vm.check.eq %actual, %expected, "-2 < 2" : i32
+ vm.return
+ }
+
+ // Expect UINT_MAX to be interpreted as -1 when doing a signed compare.
+ vm.export @test_cmp_lt_s_2
+ vm.func @test_cmp_lt_s_2() {
+ %lhs = vm.const.i32 4294967295
+ %lhs_dno = util.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 2
+ %rhs_dno = util.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 1
+ vm.check.eq %actual, %expected, "4294967295 (UINT_MAX) < 2" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.lt.i32.u
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_cmp_lt_u_0
+ vm.func @test_cmp_lt_u_0() {
+ %lhs = vm.const.i32 2
+ %lhs_dno = util.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 -2
+ %rhs_dno = util.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 1
+ vm.check.eq %actual, %expected, "2 < -2 (as unsigned)" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_u_1
+ vm.func @test_cmp_lt_u_1() {
+ %lhs = vm.const.i32 -2
+ %lhs_dno = util.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 2
+ %rhs_dno = util.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 0
+ vm.check.eq %actual, %expected, "-2 < 2 (as unsigned)" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_u_2
+ vm.func @test_cmp_lt_u_2() {
+ %lhs = vm.const.i32 4294967295
+ %lhs_dno = util.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 2
+ %rhs_dno = util.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 0
+ vm.check.eq %actual, %expected, "4294967295 (UINT_MAX) < 2 (as unsigned)" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.*.i32.* pseudo-ops
+ //===--------------------------------------------------------------------===//
+  // NOTE: all of these are turned into some variants of vm.cmp.lt by the
+ // compiler and are here as a way to test the runtime behavior of the
+ // pseudo-op expansions.
+
+ vm.export @test_cmp_lte
+ vm.func @test_cmp_lte() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.i32 -2
+ %cn2_dno = util.do_not_optimize(%cn2) : i32
+ %c2 = vm.const.i32 2
+ %c2_dno = util.do_not_optimize(%c2) : i32
+
+ %cmp_0 = vm.cmp.lte.i32.s %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_0, %true, "-2 <= 2" : i32
+ %cmp_1 = vm.cmp.lte.i32.s %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_1, %false, "2 <= -2" : i32
+ %cmp_2 = vm.cmp.lte.i32.s %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_2, %true, "2 <= 2" : i32
+
+ %cmp_3 = vm.cmp.lte.i32.u %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_3, %false, "-2 <= 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.lte.i32.u %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_4, %true, "2 <= -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.lte.i32.u %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_5, %true, "2 <= 2 (unsigned)" : i32
+
+ vm.return
+ }
+
+ vm.export @test_cmp_gt
+ vm.func @test_cmp_gt() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.i32 -2
+ %cn2_dno = util.do_not_optimize(%cn2) : i32
+ %c2 = vm.const.i32 2
+ %c2_dno = util.do_not_optimize(%c2) : i32
+
+ %cmp_0 = vm.cmp.gt.i32.s %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_0, %false, "-2 > 2" : i32
+ %cmp_1 = vm.cmp.gt.i32.s %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_1, %true, "2 > -2" : i32
+ %cmp_2 = vm.cmp.gt.i32.s %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_2, %false, "2 > 2" : i32
+
+ %cmp_3 = vm.cmp.gt.i32.u %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_3, %true, "-2 > 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.gt.i32.u %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_4, %false, "2 > -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.gt.i32.u %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_5, %false, "2 > 2 (unsigned)" : i32
+
+ vm.return
+ }
+
+ vm.export @test_cmp_gte
+ vm.func @test_cmp_gte() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.i32 -2
+ %cn2_dno = util.do_not_optimize(%cn2) : i32
+ %c2 = vm.const.i32 2
+ %c2_dno = util.do_not_optimize(%c2) : i32
+
+ %cmp_0 = vm.cmp.gte.i32.s %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_0, %false, "-2 >= 2" : i32
+ %cmp_1 = vm.cmp.gte.i32.s %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_1, %true, "2 >= -2" : i32
+ %cmp_2 = vm.cmp.gte.i32.s %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_2, %true, "2 >= 2" : i32
+
+ %cmp_3 = vm.cmp.gte.i32.u %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_3, %true, "-2 >= 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.gte.i32.u %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_4, %false, "2 >= -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.gte.i32.u %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_5, %true, "2 >= 2 (unsigned)" : i32
+
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/comparison_ops_f32.mlir b/runtime/src/iree/vm/test/comparison_ops_f32.mlir
new file mode 100644
index 0000000..fe66f4a
--- /dev/null
+++ b/runtime/src/iree/vm/test/comparison_ops_f32.mlir
@@ -0,0 +1,97 @@
+vm.module @comparison_ops_f32 {
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.lt.f32
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_cmp_lt_0_f32
+ vm.func @test_cmp_lt_0_f32() {
+ %lhs = vm.const.f32 4.0
+ %lhs_dno = util.do_not_optimize(%lhs) : f32
+ %rhs = vm.const.f32 -4.0
+ %rhs_dno = util.do_not_optimize(%rhs) : f32
+ %actual = vm.cmp.lt.f32.o %lhs_dno, %rhs_dno : f32
+ %expected = vm.const.i32 0
+ vm.check.eq %actual, %expected, "4.0 < -4.0" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_1_f32
+ vm.func @test_cmp_lt_1_f32() {
+ %lhs = vm.const.f32 -4.0
+ %lhs_dno = util.do_not_optimize(%lhs) : f32
+ %rhs = vm.const.f32 4.0
+ %rhs_dno = util.do_not_optimize(%rhs) : f32
+ %actual = vm.cmp.lt.f32.o %lhs_dno, %rhs_dno : f32
+ %expected = vm.const.i32 1
+ vm.check.eq %actual, %expected, "-4.0 < 4.0" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.*.f32 pseudo-ops
+ //===--------------------------------------------------------------------===//
+  // NOTE: all of these are turned into some variants of vm.cmp.lt by the
+ // compiler and are here as a way to test the runtime behavior of the
+ // pseudo-op expansions.
+
+ vm.export @test_cmp_lte_f32
+ vm.func @test_cmp_lte_f32() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.f32 -2.0
+ %cn2_dno = util.do_not_optimize(%cn2) : f32
+ %c2 = vm.const.f32 2.0
+ %c2_dno = util.do_not_optimize(%c2) : f32
+
+ %cmp_0 = vm.cmp.lte.f32.o %cn2_dno, %c2_dno : f32
+ vm.check.eq %cmp_0, %true, "-2 <= 2" : i32
+ %cmp_1 = vm.cmp.lte.f32.o %c2_dno, %cn2_dno : f32
+ vm.check.eq %cmp_1, %false, "2 <= -2" : i32
+ %cmp_2 = vm.cmp.lte.f32.o %c2_dno, %c2_dno : f32
+ vm.check.eq %cmp_2, %true, "2 <= 2" : i32
+
+ vm.return
+ }
+
+ vm.export @test_cmp_gt_f32
+ vm.func @test_cmp_gt_f32() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.f32 -2.0
+ %cn2_dno = util.do_not_optimize(%cn2) : f32
+ %c2 = vm.const.f32 2.0
+ %c2_dno = util.do_not_optimize(%c2) : f32
+
+ %cmp_0 = vm.cmp.gt.f32.o %cn2_dno, %c2_dno : f32
+ vm.check.eq %cmp_0, %false, "-2 > 2" : i32
+ %cmp_1 = vm.cmp.gt.f32.o %c2_dno, %cn2_dno : f32
+ vm.check.eq %cmp_1, %true, "2 > -2" : i32
+ %cmp_2 = vm.cmp.gt.f32.o %c2_dno, %c2_dno : f32
+ vm.check.eq %cmp_2, %false, "2 > 2" : i32
+
+ vm.return
+ }
+
+ vm.export @test_cmp_gte_f32
+ vm.func @test_cmp_gte_f32() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.f32 -2.0
+ %cn2_dno = util.do_not_optimize(%cn2) : f32
+ %c2 = vm.const.f32 2.0
+ %c2_dno = util.do_not_optimize(%c2) : f32
+
+ %cmp_0 = vm.cmp.gte.f32.o %cn2_dno, %c2_dno : f32
+ vm.check.eq %cmp_0, %false, "-2 >= 2" : i32
+ %cmp_1 = vm.cmp.gte.f32.o %c2_dno, %cn2_dno : f32
+ vm.check.eq %cmp_1, %true, "2 >= -2" : i32
+ %cmp_2 = vm.cmp.gte.f32.o %c2_dno, %c2_dno : f32
+ vm.check.eq %cmp_2, %true, "2 >= 2" : i32
+
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/comparison_ops_i64.mlir b/runtime/src/iree/vm/test/comparison_ops_i64.mlir
new file mode 100644
index 0000000..2e1bd76
--- /dev/null
+++ b/runtime/src/iree/vm/test/comparison_ops_i64.mlir
@@ -0,0 +1,171 @@
+vm.module @comparison_ops_i64 {
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.lt.i64.s
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_cmp_lt_s_0_i64
+ vm.func @test_cmp_lt_s_0_i64() {
+ %lhs = vm.const.i64 4294967295
+ %lhs_dno = util.do_not_optimize(%lhs) : i64
+ %rhs = vm.const.i64 -4294967295
+ %rhs_dno = util.do_not_optimize(%rhs) : i64
+ %actual = vm.cmp.lt.i64.s %lhs_dno, %rhs_dno : i64
+ %expected = vm.const.i32 0
+ vm.check.eq %actual, %expected, "4294967295 (UINT_MAX) < -4294967295 (UINT_MAX)" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_s_1_i64
+ vm.func @test_cmp_lt_s_1_i64() {
+ %lhs = vm.const.i64 -4294967295
+ %lhs_dno = util.do_not_optimize(%lhs) : i64
+ %rhs = vm.const.i64 4294967295
+ %rhs_dno = util.do_not_optimize(%rhs) : i64
+ %actual = vm.cmp.lt.i64.s %lhs_dno, %rhs_dno : i64
+ %expected = vm.const.i32 1
+ vm.check.eq %actual, %expected, "-4294967295 (UINT_MAX) < 4294967295 (UINT_MAX)" : i32
+ vm.return
+ }
+
+ // Expect ULONG_MAX to be interpreted as -1 when doing a signed compare.
+ vm.export @test_cmp_lt_s_2_i64
+ vm.func @test_cmp_lt_s_2_i64() {
+ %lhs = vm.const.i64 18446744073709551615
+ %lhs_dno = util.do_not_optimize(%lhs) : i64
+ %rhs = vm.const.i64 2
+ %rhs_dno = util.do_not_optimize(%rhs) : i64
+ %actual = vm.cmp.lt.i64.s %lhs_dno, %rhs_dno : i64
+ %expected = vm.const.i32 1
+ vm.check.eq %actual, %expected, "18446744073709551615 (ULONG_MAX) < 2" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.lt.i64.u
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_cmp_lt_u_0_i64
+ vm.func @test_cmp_lt_u_0_i64() {
+ %lhs = vm.const.i64 2
+ %lhs_dno = util.do_not_optimize(%lhs) : i64
+ %rhs = vm.const.i64 -2
+ %rhs_dno = util.do_not_optimize(%rhs) : i64
+ %actual = vm.cmp.lt.i64.u %lhs_dno, %rhs_dno : i64
+ %expected = vm.const.i32 1
+ vm.check.eq %actual, %expected, "2 < -2 (as unsigned)" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_u_1_i64
+ vm.func @test_cmp_lt_u_1_i64() {
+ %lhs = vm.const.i64 -2
+ %lhs_dno = util.do_not_optimize(%lhs) : i64
+ %rhs = vm.const.i64 2
+ %rhs_dno = util.do_not_optimize(%rhs) : i64
+ %actual = vm.cmp.lt.i64.u %lhs_dno, %rhs_dno : i64
+ %expected = vm.const.i32 0
+ vm.check.eq %actual, %expected, "-2 < 2 (as unsigned)" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_u_2_i64
+ vm.func @test_cmp_lt_u_2_i64() {
+ %lhs = vm.const.i64 18446744073709551615
+ %lhs_dno = util.do_not_optimize(%lhs) : i64
+ %rhs = vm.const.i64 2
+ %rhs_dno = util.do_not_optimize(%rhs) : i64
+ %actual = vm.cmp.lt.i64.u %lhs_dno, %rhs_dno : i64
+ %expected = vm.const.i32 0
+ vm.check.eq %actual, %expected, "18446744073709551615 (ULONG_MAX) < 2 (as unsigned)" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.*.i64.* pseudo-ops
+ //===--------------------------------------------------------------------===//
+  // NOTE: all of these are turned into some variants of vm.cmp.lt by the
+ // compiler and are here as a way to test the runtime behavior of the
+ // pseudo-op expansions.
+
+ vm.export @test_cmp_lte_i64
+ vm.func @test_cmp_lte_i64() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.i64 -2
+ %cn2_dno = util.do_not_optimize(%cn2) : i64
+ %c2 = vm.const.i64 2
+ %c2_dno = util.do_not_optimize(%c2) : i64
+
+ %cmp_0 = vm.cmp.lte.i64.s %cn2_dno, %c2_dno : i64
+ vm.check.eq %cmp_0, %true, "-2 <= 2" : i32
+ %cmp_1 = vm.cmp.lte.i64.s %c2_dno, %cn2_dno : i64
+ vm.check.eq %cmp_1, %false, "2 <= -2" : i32
+ %cmp_2 = vm.cmp.lte.i64.s %c2_dno, %c2_dno : i64
+ vm.check.eq %cmp_2, %true, "2 <= 2" : i32
+
+ %cmp_3 = vm.cmp.lte.i64.u %cn2_dno, %c2_dno : i64
+ vm.check.eq %cmp_3, %false, "-2 <= 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.lte.i64.u %c2_dno, %cn2_dno : i64
+ vm.check.eq %cmp_4, %true, "2 <= -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.lte.i64.u %c2_dno, %c2_dno : i64
+ vm.check.eq %cmp_5, %true, "2 <= 2 (unsigned)" : i32
+
+ vm.return
+ }
+
+ vm.export @test_cmp_gt_i64
+ vm.func @test_cmp_gt_i64() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.i64 -2
+ %cn2_dno = util.do_not_optimize(%cn2) : i64
+ %c2 = vm.const.i64 2
+ %c2_dno = util.do_not_optimize(%c2) : i64
+
+ %cmp_0 = vm.cmp.gt.i64.s %cn2_dno, %c2_dno : i64
+ vm.check.eq %cmp_0, %false, "-2 > 2" : i32
+ %cmp_1 = vm.cmp.gt.i64.s %c2_dno, %cn2_dno : i64
+ vm.check.eq %cmp_1, %true, "2 > -2" : i32
+ %cmp_2 = vm.cmp.gt.i64.s %c2_dno, %c2_dno : i64
+ vm.check.eq %cmp_2, %false, "2 > 2" : i32
+
+ %cmp_3 = vm.cmp.gt.i64.u %cn2_dno, %c2_dno : i64
+ vm.check.eq %cmp_3, %true, "-2 > 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.gt.i64.u %c2_dno, %cn2_dno : i64
+ vm.check.eq %cmp_4, %false, "2 > -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.gt.i64.u %c2_dno, %c2_dno : i64
+ vm.check.eq %cmp_5, %false, "2 > 2 (unsigned)" : i32
+
+ vm.return
+ }
+
+ vm.export @test_cmp_gte_i64
+ vm.func @test_cmp_gte_i64() {
+ %true = vm.const.i32 1
+ %false = vm.const.i32 0
+
+ %cn2 = vm.const.i64 -2
+ %cn2_dno = util.do_not_optimize(%cn2) : i64
+ %c2 = vm.const.i64 2
+ %c2_dno = util.do_not_optimize(%c2) : i64
+
+ %cmp_0 = vm.cmp.gte.i64.s %cn2_dno, %c2_dno : i64
+ vm.check.eq %cmp_0, %false, "-2 >= 2" : i32
+ %cmp_1 = vm.cmp.gte.i64.s %c2_dno, %cn2_dno : i64
+ vm.check.eq %cmp_1, %true, "2 >= -2" : i32
+ %cmp_2 = vm.cmp.gte.i64.s %c2_dno, %c2_dno : i64
+ vm.check.eq %cmp_2, %true, "2 >= 2" : i32
+
+ %cmp_3 = vm.cmp.gte.i64.u %cn2_dno, %c2_dno : i64
+ vm.check.eq %cmp_3, %true, "-2 >= 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.gte.i64.u %c2_dno, %cn2_dno : i64
+ vm.check.eq %cmp_4, %false, "2 >= -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.gte.i64.u %c2_dno, %c2_dno : i64
+ vm.check.eq %cmp_5, %true, "2 >= 2 (unsigned)" : i32
+
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/control_flow_ops.mlir b/runtime/src/iree/vm/test/control_flow_ops.mlir
new file mode 100644
index 0000000..c4015e6
--- /dev/null
+++ b/runtime/src/iree/vm/test/control_flow_ops.mlir
@@ -0,0 +1,112 @@
+vm.module @control_flow_ops {
+
+ //===--------------------------------------------------------------------===//
+ // vm.return
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_return_empty
+ vm.func @test_return_empty() {
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.fail
+ //===--------------------------------------------------------------------===//
+
+ vm.export @fail_always
+ vm.func @fail_always() {
+ %code = vm.const.i32 4
+ vm.fail %code, "error!"
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.check.*
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_check_eq_always
+ vm.func @test_check_eq_always() {
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ vm.check.eq %c1, %c1dno, "error!" : i32
+ vm.return
+ }
+
+ vm.export @fail_check_eq_never
+ vm.func @fail_check_eq_never() {
+ %c1 = vm.const.i32 1
+ %c2 = vm.const.i32 2
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2dno = util.do_not_optimize(%c2) : i32
+ vm.check.eq %c1dno, %c2dno, "error!" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.import.resolved
+ //===--------------------------------------------------------------------===//
+
+ vm.import optional @reserved.optional(%arg0: i32) -> i32
+
+ // The optional import should not be found.
+ vm.export @test_optional_import_resolved
+ vm.func @test_optional_import_resolved() {
+ %c1 = vm.const.i32 1
+ %has_reserved_optional = vm.import.resolved @reserved.optional : i32
+ vm.check.ne %has_reserved_optional, %c1, "missing optional import found" : i32
+ vm.return
+ }
+
+ // The call should fail at runtime because the optional import is not resolved.
+ vm.export @fail_optional_import_call
+ vm.func @fail_optional_import_call() {
+ %c1 = vm.const.i32 1
+ %0 = vm.call @reserved.optional(%c1) : (i32) -> i32
+ %code = vm.const.i32 4
+ vm.fail %code, "unreachable!"
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.cond_br
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_cond_br
+ vm.func @test_cond_br() {
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ vm.cond_br %c1dno, ^bb1, ^bb2
+ ^bb1:
+ vm.check.eq %c1dno, %c1dno, "error!" : i32
+ vm.return
+ ^bb2:
+ %code = vm.const.i32 4
+ vm.fail %code, "unreachable!"
+ }
+
+ vm.export @test_cond_br_int_arg
+ vm.func @test_cond_br_int_arg() {
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ vm.cond_br %c1dno, ^bb1(%c1dno : i32), ^bb2(%c1dno : i32)
+ ^bb1(%arg1 : i32):
+ vm.check.eq %arg1, %c1dno, "error!" : i32
+ vm.return
+ ^bb2(%arg2 : i32):
+ %code = vm.const.i32 4
+ vm.fail %code, "unreachable!"
+ }
+
+ vm.export @test_cond_br_ref_arg
+ vm.func @test_cond_br_ref_arg() {
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %ref = vm.const.ref.zero : !vm.ref<?>
+ vm.cond_br %c1dno, ^bb1(%ref : !vm.ref<?>), ^bb2(%ref : !vm.ref<?>)
+ ^bb1(%arg1 : !vm.ref<?>):
+ vm.check.eq %arg1, %ref, "error!" : !vm.ref<?>
+ vm.return
+ ^bb2(%arg2 : !vm.ref<?>):
+ %code = vm.const.i32 4
+ vm.fail %code, "unreachable!"
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/conversion_ops.mlir b/runtime/src/iree/vm/test/conversion_ops.mlir
new file mode 100644
index 0000000..799376e
--- /dev/null
+++ b/runtime/src/iree/vm/test/conversion_ops.mlir
@@ -0,0 +1,27 @@
+vm.module @conversion_ops {
+
+ //===----------------------------------------------------------------------===//
+ // Casting and type conversion/emulation
+ //===----------------------------------------------------------------------===//
+
+ vm.export @test_trunc_i32_i8
+ vm.func @test_trunc_i32_i8() {
+ %c1 = vm.const.i32 2147483647
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.trunc.i32.i8 %c1dno : i32 -> i32
+ %c2 = vm.const.i32 255
+ vm.check.eq %v, %c2, "truncate unsigned i32 to unsigned i8" : i32
+ vm.return
+ }
+
+ vm.export @test_trunc_i32_i16
+ vm.func @test_trunc_i32_i16() {
+ %c1 = vm.const.i32 2147483647
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.trunc.i32.i16 %c1dno : i32 -> i32
+ %c2 = vm.const.i32 65535
+ vm.check.eq %v, %c2, "truncate unsigned i32 to unsigned i16" : i32
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/conversion_ops_f32.mlir b/runtime/src/iree/vm/test/conversion_ops_f32.mlir
new file mode 100644
index 0000000..a68bccc
--- /dev/null
+++ b/runtime/src/iree/vm/test/conversion_ops_f32.mlir
@@ -0,0 +1,119 @@
+vm.module @conversion_ops_f32 {
+
+ //===----------------------------------------------------------------------===//
+ // Casting and type conversion/emulation
+ //===----------------------------------------------------------------------===//
+
+ // 5.5 f32 (0x40b00000 hex) -> 1085276160 int32
+ vm.export @test_bitcast_i32_f32
+ vm.func @test_bitcast_i32_f32() {
+ %c1 = vm.const.i32 1085276160
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.bitcast.i32.f32 %c1dno : i32 -> f32
+ %c2 = vm.const.f32 5.5
+ vm.check.eq %v, %c2, "bitcast i32 to f32" : f32
+ vm.return
+ }
+
+ // 1085276160 int32 (0x40b00000 hex) -> 5.5 f32
+ vm.export @test_bitcast_f32_i32
+ vm.func @test_bitcast_f32_i32() {
+ %c1 = vm.const.f32 5.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.bitcast.f32.i32 %c1dno : f32 -> i32
+ %c2 = vm.const.i32 1085276160
+ vm.check.eq %v, %c2, "bitcast f32 to i32" : i32
+ vm.return
+ }
+
+ vm.export @test_cast_si32_f32_int_max
+ vm.func @test_cast_si32_f32_int_max() {
+ %c1 = vm.const.i32 2147483647
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.cast.si32.f32 %c1dno : i32 -> f32
+ %c2 = vm.const.f32 2147483647.0
+ vm.check.eq %v, %c2, "cast signed integer to a floating-point value" : f32
+ vm.return
+ }
+
+ vm.export @test_cast_si32_f32_int_min
+ vm.func @test_cast_si32_f32_int_min() {
+ %c1 = vm.const.i32 -2147483648
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.cast.si32.f32 %c1dno : i32 -> f32
+ %c2 = vm.const.f32 -2147483648.0
+ vm.check.eq %v, %c2, "cast signed integer to a floating-point value" : f32
+ vm.return
+ }
+
+ vm.export @test_cast_ui32_f32_int_max
+ vm.func @test_cast_ui32_f32_int_max() {
+ %c1 = vm.const.i32 4294967295
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %v = vm.cast.ui32.f32 %c1dno : i32 -> f32
+ %c2 = vm.const.f32 4294967295.0
+ vm.check.eq %v, %c2, "cast unsigned integer to a floating-point value" : f32
+ vm.return
+ }
+
+ vm.export @test_cast_f32_si32_int_max
+ vm.func @test_cast_f32_si32_int_max() {
+ %c1 = vm.const.f32 2147483647.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.cast.f32.si32 %c1dno : f32 -> i32
+ %c2 = vm.const.i32 -2147483648
+ vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
+ vm.return
+ }
+
+ vm.export @test_cast_f32_si32_int_min
+ vm.func @test_cast_f32_si32_int_min() {
+ %c1 = vm.const.f32 -2147483648.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.cast.f32.si32 %c1dno : f32 -> i32
+ %c2 = vm.const.i32 -2147483648
+ vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
+ vm.return
+ }
+
+ vm.export @test_cast_f32_si32_away_from_zero_pos
+ vm.func @test_cast_f32_si32_away_from_zero_pos() {
+ %c1 = vm.const.f32 2.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.cast.f32.si32 %c1dno : f32 -> i32
+ %c2 = vm.const.i32 3
+ vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
+ vm.return
+ }
+
+ vm.export @test_cast_f32_si32_away_from_zero_neg
+ vm.func @test_cast_f32_si32_away_from_zero_neg() {
+ %c1 = vm.const.f32 -2.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.cast.f32.si32 %c1dno : f32 -> i32
+ %c2 = vm.const.i32 -3
+ vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
+ vm.return
+ }
+
+ vm.export @test_cast_f32_ui32_int_max
+ vm.func @test_cast_f32_ui32_int_max() {
+ %c1 = vm.const.f32 4294967295.0
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.cast.f32.ui32 %c1dno : f32 -> i32
+ %c2 = vm.const.i32 0
+ vm.check.eq %v, %c2, "cast floating-point value to an unsigned integer" : i32
+ vm.return
+ }
+
+ vm.export @test_cast_f32_ui32_away_from_zero
+ vm.func @test_cast_f32_ui32_away_from_zero() {
+ %c1 = vm.const.f32 2.5
+ %c1dno = util.do_not_optimize(%c1) : f32
+ %v = vm.cast.f32.ui32 %c1dno : f32 -> i32
+ %c2 = vm.const.i32 3
+ vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/conversion_ops_i64.mlir b/runtime/src/iree/vm/test/conversion_ops_i64.mlir
new file mode 100644
index 0000000..f790e5a
--- /dev/null
+++ b/runtime/src/iree/vm/test/conversion_ops_i64.mlir
@@ -0,0 +1,17 @@
+vm.module @conversion_ops_i64 {
+
+ //===----------------------------------------------------------------------===//
+ // ExtI64: Casting and type conversion/emulation
+ //===----------------------------------------------------------------------===//
+
+ vm.export @test_trunc_i64_i32
+ vm.func @test_trunc_i64_i32() {
+ %c1 = vm.const.i64 9223372036854775807
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %v = vm.trunc.i64.i32 %c1dno : i64 -> i32
+ %c2 = vm.const.i32 4294967295
+ vm.check.eq %v, %c2, "truncate unsigned i64 to unsigned i32" : i32
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/emitc/CMakeLists.txt b/runtime/src/iree/vm/test/emitc/CMakeLists.txt
new file mode 100644
index 0000000..68df57d
--- /dev/null
+++ b/runtime/src/iree/vm/test/emitc/CMakeLists.txt
@@ -0,0 +1,346 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_add_all_subdirs()
+
+if(${IREE_ENABLE_EMITC})
+
+iree_cc_test(
+ NAME
+ module_test
+ SRCS
+ "module_test.cc"
+ DEPS
+ iree::base::cc
+ iree::base::logging
+ iree::testing::gtest
+ iree::testing::gtest_main
+ iree::vm
+ ::arithmetic_ops
+ ::arithmetic_ops_f32
+ ::arithmetic_ops_i64
+ ::assignment_ops
+ ::assignment_ops_f32
+ ::assignment_ops_i64
+ ::buffer_ops
+ ::call_ops
+ ::comparison_ops
+ ::comparison_ops_f32
+ ::comparison_ops_i64
+ ::control_flow_ops
+ ::conversion_ops
+ ::conversion_ops_f32
+ ::conversion_ops_i64
+ ::global_ops
+ ::global_ops_f32
+ ::global_ops_i64
+ ::list_ops
+ ::list_variant_ops
+ ::ref_ops
+ ::shift_ops
+ ::shift_ops_i64
+)
+
+iree_c_module(
+ NAME
+ arithmetic_ops
+ SRC
+ "../arithmetic_ops.mlir"
+ H_FILE_OUTPUT
+ "arithmetic_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ arithmetic_ops_f32
+ SRC
+ "../arithmetic_ops_f32.mlir"
+ H_FILE_OUTPUT
+ "arithmetic_ops_f32.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ arithmetic_ops_i64
+ SRC
+ "../arithmetic_ops_i64.mlir"
+ H_FILE_OUTPUT
+ "arithmetic_ops_i64.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ assignment_ops
+ SRC
+ "../assignment_ops.mlir"
+ H_FILE_OUTPUT
+ "assignment_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ assignment_ops_f32
+ SRC
+ "../assignment_ops_f32.mlir"
+ H_FILE_OUTPUT
+ "assignment_ops_f32.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ assignment_ops_i64
+ SRC
+ "../assignment_ops_i64.mlir"
+ H_FILE_OUTPUT
+ "assignment_ops_i64.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ buffer_ops
+ SRC
+ "../buffer_ops.mlir"
+ H_FILE_OUTPUT
+ "buffer_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ call_ops
+ SRC
+ "../call_ops.mlir"
+ H_FILE_OUTPUT
+ "call_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ comparison_ops
+ SRC
+ "../comparison_ops.mlir"
+ H_FILE_OUTPUT
+ "comparison_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ comparison_ops_f32
+ SRC
+ "../comparison_ops_f32.mlir"
+ H_FILE_OUTPUT
+ "comparison_ops_f32.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ comparison_ops_i64
+ SRC
+ "../comparison_ops_i64.mlir"
+ H_FILE_OUTPUT
+ "comparison_ops_i64.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ control_flow_ops
+ SRC
+ "../control_flow_ops.mlir"
+ H_FILE_OUTPUT
+ "control_flow_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ conversion_ops
+ SRC
+ "../conversion_ops.mlir"
+ H_FILE_OUTPUT
+ "conversion_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ conversion_ops_f32
+ SRC
+ "../conversion_ops_f32.mlir"
+ H_FILE_OUTPUT
+ "conversion_ops_f32.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ conversion_ops_i64
+ SRC
+ "../conversion_ops_i64.mlir"
+ H_FILE_OUTPUT
+ "conversion_ops_i64.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ global_ops
+ SRC
+ "../global_ops.mlir"
+ H_FILE_OUTPUT
+ "global_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ global_ops_f32
+ SRC
+ "../global_ops_f32.mlir"
+ H_FILE_OUTPUT
+ "global_ops_f32.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ global_ops_i64
+ SRC
+ "../global_ops_i64.mlir"
+ H_FILE_OUTPUT
+ "global_ops_i64.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ list_ops
+ SRC
+ "../list_ops.mlir"
+ H_FILE_OUTPUT
+ "list_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ list_variant_ops
+ SRC
+ "../list_variant_ops.mlir"
+ H_FILE_OUTPUT
+ "list_variant_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ ref_ops
+ SRC
+ "../ref_ops.mlir"
+ H_FILE_OUTPUT
+ "ref_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ shift_ops
+ SRC
+ "../shift_ops.mlir"
+ H_FILE_OUTPUT
+ "shift_ops.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+iree_c_module(
+ NAME
+ shift_ops_i64
+ SRC
+ "../shift_ops_i64.mlir"
+ H_FILE_OUTPUT
+ "shift_ops_i64.h"
+ FLAGS
+ "-iree-vm-ir-to-c-module"
+ TRANSLATE_TOOL
+ iree_tools_iree-translate
+)
+
+endif()
diff --git a/runtime/src/iree/vm/test/emitc/module_test.cc b/runtime/src/iree/vm/test/emitc/module_test.cc
new file mode 100644
index 0000000..eee8d61
--- /dev/null
+++ b/runtime/src/iree/vm/test/emitc/module_test.cc
@@ -0,0 +1,184 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// TODO: We should not be including C implementation-only headers in a C++
+// module like this. In order to make this work for the moment across
+// runtime libraries that are strict, do a global using of the std namespace.
+// See #7605
+#include <cmath>
+using namespace std;
+
+#include "iree/base/logging.h"
+#include "iree/base/status_cc.h"
+#include "iree/testing/gtest.h"
+#include "iree/vm/api.h"
+#define EMITC_IMPLEMENTATION
+#include "iree/vm/test/emitc/arithmetic_ops.h"
+#include "iree/vm/test/emitc/arithmetic_ops_f32.h"
+#include "iree/vm/test/emitc/arithmetic_ops_i64.h"
+#include "iree/vm/test/emitc/assignment_ops.h"
+#include "iree/vm/test/emitc/assignment_ops_f32.h"
+#include "iree/vm/test/emitc/assignment_ops_i64.h"
+#include "iree/vm/test/emitc/buffer_ops.h"
+#include "iree/vm/test/emitc/call_ops.h"
+#include "iree/vm/test/emitc/comparison_ops.h"
+#include "iree/vm/test/emitc/comparison_ops_f32.h"
+#include "iree/vm/test/emitc/comparison_ops_i64.h"
+#include "iree/vm/test/emitc/control_flow_ops.h"
+#include "iree/vm/test/emitc/conversion_ops.h"
+#include "iree/vm/test/emitc/conversion_ops_f32.h"
+#include "iree/vm/test/emitc/conversion_ops_i64.h"
+#include "iree/vm/test/emitc/global_ops.h"
+#include "iree/vm/test/emitc/global_ops_f32.h"
+#include "iree/vm/test/emitc/global_ops_i64.h"
+#include "iree/vm/test/emitc/list_ops.h"
+#include "iree/vm/test/emitc/list_variant_ops.h"
+#include "iree/vm/test/emitc/ref_ops.h"
+#include "iree/vm/test/emitc/shift_ops.h"
+#include "iree/vm/test/emitc/shift_ops_i64.h"
+
+namespace {
+
+typedef iree_status_t (*create_function_t)(iree_allocator_t,
+ iree_vm_module_t**);
+
+struct TestParams {
+ std::string module_name;
+ std::string local_name;
+ create_function_t create_function;
+};
+
+struct ModuleDescription {
+ iree_vm_native_module_descriptor_t descriptor;
+ create_function_t create_function;
+};
+
+std::ostream& operator<<(std::ostream& os, const TestParams& params) {
+ std::string qualified_name = params.module_name + "." + params.local_name;
+ auto name_sv =
+ iree_make_string_view(qualified_name.data(), qualified_name.size());
+ iree_string_view_replace_char(name_sv, ':', '_');
+ iree_string_view_replace_char(name_sv, '.', '_');
+ return os << qualified_name;
+}
+
+std::vector<TestParams> GetModuleTestParams() {
+ std::vector<TestParams> test_params;
+
+ // TODO(simon-camp): get these automatically
+ std::vector<ModuleDescription> modules = {
+ {arithmetic_ops_descriptor_, arithmetic_ops_create},
+ {arithmetic_ops_f32_descriptor_, arithmetic_ops_f32_create},
+ {arithmetic_ops_i64_descriptor_, arithmetic_ops_i64_create},
+ {assignment_ops_descriptor_, assignment_ops_create},
+ {assignment_ops_f32_descriptor_, assignment_ops_f32_create},
+ {assignment_ops_i64_descriptor_, assignment_ops_i64_create},
+ {buffer_ops_descriptor_, buffer_ops_create},
+ {call_ops_descriptor_, call_ops_create},
+ {comparison_ops_descriptor_, comparison_ops_create},
+ {comparison_ops_f32_descriptor_, comparison_ops_f32_create},
+ {comparison_ops_i64_descriptor_, comparison_ops_i64_create},
+ {control_flow_ops_descriptor_, control_flow_ops_create},
+ {conversion_ops_descriptor_, conversion_ops_create},
+ {conversion_ops_f32_descriptor_, conversion_ops_f32_create},
+ {conversion_ops_i64_descriptor_, conversion_ops_i64_create},
+ {global_ops_descriptor_, global_ops_create},
+ {global_ops_f32_descriptor_, global_ops_f32_create},
+ {global_ops_i64_descriptor_, global_ops_i64_create},
+ {list_ops_descriptor_, list_ops_create},
+ {list_variant_ops_descriptor_, list_variant_ops_create},
+ {ref_ops_descriptor_, ref_ops_create},
+ {shift_ops_descriptor_, shift_ops_create},
+ {shift_ops_i64_descriptor_, shift_ops_i64_create}};
+
+ for (size_t i = 0; i < modules.size(); i++) {
+ iree_vm_native_module_descriptor_t descriptor = modules[i].descriptor;
+ create_function_t function = modules[i].create_function;
+
+ std::string module_name =
+ std::string(descriptor.module_name.data, descriptor.module_name.size);
+
+ for (iree_host_size_t i = 0; i < descriptor.export_count; i++) {
+ iree_vm_native_export_descriptor_t export_descriptor =
+ descriptor.exports[i];
+ std::string local_name = std::string(export_descriptor.local_name.data,
+ export_descriptor.local_name.size);
+ test_params.push_back({module_name, local_name, function});
+ }
+ }
+
+ return test_params;
+}
+
+class VMCModuleTest : public ::testing::Test,
+ public ::testing::WithParamInterface<TestParams> {
+ protected:
+ virtual void SetUp() {
+ const auto& test_params = GetParam();
+
+ IREE_CHECK_OK(iree_vm_instance_create(iree_allocator_system(), &instance_));
+
+ iree_vm_module_t* module_ = nullptr;
+ IREE_CHECK_OK(
+ test_params.create_function(iree_allocator_system(), &module_));
+
+ std::vector<iree_vm_module_t*> modules = {module_};
+ IREE_CHECK_OK(iree_vm_context_create_with_modules(
+ instance_, IREE_VM_CONTEXT_FLAG_NONE, modules.data(), modules.size(),
+ iree_allocator_system(), &context_));
+
+ iree_vm_module_release(module_);
+ }
+
+ virtual void TearDown() {
+ iree_vm_context_release(context_);
+ iree_vm_instance_release(instance_);
+ }
+
+ iree_status_t RunFunction(std::string module_name, std::string local_name) {
+ std::string qualified_name = module_name + "." + local_name;
+ iree_vm_function_t function;
+ IREE_CHECK_OK(iree_vm_context_resolve_function(
+ context_,
+ iree_string_view_t{qualified_name.data(), qualified_name.size()},
+ &function));
+
+ return iree_vm_invoke(context_, function, IREE_VM_INVOCATION_FLAG_NONE,
+ /*policy=*/nullptr, /*inputs=*/nullptr,
+ /*outputs=*/nullptr, iree_allocator_system());
+ }
+
+ iree_vm_instance_t* instance_ = nullptr;
+ iree_vm_context_t* context_ = nullptr;
+};
+
+TEST_P(VMCModuleTest, Check) {
+ const auto& test_params = GetParam();
+ bool expect_failure = test_params.local_name.find("fail_") == 0;
+
+ iree::Status result =
+ RunFunction(test_params.module_name, test_params.local_name);
+ if (result.ok()) {
+ if (expect_failure) {
+ GTEST_FAIL() << "Function expected failure but succeeded";
+ } else {
+ GTEST_SUCCEED();
+ }
+ } else {
+ if (expect_failure) {
+ GTEST_SUCCEED();
+ } else {
+ GTEST_FAIL() << "Function expected success but failed with error: "
+ << result.ToString();
+ }
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(VMIRFunctions, VMCModuleTest,
+ ::testing::ValuesIn(GetModuleTestParams()),
+ ::testing::PrintToStringParamName());
+
+} // namespace
diff --git a/runtime/src/iree/vm/test/global_ops.mlir b/runtime/src/iree/vm/test/global_ops.mlir
new file mode 100644
index 0000000..a7c718b
--- /dev/null
+++ b/runtime/src/iree/vm/test/global_ops.mlir
@@ -0,0 +1,50 @@
+vm.module @global_ops {
+
+ //===--------------------------------------------------------------------===//
+ // global.i32
+ //===--------------------------------------------------------------------===//
+
+ vm.global.i32 private @c42 = 42 : i32
+ vm.global.i32 private mutable @c107_mut = 107 : i32
+ vm.global.ref mutable @g0 : !vm.buffer
+ // TODO(simon-camp): Add test for initializer
+
+ vm.rodata private @buffer dense<[1, 2, 3]> : tensor<3xi8>
+
+ // TODO(simon-camp) This test gets constant folded
+ vm.export @test_global_load_i32
+ vm.func @test_global_load_i32() {
+ %actual = vm.global.load.i32 @c42 : i32
+ %expected = vm.const.i32 42
+ vm.check.eq %actual, %expected, "@c42 != 42" : i32
+ vm.return
+ }
+
+ vm.export @test_global_load_ref
+ vm.func @test_global_load_ref() {
+ %actual = vm.global.load.ref @g0 : !vm.buffer
+ %expected = vm.const.ref.zero : !vm.buffer
+ %expecteddno = util.do_not_optimize(%expected) : !vm.buffer
+ vm.check.eq %actual, %expecteddno : !vm.buffer
+ vm.return
+ }
+
+ vm.export @test_global_store_i32
+ vm.func @test_global_store_i32() {
+ %c17 = vm.const.i32 17
+ vm.global.store.i32 %c17, @c107_mut : i32
+ %actual = vm.global.load.i32 @c107_mut : i32
+ vm.check.eq %actual, %c17, "@c107_mut != 17" : i32
+ vm.return
+ }
+
+ vm.export @test_global_store_ref
+ vm.func @test_global_store_ref() {
+ %ref_buffer = vm.const.ref.rodata @buffer : !vm.buffer
+ vm.global.store.ref %ref_buffer, @g0 : !vm.buffer
+ %actual = vm.global.load.ref @g0 : !vm.buffer
+ vm.check.eq %actual, %ref_buffer, "@g0 != buffer" : !vm.buffer
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/global_ops_f32.mlir b/runtime/src/iree/vm/test/global_ops_f32.mlir
new file mode 100644
index 0000000..865f711
--- /dev/null
+++ b/runtime/src/iree/vm/test/global_ops_f32.mlir
@@ -0,0 +1,28 @@
+vm.module @global_ops_f32 {
+
+ //===--------------------------------------------------------------------===//
+ // global.f32
+ //===--------------------------------------------------------------------===//
+
+ vm.global.f32 private @c42 = 42.5 : f32
+ vm.global.f32 private mutable @c107_mut = 107.5 : f32
+ // TODO(simon-camp): Add test for initializer
+
+ vm.export @test_global_load_f32
+ vm.func @test_global_load_f32() {
+ %actual = vm.global.load.f32 @c42 : f32
+ %expected = vm.const.f32 42.5
+ vm.check.eq %actual, %expected, "@c42 != 42.5" : f32
+ vm.return
+ }
+
+ vm.export @test_global_store_f32
+ vm.func @test_global_store_f32() {
+ %c17 = vm.const.f32 17.5
+ vm.global.store.f32 %c17, @c107_mut : f32
+ %actual = vm.global.load.f32 @c107_mut : f32
+ vm.check.eq %actual, %c17, "@c107_mut != 17.5" : f32
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/global_ops_i64.mlir b/runtime/src/iree/vm/test/global_ops_i64.mlir
new file mode 100644
index 0000000..b567d71
--- /dev/null
+++ b/runtime/src/iree/vm/test/global_ops_i64.mlir
@@ -0,0 +1,28 @@
+vm.module @global_ops_i64 {
+
+ //===--------------------------------------------------------------------===//
+ // global.i64
+ //===--------------------------------------------------------------------===//
+
+ vm.global.i64 private @c42 = 42 : i64
+ vm.global.i64 private mutable @c107_mut = 107 : i64
+ // TODO(simon-camp): Add test for initializer
+
+ vm.export @test_global_load_i64
+ vm.func @test_global_load_i64() {
+ %actual = vm.global.load.i64 @c42 : i64
+ %expected = vm.const.i64 42
+ vm.check.eq %actual, %expected, "@c42 != 42" : i64
+ vm.return
+ }
+
+ vm.export @test_global_store_i64
+ vm.func @test_global_store_i64() {
+ %c17 = vm.const.i64 17
+ vm.global.store.i64 %c17, @c107_mut : i64
+ %actual = vm.global.load.i64 @c107_mut : i64
+ vm.check.eq %actual, %c17, "@c107_mut != 17" : i64
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/list_ops.mlir b/runtime/src/iree/vm/test/list_ops.mlir
new file mode 100644
index 0000000..81e6b95
--- /dev/null
+++ b/runtime/src/iree/vm/test/list_ops.mlir
@@ -0,0 +1,124 @@
+vm.module @list_ops {
+
+ //===--------------------------------------------------------------------===//
+ // vm.list.* with I8 types
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_i8
+ vm.func @test_i8() {
+ %c42 = vm.const.i32 42
+ %c100 = vm.const.i32 100
+ %c0 = vm.const.i32 0
+ %list = vm.list.alloc %c42 : (i32) -> !vm.list<i8>
+ vm.list.reserve %list, %c100 : (!vm.list<i8>, i32)
+ %sz = vm.list.size %list : (!vm.list<i8>) -> i32
+ %sz_dno = util.do_not_optimize(%sz) : i32
+ vm.check.eq %sz_dno, %c0, "list<i8>.empty.size()=0" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.list.* with I16 types
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_i16
+ vm.func @test_i16() {
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+ %c27 = vm.const.i32 27
+ %list = vm.list.alloc %c1 : (i32) -> !vm.list<i16>
+ vm.list.resize %list, %c1 : (!vm.list<i16>, i32)
+ vm.list.set.i32 %list, %c0, %c27 : (!vm.list<i16>, i32, i32)
+ %v = vm.list.get.i32 %list, %c0 : (!vm.list<i16>, i32) -> i32
+ vm.check.eq %v, %c27, "list<i16>.empty.set(0, 27).get(0)=27" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.list.* with I32 types
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_i32
+ vm.func @test_i32() {
+ %c42 = vm.const.i32 42
+ %list = vm.list.alloc %c42 : (i32) -> !vm.list<i32>
+ %sz = vm.list.size %list : (!vm.list<i32>) -> i32
+ %c100 = vm.const.i32 100
+ %c101 = vm.const.i32 101
+ vm.list.resize %list, %c101 : (!vm.list<i32>, i32)
+ vm.list.set.i32 %list, %c100, %c42 : (!vm.list<i32>, i32, i32)
+ %v = vm.list.get.i32 %list, %c100 : (!vm.list<i32>, i32) -> i32
+ vm.check.eq %v, %c42, "list<i32>.empty.set(100, 42).get(100)=42" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.list.* with ref types
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_ref
+ vm.func @test_ref() {
+ // TODO(benvanik): test vm.list with ref types.
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Multiple lists within the same block
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_multiple_lists
+ vm.func @test_multiple_lists() {
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+ %c27 = vm.const.i32 27
+ %c42 = vm.const.i32 42
+
+ // These allocs shouldn't be CSE'd.
+ %list0 = vm.list.alloc %c1 : (i32) -> !vm.list<i8>
+ %list1 = vm.list.alloc %c1 : (i32) -> !vm.list<i8>
+ vm.list.resize %list0, %c1 : (!vm.list<i8>, i32)
+ vm.list.resize %list1, %c1 : (!vm.list<i8>, i32)
+ vm.list.set.i32 %list0, %c0, %c27 : (!vm.list<i8>, i32, i32)
+ vm.list.set.i32 %list1, %c0, %c42 : (!vm.list<i8>, i32, i32)
+ %res0 = vm.list.get.i32 %list0, %c0 : (!vm.list<i8>, i32) -> i32
+ %res1 = vm.list.get.i32 %list1, %c0 : (!vm.list<i8>, i32) -> i32
+ vm.check.eq %res0, %c27, "list0.get(0)=27" : i32
+ vm.check.eq %res1, %c42, "list1.get(0)=42" : i32
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Failure tests
+ //===--------------------------------------------------------------------===//
+
+ vm.export @fail_uninitialized_access
+ vm.func @fail_uninitialized_access() {
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+ %list = vm.list.alloc %c1 : (i32) -> !vm.list<i32>
+ vm.list.set.i32 %list, %c0, %c1 : (!vm.list<i32>, i32, i32)
+ vm.return
+ }
+
+ vm.export @fail_out_of_bounds_read
+ vm.func @fail_out_of_bounds_read() {
+ %c1 = vm.const.i32 1
+ %list = vm.list.alloc %c1 : (i32) -> !vm.list<i32>
+ vm.list.resize %list, %c1 : (!vm.list<i32>, i32)
+ %v = vm.list.get.i32 %list, %c1 : (!vm.list<i32>, i32) -> i32
+ %v_dno = util.do_not_optimize(%v) : i32
 + // Add a dummy use of %v_dno to please recent versions of clang for the C target.
+ vm.list.set.i32 %list, %c1, %v_dno : (!vm.list<i32>, i32, i32)
+ vm.return
+ }
+
+ vm.export @fail_out_of_bounds_write
+ vm.func @fail_out_of_bounds_write() {
+ %c1 = vm.const.i32 1
+ %list = vm.list.alloc %c1 : (i32) -> !vm.list<i32>
+ vm.list.resize %list, %c1 : (!vm.list<i32>, i32)
+ vm.list.set.i32 %list, %c1, %c1 : (!vm.list<i32>, i32, i32)
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/list_ops_i64.mlir b/runtime/src/iree/vm/test/list_ops_i64.mlir
new file mode 100644
index 0000000..97f8681
--- /dev/null
+++ b/runtime/src/iree/vm/test/list_ops_i64.mlir
@@ -0,0 +1,21 @@
+vm.module @list_ops_i64 {
+
+ //===--------------------------------------------------------------------===//
+ // vm.list.* with I64 types
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_i64
+ vm.func @test_i64() {
+ %capacity = vm.const.i32 42
+ %index = vm.const.i32 41
+ %max_int_plus_1 = vm.const.i64 2147483648
+ %list = vm.list.alloc %capacity : (i32) -> !vm.list<i64>
+ %sz = vm.list.size %list : (!vm.list<i64>) -> i32
+ vm.list.resize %list, %capacity : (!vm.list<i64>, i32)
+ vm.list.set.i64 %list, %index, %max_int_plus_1 : (!vm.list<i64>, i32, i64)
+ %v = vm.list.get.i64 %list, %index : (!vm.list<i64>, i32) -> i64
+ vm.check.eq %v, %max_int_plus_1, "list<i64>.empty.set(41, MAX_INT_PLUS_1).get(41)=MAX_INT_PLUS_1" : i64
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/list_variant_ops.mlir b/runtime/src/iree/vm/test/list_variant_ops.mlir
new file mode 100644
index 0000000..5a8d23c
--- /dev/null
+++ b/runtime/src/iree/vm/test/list_variant_ops.mlir
@@ -0,0 +1,162 @@
+vm.module @list_variant_ops {
+
+ //===--------------------------------------------------------------------===//
+ // vm.list.* with list types (nesting)
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_listception
+ vm.func @test_listception() {
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+ %c2 = vm.const.i32 2
+ %c3 = vm.const.i32 3
+ %c100 = vm.const.i32 100
+ %c101 = vm.const.i32 101
+ %c102 = vm.const.i32 102
+
+ // [100, 101, 102]
+ %inner0 = vm.list.alloc %c3 : (i32) -> !vm.list<i32>
+ vm.list.resize %inner0, %c3 : (!vm.list<i32>, i32)
+ vm.list.set.i32 %inner0, %c0, %c100 : (!vm.list<i32>, i32, i32)
+ vm.list.set.i32 %inner0, %c1, %c101 : (!vm.list<i32>, i32, i32)
+ vm.list.set.i32 %inner0, %c2, %c102 : (!vm.list<i32>, i32, i32)
+
+ // [102, 101, 100]
+ %inner1 = vm.list.alloc %c3 : (i32) -> !vm.list<i32>
+ vm.list.resize %inner1, %c3 : (!vm.list<i32>, i32)
+ vm.list.set.i32 %inner1, %c0, %c102 : (!vm.list<i32>, i32, i32)
+ vm.list.set.i32 %inner1, %c1, %c101 : (!vm.list<i32>, i32, i32)
+ vm.list.set.i32 %inner1, %c2, %c100 : (!vm.list<i32>, i32, i32)
+
+ // [ [100, 101, 102], [102, 101, 100] ]
+ %capacity = vm.const.i32 8
+ %outer = vm.list.alloc %capacity : (i32) -> !vm.list<!vm.list<i32>>
+ vm.list.resize %outer, %c2 : (!vm.list<!vm.list<i32>>, i32)
+ vm.list.set.ref %outer, %c0, %inner0 : (!vm.list<!vm.list<i32>>, i32, !vm.list<i32>)
+ vm.list.set.ref %outer, %c1, %inner1 : (!vm.list<!vm.list<i32>>, i32, !vm.list<i32>)
+
+ %inner0_ret = vm.list.get.ref %outer, %c0 : (!vm.list<!vm.list<i32>>, i32) -> !vm.list<i32>
+ vm.check.eq %inner0_ret, %inner0 : !vm.list<i32>
+ %inner0_e2 = vm.list.get.i32 %inner0_ret, %c2 : (!vm.list<i32>, i32) -> i32
+ vm.check.eq %inner0_e2, %c102 : i32
+
+ %inner1_ret = vm.list.get.ref %outer, %c1 : (!vm.list<!vm.list<i32>>, i32) -> !vm.list<i32>
+ vm.check.eq %inner1_ret, %inner1 : !vm.list<i32>
+ %inner1_e2 = vm.list.get.i32 %inner1_ret, %c2 : (!vm.list<i32>, i32) -> i32
+ vm.check.eq %inner1_e2, %c100 : i32
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.list.* with variant types
+ //===--------------------------------------------------------------------===//
+
+ vm.rodata private @byte_buffer dense<[1, 2, 3]> : tensor<3xi32>
+
+ vm.export @test_variant
+ vm.func @test_variant() {
+ %capacity = vm.const.i32 42
+ %list = vm.list.alloc %capacity : (i32) -> !vm.list<?>
+ vm.list.resize %list, %capacity : (!vm.list<?>, i32)
+
+ // Access element 10 as an i32.
+ %c10 = vm.const.i32 10
+ %v10_i32 = vm.const.i32 1234
+ vm.list.set.i32 %list, %c10, %v10_i32 : (!vm.list<?>, i32, i32)
+ %e10_i32 = vm.list.get.i32 %list, %c10 : (!vm.list<?>, i32) -> i32
+ vm.check.eq %e10_i32, %v10_i32 : i32
+
+ // Access element 10 as an i64.
+ %v10_i64 = vm.const.i64 1234
+ vm.list.set.i64 %list, %c10, %v10_i64 : (!vm.list<?>, i32, i64)
+ %e10_i64 = vm.list.get.i64 %list, %c10 : (!vm.list<?>, i32) -> i64
+ vm.check.eq %e10_i64, %v10_i64 : i64
+
+ // Access element 11 as a ref object.
+ %c11 = vm.const.i32 11
+ %v11_buf = vm.const.ref.rodata @byte_buffer : !vm.buffer
+ vm.list.set.ref %list, %c11, %v11_buf : (!vm.list<?>, i32, !vm.buffer)
+ %e11_buf = vm.list.get.ref %list, %c11 : (!vm.list<?>, i32) -> !vm.buffer
+ vm.check.eq %e11_buf, %v11_buf : !vm.buffer
+
+ // Access element 11 as a different kind of ref object (incompatible).
+ // Should return null.
+ %e11_bad = vm.list.get.ref %list, %c11 : (!vm.list<?>, i32) -> !vm.list<i8>
+ %null = vm.const.ref.zero : !vm.list<i8>
+ vm.check.eq %e11_bad, %null : !vm.list<i8>
+
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Failure tests
+ //===--------------------------------------------------------------------===//
+
+ vm.export @fail_uninitialized_access
+ vm.func @fail_uninitialized_access() {
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+
+ %ref = vm.const.ref.rodata @byte_buffer : !vm.buffer
+ %list = vm.list.alloc %c1 : (i32) -> !vm.list<?>
+
+ vm.list.set.ref %list, %c0, %ref : (!vm.list<?>, i32, !vm.buffer)
+ vm.return
+ }
+
+ vm.export @fail_out_of_bounds_read
+ vm.func @fail_out_of_bounds_read() {
+ %c1 = vm.const.i32 1
+
+ %list = vm.list.alloc %c1 : (i32) -> !vm.list<?>
+ vm.list.resize %list, %c1 : (!vm.list<?>, i32)
+
+ %ref = vm.list.get.ref %list, %c1 : (!vm.list<?>, i32) -> !vm.buffer
+ %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+ vm.return
+ }
+
+ vm.export @fail_out_of_bounds_write
+ vm.func @fail_out_of_bounds_write() {
+ %c0 = vm.const.i32 0
+ %c1 = vm.const.i32 1
+
+ %ref = vm.const.ref.rodata @byte_buffer : !vm.buffer
+ %list = vm.list.alloc %c1 : (i32) -> !vm.list<?>
+ vm.list.resize %list, %c1 : (!vm.list<?>, i32)
+
+ vm.list.set.ref %list, %c1, %ref : (!vm.list<?>, i32, !vm.buffer)
+ vm.return
+ }
+
+ vm.export @fail_variant_slot_change
+ vm.func @fail_variant_slot_change() {
+ %capacity = vm.const.i32 42
+ %list = vm.list.alloc %capacity : (i32) -> !vm.list<?>
+ vm.list.resize %list, %capacity : (!vm.list<?>, i32)
+
+ %c10 = vm.const.i32 10
+
+ // Access element 10 as an i32.
+ %v10_i32 = vm.const.i32 1234
+ vm.list.set.i32 %list, %c10, %v10_i32 : (!vm.list<?>, i32, i32)
+ %e10_i32 = vm.list.get.i32 %list, %c10 : (!vm.list<?>, i32) -> i32
+ vm.check.eq %e10_i32, %v10_i32 : i32
+
+ // Access element 10 as a ref object.
+ %v10_buf = vm.const.ref.rodata @byte_buffer : !vm.buffer
+ vm.list.set.ref %list, %c10, %v10_buf : (!vm.list<?>, i32, !vm.buffer)
+ %e10_buf = vm.list.get.ref %list, %c10 : (!vm.list<?>, i32) -> !vm.buffer
+ vm.check.eq %e10_buf, %v10_buf : !vm.buffer
+
+ // Accessing it as an i32 now that it stores the ref should fail at runtime.
+ // TODO(benvanik): support type queries and/or make this silently return 0.
+ %e10_any = vm.list.get.i32 %list, %c10 : (!vm.list<?>, i32) -> i32
+ // -- FAILURE HERE --
+ %zero = vm.const.i32.zero
+ vm.check.eq %e10_any, %zero : i32
+
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/ref_ops.mlir b/runtime/src/iree/vm/test/ref_ops.mlir
new file mode 100644
index 0000000..862a75e
--- /dev/null
+++ b/runtime/src/iree/vm/test/ref_ops.mlir
@@ -0,0 +1,47 @@
+vm.module @ref_ops {
+
+ vm.rodata private @buffer_i8 dense<[1, 2, 3]> : tensor<3xi8>
+ vm.rodata private @buffer_i32 dense<[1, 2, 3]> : tensor<3xi32>
+
+ vm.export @test_zero_ref_eq
+ // A null ref must compare equal to itself.
+ vm.func @test_zero_ref_eq() {
+ %ref = vm.const.ref.zero : !vm.ref<?>
+ %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+ vm.check.eq %ref_dno, %ref_dno : !vm.ref<?>
+ vm.return
+ }
+
+ // TODO(simon-camp): In the C target we run the DropCompilerHintsPass after
+ // ordinal allocation and vm to EmitC conversion to prevent constant folding
+ // of the tests during the latter. This means we would need to add a pattern
+ // that inserts calls to `iree_vm_ref_retain` for operand/result pairs of the
+ // do_not_optimize op.
+ vm.export @test_ref_eq attributes {emitc.exclude}
+ // Two refs derived from the same rodata buffer must compare equal.
+ vm.func @test_ref_eq() {
+ %ref_1 = vm.const.ref.rodata @buffer_i8 : !vm.buffer
+ %ref_1_dno = util.do_not_optimize(%ref_1) : !vm.buffer
+ %ref_2 = vm.const.ref.rodata @buffer_i8 : !vm.buffer
+ %ref_2_dno = util.do_not_optimize(%ref_2) : !vm.buffer
+ vm.check.eq %ref_1_dno, %ref_2_dno : !vm.buffer
+ vm.return
+ }
+
+ vm.export @test_ref_ne
+ // Refs to distinct rodata buffers must compare not-equal.
+ vm.func @test_ref_ne() {
+ %ref_i8 = vm.const.ref.rodata @buffer_i8 : !vm.buffer
+ %ref_i8_dno = util.do_not_optimize(%ref_i8) : !vm.buffer
+ %ref_i32 = vm.const.ref.rodata @buffer_i32 : !vm.buffer
+ %ref_i32_dno = util.do_not_optimize(%ref_i32) : !vm.buffer
+ vm.check.ne %ref_i8_dno, %ref_i32_dno : !vm.buffer
+ vm.return
+ }
+
+ vm.export @test_ref_nz
+ // A ref to rodata must be non-null.
+ vm.func @test_ref_nz() {
+ %ref = vm.const.ref.rodata @buffer_i8 : !vm.buffer
+ %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+ vm.check.nz %ref_dno : !vm.buffer
+ vm.return
+ }
+
+}
diff --git a/runtime/src/iree/vm/test/shift_ops.mlir b/runtime/src/iree/vm/test/shift_ops.mlir
new file mode 100644
index 0000000..4905ea9
--- /dev/null
+++ b/runtime/src/iree/vm/test/shift_ops.mlir
@@ -0,0 +1,38 @@
+vm.module @shift_ops {
+
+ //===--------------------------------------------------------------------===//
+ // Native bitwise shifts and rotates
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_shl_i32
+ // Left shift: 1 << 2 == 4.
+ vm.func @test_shl_i32() {
+ %c1 = vm.const.i32 1
+ %c1dno = util.do_not_optimize(%c1) : i32
+ %c2 = vm.const.i32 2
+ %v = vm.shl.i32 %c1dno, %c2 : i32
+ %c4 = vm.const.i32 4
+ vm.check.eq %v, %c4, "1<<2=4" : i32
+ vm.return
+ }
+
+ vm.export @test_shr_i32s
+ // Arithmetic (signed) right shift preserves the sign bit: -1 >> 2 == -1.
+ vm.func @test_shr_i32s() {
+ %cn1 = vm.const.i32 -1
+ %cn1dno = util.do_not_optimize(%cn1) : i32
+ %c2 = vm.const.i32 2
+ %v = vm.shr.i32.s %cn1dno, %c2 : i32
+ vm.check.eq %v, %cn1dno, "-1>>2=-1" : i32
+ vm.return
+ }
+
+ vm.export @test_shr_i32u
+ // Logical (unsigned) right shift: 4 >> 2 == 1.
+ vm.func @test_shr_i32u() {
+ %c4 = vm.const.i32 4
+ %c4dno = util.do_not_optimize(%c4) : i32
+ %c2 = vm.const.i32 2
+ %v = vm.shr.i32.u %c4dno, %c2 : i32
+ %c1 = vm.const.i32 1
+ vm.check.eq %v, %c1, "4>>2=1" : i32
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/test/shift_ops_i64.mlir b/runtime/src/iree/vm/test/shift_ops_i64.mlir
new file mode 100644
index 0000000..6632b2e
--- /dev/null
+++ b/runtime/src/iree/vm/test/shift_ops_i64.mlir
@@ -0,0 +1,39 @@
+vm.module @shift_ops_i64 {
+
+ //===--------------------------------------------------------------------===//
+ // ExtI64: Native bitwise shifts and rotates
+ //===--------------------------------------------------------------------===//
+
+ // Note: the shift amount operand is i32 even for i64-typed values.
+
+ vm.export @test_shl_i64
+ // Left shift: 1 << 2 == 4.
+ vm.func @test_shl_i64() {
+ %c1 = vm.const.i64 1
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %shamt = vm.const.i32 2
+ %v = vm.shl.i64 %c1dno, %shamt : i64
+ %c4 = vm.const.i64 4
+ vm.check.eq %v, %c4, "1<<2=4" : i64
+ vm.return
+ }
+
+ vm.export @test_shr_i64s
+ // Arithmetic (signed) right shift preserves the sign bit: -1 >> 2 == -1.
+ vm.func @test_shr_i64s() {
+ // NOTE(review): %c1 actually holds -1 here; %cn1 (as in shift_ops.mlir's
+ // @test_shr_i32s) would be a clearer name.
+ %c1 = vm.const.i64 -1
+ %c1dno = util.do_not_optimize(%c1) : i64
+ %shamt = vm.const.i32 2
+ %v = vm.shr.i64.s %c1dno, %shamt : i64
+ %cn1 = vm.const.i64 -1
+ vm.check.eq %v, %cn1, "-1>>2=-1" : i64
+ vm.return
+ }
+
+ vm.export @test_shr_i64u
+ // Logical (unsigned) right shift: 4 >> 2 == 1.
+ vm.func @test_shr_i64u() {
+ %c4 = vm.const.i64 4
+ %c4dno = util.do_not_optimize(%c4) : i64
+ %shamt = vm.const.i32 2
+ %v = vm.shr.i64.u %c4dno, %shamt : i64
+ %c1 = vm.const.i64 1
+ vm.check.eq %v, %c1, "4>>2=1" : i64
+ vm.return
+ }
+}
diff --git a/runtime/src/iree/vm/type_def.h b/runtime/src/iree/vm/type_def.h
new file mode 100644
index 0000000..d8cc8b6
--- /dev/null
+++ b/runtime/src/iree/vm/type_def.h
@@ -0,0 +1,91 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_TYPE_DEF_H_
+#define IREE_VM_TYPE_DEF_H_
+
+#include <stdint.h>
+
+#include "iree/vm/ref.h"
+#include "iree/vm/value.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Describes a type in the type table, mapping from a local module type ID to
+// either a primitive value type or registered ref type.
+//
+// * ?: variant (value_type/ref_type == 0)
+// * i8: primitive value (value_type != 0)
+// * !vm.ref<?>: any ref value (ref_type == IREE_VM_REF_TYPE_ANY)
+// * !vm.ref<!foo>: ref value of type !foo (ref_type > 0)
+typedef struct iree_vm_type_def_t {
+ // Primitive value type (IREE_VM_VALUE_TYPE_*); NONE when not a value type.
+ iree_vm_value_type_t value_type : 8;
+ // Registered ref type; IREE_VM_REF_TYPE_NULL when not a ref type.
+ // Packed with value_type into a single 32-bit word (8 + 24 bits).
+ iree_vm_ref_type_t ref_type : 24;
+} iree_vm_type_def_t;
+
+// Returns a type def for the variant type (`?`): neither value nor ref set.
+static inline iree_vm_type_def_t iree_vm_type_def_make_variant_type(void) {
+ iree_vm_type_def_t result;
+ result.value_type = IREE_VM_VALUE_TYPE_NONE;
+ result.ref_type = IREE_VM_REF_TYPE_NULL;
+ return result;
+}
+
+// Returns a type def for the primitive |value_type| (e.g. i32).
+static inline iree_vm_type_def_t iree_vm_type_def_make_value_type(
+ iree_vm_value_type_t value_type) {
+ iree_vm_type_def_t result;
+ result.value_type = value_type;
+ result.ref_type = IREE_VM_REF_TYPE_NULL;
+ return result;
+}
+
+// Returns a type def for the registered |ref_type|.
+static inline iree_vm_type_def_t iree_vm_type_def_make_ref_type(
+ iree_vm_ref_type_t ref_type) {
+ iree_vm_type_def_t result;
+ result.value_type = IREE_VM_VALUE_TYPE_NONE;
+ result.ref_type = ref_type;
+ return result;
+}
+
+// True if |v| describes a primitive value type.
+#define iree_vm_type_def_is_value(v) \
+ ((v)->value_type != IREE_VM_VALUE_TYPE_NONE)
+// True if |v| describes a ref type.
+#define iree_vm_type_def_is_ref(v) ((v)->ref_type != IREE_VM_REF_TYPE_NULL)
+// True if |v| is the variant type (neither value nor ref).
+#define iree_vm_type_def_is_variant(v) \
+ ((v)->value_type == IREE_VM_VALUE_TYPE_NONE && \
+ (v)->ref_type == IREE_VM_REF_TYPE_NULL)
+
+// A variant value that can be either a primitive value type or a ref type.
+// Each variant value stores its type but users are required to check the type
+// prior to accessing any of the data.
+typedef struct iree_vm_variant_t {
+ // Discriminator: selects which union member (if any) is active.
+ iree_vm_type_def_t type;
+ union {
+ // TODO(benvanik): replace with iree_vm_value_t.
+ int8_t i8;
+ int16_t i16;
+ int32_t i32;
+ int64_t i64;
+ float f32;
+ double f64;
+ iree_vm_ref_t ref;
+
+ uint8_t value_storage[IREE_VM_VALUE_STORAGE_SIZE]; // max size of all value
+ // types
+ };
+} iree_vm_variant_t;
+
+// Initializer for an empty (variant-typed, zero-valued) variant.
+#define iree_vm_variant_empty() \
+ { {IREE_VM_VALUE_TYPE_NONE, IREE_VM_REF_TYPE_NULL}, {0}, }
+// True if |v| currently holds a primitive value.
+#define iree_vm_variant_is_value(v) iree_vm_type_def_is_value(&(v).type)
+// True if |v| currently holds a ref.
+#define iree_vm_variant_is_ref(v) iree_vm_type_def_is_ref(&(v).type)
+// True if |v| holds neither a value nor a ref.
+#define iree_vm_variant_is_empty(v) iree_vm_type_def_is_variant(&(v).type)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_TYPE_DEF_H_
diff --git a/runtime/src/iree/vm/value.h b/runtime/src/iree/vm/value.h
new file mode 100644
index 0000000..445d80f
--- /dev/null
+++ b/runtime/src/iree/vm/value.h
@@ -0,0 +1,133 @@
+// Copyright 2019 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_VALUE_H_
+#define IREE_VM_VALUE_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// TODO(benvanik): support variable size in modules. vm.imports would need index
+// type and we'd have to make sure all native modules used this size type. It
+// would be a compiler runtime flag and runtime compile flag.
+// NOTE(review): currently fixed at 32 bits; see TODO above.
+typedef int32_t iree_vm_size_t;
+
+// Defines the type of a primitive value.
+typedef enum iree_vm_value_type_e {
+ // Not a value type.
+ IREE_VM_VALUE_TYPE_NONE = 0,
+ // int8_t.
+ IREE_VM_VALUE_TYPE_I8 = 1,
+ // int16_t.
+ IREE_VM_VALUE_TYPE_I16 = 2,
+ // int32_t.
+ IREE_VM_VALUE_TYPE_I32 = 3,
+ // int64_t.
+ IREE_VM_VALUE_TYPE_I64 = 4,
+ // float.
+ IREE_VM_VALUE_TYPE_F32 = 5,
+ // double.
+ IREE_VM_VALUE_TYPE_F64 = 6,
+
+ IREE_VM_VALUE_TYPE_MAX = IREE_VM_VALUE_TYPE_F64,
+ IREE_VM_VALUE_TYPE_COUNT = IREE_VM_VALUE_TYPE_MAX + 1, // used for lookup
+} iree_vm_value_type_t;
+
+// Maximum size, in bytes, of any value type we can represent.
+// 8 == sizeof(int64_t) == sizeof(double), the largest members of the union.
+#define IREE_VM_VALUE_STORAGE_SIZE 8
+
+// A variant value type.
+// |type| selects which union member is active; check it before reading a
+// member (see the TODO(#5542) notes on the accessors below).
+typedef struct iree_vm_value_t {
+ // Discriminator for the union below.
+ iree_vm_value_type_t type;
+ union {
+ int8_t i8;
+ int16_t i16;
+ int32_t i32;
+ int64_t i64;
+ float f32;
+ double f64;
+
+ uint8_t value_storage[IREE_VM_VALUE_STORAGE_SIZE]; // max size of all value
+ // types
+ };
+} iree_vm_value_t;
+
+// Returns a value with type IREE_VM_VALUE_TYPE_NONE (no payload set).
+// Uses (void) rather than empty parens: in C an empty parameter list declares
+// an unprototyped function; this also matches iree_vm_type_def_make_variant_type.
+static inline iree_vm_value_t iree_vm_value_make_none(void) {
+ iree_vm_value_t result;
+ result.type = IREE_VM_VALUE_TYPE_NONE;
+ return result;
+}
+
+// Wraps an int8_t as a tagged value.
+static inline iree_vm_value_t iree_vm_value_make_i8(int8_t value) {
+ iree_vm_value_t result;
+ result.type = IREE_VM_VALUE_TYPE_I8;
+ result.i8 = value;
+ return result;
+}
+
+// Wraps an int16_t as a tagged value.
+static inline iree_vm_value_t iree_vm_value_make_i16(int16_t value) {
+ iree_vm_value_t result;
+ result.type = IREE_VM_VALUE_TYPE_I16;
+ result.i16 = value;
+ return result;
+}
+
+// Wraps an int32_t as a tagged value.
+static inline iree_vm_value_t iree_vm_value_make_i32(int32_t value) {
+ iree_vm_value_t result;
+ result.type = IREE_VM_VALUE_TYPE_I32;
+ result.i32 = value;
+ return result;
+}
+
+// Returns the i32 payload of |value| without checking its type tag.
+// TODO(#5542): check the value type before accessing the union.
+static inline int32_t iree_vm_value_get_i32(iree_vm_value_t *value) {
+ return value->i32;
+}
+
+// Wraps an int64_t as a tagged value.
+static inline iree_vm_value_t iree_vm_value_make_i64(int64_t value) {
+ iree_vm_value_t result;
+ result.type = IREE_VM_VALUE_TYPE_I64;
+ result.i64 = value;
+ return result;
+}
+
+// Returns the i64 payload of |value| without checking its type tag.
+// TODO(#5542): check the value type before accessing the union.
+static inline int64_t iree_vm_value_get_i64(iree_vm_value_t *value) {
+ return value->i64;
+}
+
+// Wraps a float as a tagged value.
+static inline iree_vm_value_t iree_vm_value_make_f32(float value) {
+ iree_vm_value_t result;
+ result.type = IREE_VM_VALUE_TYPE_F32;
+ result.f32 = value;
+ return result;
+}
+
+// Returns the f32 payload of |value| without checking its type tag.
+// TODO(#5542): check the value type before accessing the union.
+static inline float iree_vm_value_get_f32(iree_vm_value_t *value) {
+ return value->f32;
+}
+
+// Wraps a double as a tagged value.
+static inline iree_vm_value_t iree_vm_value_make_f64(double value) {
+ iree_vm_value_t result;
+ result.type = IREE_VM_VALUE_TYPE_F64;
+ result.f64 = value;
+ return result;
+}
+
+// Returns the f64 payload of |value| without checking its type tag.
+// TODO(#5542): check the value type before accessing the union.
+static inline double iree_vm_value_get_f64(iree_vm_value_t *value) {
+ return value->f64;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_VM_VALUE_H_