Rollup of minor runtime fixes/cleanup from the AMDGPU branch. (#19621)
diff --git a/build_tools/cmake/iree_bitcode_library.cmake b/build_tools/cmake/iree_bitcode_library.cmake
index 4ae800d..63b0d03 100644
--- a/build_tools/cmake/iree_bitcode_library.cmake
+++ b/build_tools/cmake/iree_bitcode_library.cmake
@@ -62,7 +62,7 @@
# Enable inline asm.
"-fasm"
- # Object file only in bitcode format:
+ # Object file only in bitcode format.
"-c"
"-emit-llvm"
@@ -175,7 +175,7 @@
# Optimized and unstamped.
"-O3"
- # Object file only in bitcode format:
+ # Object file only in bitcode format.
"-c"
"-emit-llvm"
)
@@ -239,7 +239,6 @@
# supported.
# COPTS: Additional flags to pass to clang.
# OUT: Output file name. Defaults to {source.c}.{gpu_arch}.bc.
-#
function(iree_amdgpu_bitcode_library)
cmake_parse_arguments(
_RULE
diff --git a/build_tools/cmake/iree_hal_cts_test_suite.cmake b/build_tools/cmake/iree_hal_cts_test_suite.cmake
index ce229cb..7780922 100644
--- a/build_tools/cmake/iree_hal_cts_test_suite.cmake
+++ b/build_tools/cmake/iree_hal_cts_test_suite.cmake
@@ -19,10 +19,12 @@
# other parameters.
# DRIVER_REGISTRATION_HDR: The C #include path for `DRIVER_REGISTRATION_FN`.
# DRIVER_REGISTRATION_FN: The C function which registers `DRIVER_NAME`.
-# COMPILER_TARGET_BACKEND: Optional target backend name to pass to the
-# `-iree-hal-target-backends` option of `iree-compile` to use for
+# COMPILER_TARGET_BACKEND: Optional target backend name used for
# executable generation. If this is omitted, or the associated compiler
# target is not enabled, tests which use executables will be disabled.
+# COMPILER_TARGET_DEVICE: Optional target device name to pass to the
+# `--iree-hal-target-device` option of `iree-compile` used for
+# executable generation. If omitted the target backend name will be used.
# COMPILER_FLAGS: Additional compiler flags.
# Example: "--iree-llvmcpu-target-float-abi=hard --iree-llvmcpu-loop-unrolling"
# EXECUTABLE_FORMAT: Executable format identifier. Will be interpreted
@@ -47,7 +49,7 @@
cmake_parse_arguments(
_RULE
""
- "DRIVER_NAME;VARIANT_SUFFIX;DRIVER_REGISTRATION_HDR;DRIVER_REGISTRATION_FN;COMPILER_TARGET_BACKEND;EXECUTABLE_FORMAT"
+ "DRIVER_NAME;VARIANT_SUFFIX;DRIVER_REGISTRATION_HDR;DRIVER_REGISTRATION_FN;COMPILER_TARGET_BACKEND;COMPILER_TARGET_DEVICE;EXECUTABLE_FORMAT"
"DEPS;ARGS;COMPILER_FLAGS;INCLUDED_TESTS;EXCLUDED_TESTS;LABELS;"
${ARGN}
)
@@ -84,10 +86,15 @@
set(_TRANSLATE_FLAGS
"--compile-mode=hal-executable"
- "--iree-hal-target-backends=${_RULE_COMPILER_TARGET_BACKEND}"
${_RULE_COMPILER_FLAGS}
)
+ if(DEFINED _RULE_COMPILER_TARGET_DEVICE)
+ list(APPEND _TRANSLATE_FLAGS "--iree-hal-target-device=${_RULE_COMPILER_TARGET_DEVICE}")
+ else()
+ list(APPEND _TRANSLATE_FLAGS "--iree-hal-target-backends=${_RULE_COMPILER_TARGET_BACKEND}")
+ endif()
+
# Skip if already created (multiple suites using the same compiler setting).
iree_package_name(_PACKAGE_NAME)
if(NOT TARGET ${_PACKAGE_NAME}_${_EXECUTABLES_TESTDATA_NAME}_c)
@@ -183,6 +190,7 @@
set(IREE_CTS_DRIVER_REGISTRATION_FN "${_RULE_DRIVER_REGISTRATION_FN}")
set(IREE_CTS_DRIVER_NAME "${_RULE_DRIVER_NAME}")
set(IREE_CTS_TARGET_BACKEND "${_RULE_COMPILER_TARGET_BACKEND}")
+ set(IREE_CTS_TARGET_DEVICE "${_RULE_COMPILER_TARGET_DEVICE}")
configure_file(
"${IREE_ROOT_DIR}/runtime/src/iree/hal/cts/cts_test_template.cc.in"
diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp
index 851b6ed..8558f0b 100644
--- a/compiler/plugins/target/ROCM/ROCMTarget.cpp
+++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp
@@ -665,7 +665,9 @@
}
int64_t ordinal = ordinalAttr.getInt();
- auto symbolNameRef = builder.createString(exportOp.getName());
+ // Symbol names include a `.kd` suffix as that's what HSA expects.
+ auto symbolNameKd = (exportOp.getName() + ".kd").str();
+ auto symbolNameRef = builder.createString(symbolNameKd);
iree_hal_amdgpu_Dims_t workgroupSize = {0};
if (auto workgroupSizeAttr = exportOp.getWorkgroupSize()) {
diff --git a/runtime/src/iree/base/allocator.h b/runtime/src/iree/base/allocator.h
index 480e9ad..0d5f7a9 100644
--- a/runtime/src/iree/base/allocator.h
+++ b/runtime/src/iree/base/allocator.h
@@ -97,6 +97,18 @@
return iree_make_byte_span((uint8_t*)span.data, span.data_length);
}
+// Copies |size| bytes from |src| to |dst| without polluting the cache with
+// |dst| lines. Used when streaming data that will not be read again.
+static inline void iree_memcpy_stream_dst(void* IREE_RESTRICT dst,
+ const void* IREE_RESTRICT src,
+ iree_host_size_t size) {
+ // TODO(benvanik): implement a proper non-temporal copy. This will be
+ // architecture-specific and may have compiler-specific paths in order to emit
+ // the proper instructions. On x64 this should be using MOVNTDQ (or something
+ // in that family).
+ memcpy(dst, src, size);
+}
+
//===----------------------------------------------------------------------===//
// Totally shady stack allocation
//===----------------------------------------------------------------------===//
diff --git a/runtime/src/iree/base/assert.h b/runtime/src/iree/base/assert.h
index f6fd83e..0d6b4eb 100644
--- a/runtime/src/iree/base/assert.h
+++ b/runtime/src/iree/base/assert.h
@@ -57,7 +57,7 @@
// Assertions enabled:
-#define IREE_ASSERT(condition, ...) assert(condition)
+#define IREE_ASSERT(condition, ...) assert(IREE_LIKELY(condition))
// TODO(#2843): better logging of status assertions.
// #define IREE_ASSERT_OK(status) IREE_ASSERT(iree_status_is_ok(status))
diff --git a/runtime/src/iree/base/internal/arena.c b/runtime/src/iree/base/internal/arena.c
index ba8f9e8..17f361d 100644
--- a/runtime/src/iree/base/internal/arena.c
+++ b/runtime/src/iree/base/internal/arena.c
@@ -41,6 +41,27 @@
IREE_TRACE_ZONE_END(z0);
}
+iree_status_t iree_arena_block_pool_preallocate(
+ iree_arena_block_pool_t* block_pool, iree_host_size_t count) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+ IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, count);
+
+ for (iree_host_size_t i = 0; i < count; ++i) {
+ uint8_t* block_base = NULL;
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc_uninitialized(block_pool->block_allocator,
+ block_pool->total_block_size,
+ (void**)&block_base));
+ iree_arena_block_t* block =
+ iree_arena_block_trailer(block_pool, block_base);
+ iree_atomic_arena_block_slist_concat(&block_pool->available_slist, block,
+ block);
+ }
+
+ IREE_TRACE_ZONE_END(z0);
+ return iree_ok_status();
+}
+
void iree_arena_block_pool_trim(iree_arena_block_pool_t* block_pool) {
IREE_TRACE_ZONE_BEGIN(z0);
@@ -125,6 +146,7 @@
} while (head);
arena->allocation_head = NULL;
}
+
if (arena->block_head != NULL) {
#if defined(IREE_SANITIZER_ADDRESS)
iree_arena_block_t* block = arena->block_head;
@@ -141,6 +163,10 @@
arena->block_tail = NULL;
}
+ arena->total_allocation_size = 0;
+ arena->used_allocation_size = 0;
+ arena->block_bytes_remaining = 0;
+
IREE_TRACE_ZONE_END(z0);
}
diff --git a/runtime/src/iree/base/internal/arena.h b/runtime/src/iree/base/internal/arena.h
index 199e3fc..ad252b5 100644
--- a/runtime/src/iree/base/internal/arena.h
+++ b/runtime/src/iree/base/internal/arena.h
@@ -77,6 +77,10 @@
// back to it.
void iree_arena_block_pool_deinitialize(iree_arena_block_pool_t* block_pool);
+// Preallocates |count| blocks and adds them to the pool free list.
+iree_status_t iree_arena_block_pool_preallocate(
+ iree_arena_block_pool_t* block_pool, iree_host_size_t count);
+
// Trims the pool by freeing unused blocks back to the allocator.
// Acquired blocks are not freed and remain valid.
void iree_arena_block_pool_trim(iree_arena_block_pool_t* block_pool);
diff --git a/runtime/src/iree/base/string_view.h b/runtime/src/iree/base/string_view.h
index 01e243c..048b468 100644
--- a/runtime/src/iree/base/string_view.h
+++ b/runtime/src/iree/base/string_view.h
@@ -89,6 +89,20 @@
return v;
}
+// A list of string key-value pairs.
+typedef struct iree_string_pair_list_t {
+ // Total number of pairs in the list.
+ iree_host_size_t count;
+ // Pair list or NULL if no pairs.
+ const iree_string_pair_t* pairs;
+} iree_string_pair_list_t;
+
+// Returns an empty string pair list.
+static inline iree_string_pair_list_t iree_string_pair_list_empty(void) {
+ iree_string_pair_list_t v = {0, NULL};
+ return v;
+}
+
#define iree_string_view_literal(str) \
{ .data = (str), .size = IREE_ARRAYSIZE(str) - 1 }
@@ -106,6 +120,12 @@
const iree_string_view_t* values;
} iree_string_view_list_t;
+// Returns an empty string list.
+static inline iree_string_view_list_t iree_string_view_list_empty(void) {
+ iree_string_view_list_t v = {0, NULL};
+ return v;
+}
+
// Returns true if the two strings are equal (compare == 0).
IREE_API_EXPORT bool iree_string_view_equal(iree_string_view_t lhs,
iree_string_view_t rhs);
diff --git a/runtime/src/iree/base/time.h b/runtime/src/iree/base/time.h
index 89cad70..aaaa85e 100644
--- a/runtime/src/iree/base/time.h
+++ b/runtime/src/iree/base/time.h
@@ -173,6 +173,14 @@
: iree_relative_timeout_to_deadline_ns(timeout.nanos);
}
+// Returns a relative timeout duration in nanoseconds from the given timeout.
+static inline iree_duration_t iree_timeout_as_duration_ns(
+ iree_timeout_t timeout) {
+ return timeout.type == IREE_TIMEOUT_ABSOLUTE
+ ? iree_absolute_deadline_to_timeout_ns(timeout.nanos)
+ : timeout.nanos;
+}
+
// Returns the earliest timeout between |lhs| and |rhs|.
static inline iree_timeout_t iree_timeout_min(iree_timeout_t lhs,
iree_timeout_t rhs) {
diff --git a/runtime/src/iree/base/tracing.h b/runtime/src/iree/base/tracing.h
index e0c339a..c0fbed9 100644
--- a/runtime/src/iree/base/tracing.h
+++ b/runtime/src/iree/base/tracing.h
@@ -165,14 +165,14 @@
//===----------------------------------------------------------------------===//
// Matches Tracy's PlotFormatType enum.
-enum {
+typedef enum {
// Values will be displayed as plain numbers.
IREE_TRACING_PLOT_TYPE_NUMBER = 0,
// Treats the values as memory sizes. Will display kilobytes, megabytes, etc.
IREE_TRACING_PLOT_TYPE_MEMORY = 1,
// Values will be displayed as percentage with value 100 being equal to 100%.
IREE_TRACING_PLOT_TYPE_PERCENTAGE = 2,
-};
+} iree_tracing_plot_type_t;
// Colors used for messages based on the level provided to the macro.
enum {
diff --git a/runtime/src/iree/hal/command_buffer.h b/runtime/src/iree/hal/command_buffer.h
index e6523f2..82fdd61 100644
--- a/runtime/src/iree/hal/command_buffer.h
+++ b/runtime/src/iree/hal/command_buffer.h
@@ -443,10 +443,15 @@
// An RGBA color.
typedef struct iree_hal_label_color_t {
- uint8_t r;
- uint8_t g;
- uint8_t b;
- uint8_t a;
+ union {
+ struct {
+ uint8_t r;
+ uint8_t g;
+ uint8_t b;
+ uint8_t a;
+ };
+ uint32_t value;
+ };
} iree_hal_label_color_t;
// A source location attached to debug labels.
@@ -457,7 +462,7 @@
// An unspecified color; debugging tools are to choose their own.
static inline iree_hal_label_color_t iree_hal_label_color_unspecified() {
- iree_hal_label_color_t color = {0, 0, 0, 0};
+ iree_hal_label_color_t color = {{{0, 0, 0, 0}}};
return color;
}
diff --git a/runtime/src/iree/hal/cts/cts_test_template.cc.in b/runtime/src/iree/hal/cts/cts_test_template.cc.in
index 15fb53f..15ad6cc 100644
--- a/runtime/src/iree/hal/cts/cts_test_template.cc.in
+++ b/runtime/src/iree/hal/cts/cts_test_template.cc.in
@@ -12,6 +12,7 @@
#cmakedefine IREE_CTS_EXECUTABLE_FORMAT @IREE_CTS_EXECUTABLE_FORMAT@
#cmakedefine IREE_CTS_EXECUTABLES_TESTDATA_HDR "@IREE_CTS_EXECUTABLES_TESTDATA_HDR@"
#cmakedefine IREE_CTS_TARGET_BACKEND "@IREE_CTS_TARGET_BACKEND@"
+#cmakedefine IREE_CTS_TARGET_DEVICE "@IREE_CTS_TARGET_DEVICE@"
// clang-format on
diff --git a/runtime/src/iree/hal/cts/driver_test.h b/runtime/src/iree/hal/cts/driver_test.h
index 515244f..5c276c0 100644
--- a/runtime/src/iree/hal/cts/driver_test.h
+++ b/runtime/src/iree/hal/cts/driver_test.h
@@ -139,15 +139,17 @@
std::cout << "Driver has " << device_info_count << " device(s)\n";
if (device_info_count == 0) GTEST_SKIP() << "No available devices";
- // Check creation via empty path.
- iree_string_view_t name = device_infos[0].name;
- CheckCreateDeviceViaPath(device_infos[0].name, iree_string_view_empty());
-
- // Check creation via index path.
+ // Check creation via explicit path.
+ bool tested_empty_path = false;
for (iree_host_size_t i = 0; i < device_info_count; ++i) {
- char index[8];
- snprintf(index, 8, "%d", i);
- CheckCreateDeviceViaPath(device_infos[i].name, IREE_SV(index));
+ tested_empty_path |= iree_string_view_is_empty(device_infos[i].path);
+ CheckCreateDeviceViaPath(device_infos[i].name, device_infos[i].path);
+ }
+
+ // Check creation via empty path if no enumerated device already had one.
+ if (!tested_empty_path) {
+ iree_string_view_t name = device_infos[0].name;
+ CheckCreateDeviceViaPath(name, iree_string_view_empty());
}
iree_allocator_free(iree_allocator_system(), device_infos);
diff --git a/runtime/src/iree/hal/drivers/null/api.h b/runtime/src/iree/hal/drivers/null/api.h
index ad47173..2ae6ecd 100644
--- a/runtime/src/iree/hal/drivers/null/api.h
+++ b/runtime/src/iree/hal/drivers/null/api.h
@@ -27,11 +27,11 @@
int reserved;
} iree_hal_null_device_options_t;
-// Initializes |out_params| to default values.
+// Initializes |out_options| to default values.
IREE_API_EXPORT void iree_hal_null_device_options_initialize(
- iree_hal_null_device_options_t* out_params);
+ iree_hal_null_device_options_t* out_options);
-// Creates a {Null} HAL device with the given |params|.
+// Creates a {Null} HAL device with the given |options|.
//
// The provided |identifier| will be used by programs to distinguish the device
// type from other HAL implementations. If compiling programs with the IREE
diff --git a/runtime/src/iree/hal/drivers/null/device.c b/runtime/src/iree/hal/drivers/null/device.c
index d91c23e..937b213 100644
--- a/runtime/src/iree/hal/drivers/null/device.c
+++ b/runtime/src/iree/hal/drivers/null/device.c
@@ -18,6 +18,26 @@
#include "iree/hal/utils/file_transfer.h"
//===----------------------------------------------------------------------===//
+// iree_hal_null_device_options_t
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT void iree_hal_null_device_options_initialize(
+ iree_hal_null_device_options_t* out_options) {
+ memset(out_options, 0, sizeof(*out_options));
+ // TODO(null): set defaults based on compiler configuration. Flags should not
+ // be used as multiple devices may be configured within the process or the
+ // hosting application may be authored in python/etc that does not use a flags
+ // mechanism accessible here.
+}
+
+static iree_status_t iree_hal_null_device_options_verify(
+ const iree_hal_null_device_options_t* options) {
+ // TODO(null): verify that the parameters are within expected ranges and any
+ // requested features are supported.
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
// iree_hal_null_device_t
//===----------------------------------------------------------------------===//
@@ -42,22 +62,6 @@
return (iree_hal_null_device_t*)base_value;
}
-void iree_hal_null_device_options_initialize(
- iree_hal_null_device_options_t* out_options) {
- memset(out_options, 0, sizeof(*out_options));
- // TODO(null): set defaults based on compiler configuration. Flags should not
- // be used as multiple devices may be configured within the process or the
- // hosting application may be authored in python/etc that does not use a flags
- // mechanism accessible here.
-}
-
-static iree_status_t iree_hal_null_device_options_verify(
- const iree_hal_null_device_options_t* options) {
- // TODO(null): verify that the parameters are within expected ranges and any
- // requested features are supported.
- return iree_ok_status();
-}
-
iree_status_t iree_hal_null_device_create(
iree_string_view_t identifier,
const iree_hal_null_device_options_t* options,
diff --git a/runtime/src/iree/hal/drivers/null/driver.c b/runtime/src/iree/hal/drivers/null/driver.c
index 78cf511..d9fd2a3 100644
--- a/runtime/src/iree/hal/drivers/null/driver.c
+++ b/runtime/src/iree/hal/drivers/null/driver.c
@@ -9,6 +9,30 @@
#include "iree/hal/drivers/null/api.h"
//===----------------------------------------------------------------------===//
+// iree_hal_null_driver_options_t
+//===----------------------------------------------------------------------===//
+
+IREE_API_EXPORT void iree_hal_null_driver_options_initialize(
+ iree_hal_null_driver_options_t* out_options) {
+ memset(out_options, 0, sizeof(*out_options));
+
+ // TODO(null): set defaults based on compiler configuration. Flags should not
+ // be used as multiple devices may be configured within the process or the
+ // hosting application may be authored in python/etc that does not use a flags
+ // mechanism accessible here.
+
+ iree_hal_null_device_options_initialize(&out_options->default_device_options);
+}
+
+static iree_status_t iree_hal_null_driver_options_verify(
+ const iree_hal_null_driver_options_t* options) {
+ // TODO(null): verify that the parameters are within expected ranges and any
+ // requested features are supported.
+
+ return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
// iree_hal_null_driver_t
//===----------------------------------------------------------------------===//
@@ -35,26 +59,6 @@
return (iree_hal_null_driver_t*)base_value;
}
-void iree_hal_null_driver_options_initialize(
- iree_hal_null_driver_options_t* out_options) {
- memset(out_options, 0, sizeof(*out_options));
-
- // TODO(null): set defaults based on compiler configuration. Flags should not
- // be used as multiple devices may be configured within the process or the
- // hosting application may be authored in python/etc that does not use a flags
- // mechanism accessible here.
-
- iree_hal_null_device_options_initialize(&out_options->default_device_options);
-}
-
-static iree_status_t iree_hal_null_driver_options_verify(
- const iree_hal_null_driver_options_t* options) {
- // TODO(null): verify that the parameters are within expected ranges and any
- // requested features are supported.
-
- return iree_ok_status();
-}
-
IREE_API_EXPORT iree_status_t iree_hal_null_driver_create(
iree_string_view_t identifier,
const iree_hal_null_driver_options_t* options,
diff --git a/runtime/src/iree/hal/queue.h b/runtime/src/iree/hal/queue.h
index 4e54e0b..b52627e 100644
--- a/runtime/src/iree/hal/queue.h
+++ b/runtime/src/iree/hal/queue.h
@@ -36,6 +36,60 @@
#define IREE_HAL_QUEUE_AFFINITY_ANY ((iree_hal_queue_affinity_t)(-1))
#define IREE_HAL_MAX_QUEUES (sizeof(iree_hal_queue_affinity_t) * 8)
+// Returns true if the |queue_affinity| is empty (none specified).
+#define iree_hal_queue_affinity_is_empty(queue_affinity) ((queue_affinity) == 0)
+
+// Returns true if the |queue_affinity| is indicating any/all queues.
+#define iree_hal_queue_affinity_is_any(queue_affinity) \
+ ((queue_affinity) == IREE_HAL_QUEUE_AFFINITY_ANY)
+
+// Returns the total number of queues specified in the |queue_affinity| mask.
+#define iree_hal_queue_affinity_count(queue_affinity) \
+ iree_math_count_ones_u64(queue_affinity)
+
+// Returns the index of the first set bit in |queue_affinity|.
+// Requires that at least one bit be set.
+#define iree_hal_queue_affinity_find_first_set(queue_affinity) \
+ iree_math_count_trailing_zeros_u64(queue_affinity)
+
+// Logically shifts the queue affinity to the right by the given amount.
+#define iree_hal_queue_affinity_shr(queue_affinity, amount) \
+ iree_shr((queue_affinity), (amount))
+
+// Updates |inout_affinity| to only include those bits set in |mask_affinity|.
+#define iree_hal_queue_affinity_and_into(inout_affinity, mask_affinity) \
+ (inout_affinity) = ((inout_affinity) & (mask_affinity))
+
+// Updates |inout_affinity| to include bits set in |mask_affinity|.
+#define iree_hal_queue_affinity_or_into(inout_affinity, mask_affinity) \
+ (inout_affinity) = ((inout_affinity) | (mask_affinity))
+
+// Loops over each queue in the given |queue_affinity| bitmap.
+// Only set bits are visited: a sparse mask 0b101000 yields ordinals 3 then 5.
+// The following variables are available within the loop:
+// queue_count: total number of queues used
+// queue_index: loop index (0 to queue_count)
+// queue_ordinal: queue ordinal (0 to the total number of queues)
+//
+// Example:
+// IREE_HAL_FOR_QUEUE_AFFINITY(my_queue_affinity) {
+// compact_queue_list[queue_index]; // 0 to my_queue_affinity count
+// full_queue_list[queue_ordinal]; // 0 to available queues
+// }
+#define IREE_HAL_FOR_QUEUE_AFFINITY(queue_affinity) \
+ iree_hal_queue_affinity_t _queue_bits = (queue_affinity); \
+ for (int queue_index = 0, _queue_ordinal_base = 0, \
+ queue_count = iree_hal_queue_affinity_count(_queue_bits), \
+ _bit_offset = \
+ iree_hal_queue_affinity_find_first_set(_queue_bits), \
+ queue_ordinal = _bit_offset; \
+ queue_index < queue_count; \
+ ++queue_index, _queue_ordinal_base += _bit_offset + 1, \
+ _queue_bits = \
+ iree_hal_queue_affinity_shr(_queue_bits, _bit_offset + 1), \
+ _bit_offset = iree_hal_queue_affinity_find_first_set(_queue_bits), \
+ queue_ordinal = _queue_ordinal_base + _bit_offset)
+
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/runtime/src/iree/hal/semaphore.h b/runtime/src/iree/hal/semaphore.h
index 52571ed..5320666 100644
--- a/runtime/src/iree/hal/semaphore.h
+++ b/runtime/src/iree/hal/semaphore.h
@@ -102,7 +102,8 @@
}
// Frees an iree_status_t encoded in a semaphore |value|, if any.
-static inline void iree_hal_semaphore_failure_free(uint64_t value) {
+IREE_ATTRIBUTE_ALWAYS_INLINE static inline void iree_hal_semaphore_failure_free(
+ uint64_t value) {
if (value & IREE_HAL_SEMAPHORE_FAILURE_VALUE_STATUS_BIT) {
iree_status_free((iree_status_t)(((int64_t)value << 1) >> 1));
}
diff --git a/runtime/src/iree/hal/utils/fd_file.c b/runtime/src/iree/hal/utils/fd_file.c
index 6941e2e..a502c1b 100644
--- a/runtime/src/iree/hal/utils/fd_file.c
+++ b/runtime/src/iree/hal/utils/fd_file.c
@@ -223,17 +223,16 @@
// Verify that the requested access can be satisfied.
if (iree_all_bits_set(access, IREE_HAL_MEMORY_ACCESS_READ) &&
!iree_all_bits_set(allowed_access, IREE_HAL_MEMORY_ACCESS_READ)) {
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0,
- iree_make_status(
- IREE_STATUS_PERMISSION_DENIED,
- "read access requested on a file descriptor that is not readable"));
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(
+ IREE_STATUS_PERMISSION_DENIED,
+ "read access requested on a file descriptor that is not readable");
} else if (iree_all_bits_set(access, IREE_HAL_MEMORY_ACCESS_WRITE) &&
!iree_all_bits_set(allowed_access, IREE_HAL_MEMORY_ACCESS_WRITE)) {
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_make_status(IREE_STATUS_PERMISSION_DENIED,
- "write access requested on a file descriptor that "
- "is not writable"));
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(IREE_STATUS_PERMISSION_DENIED,
+ "write access requested on a file descriptor that "
+ "is not writable");
}
// Allocate object that retains the underlying file handle and our opened
diff --git a/runtime/src/iree/io/formats/irpa/irpa_builder.c b/runtime/src/iree/io/formats/irpa/irpa_builder.c
index 330691f..e1459e8 100644
--- a/runtime/src/iree/io/formats/irpa/irpa_builder.c
+++ b/runtime/src/iree/io/formats/irpa/irpa_builder.c
@@ -189,10 +189,10 @@
break;
}
default: {
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
- "unhandled entry type %d",
- (int)source_entry->type));
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "unhandled entry type %d",
+ (int)source_entry->type);
}
}
diff --git a/runtime/src/iree/io/memory_stream.c b/runtime/src/iree/io/memory_stream.c
index 84043c0..f16d300 100644
--- a/runtime/src/iree/io/memory_stream.c
+++ b/runtime/src/iree/io/memory_stream.c
@@ -199,15 +199,14 @@
z0, iree_io_stream_validate_fixed_range(stream->offset, stream->length,
buffer_capacity, &read_length));
if (!out_buffer_length && read_length != buffer_capacity) {
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0,
- iree_make_status(IREE_STATUS_OUT_OF_RANGE,
- "read of range [%" PRIu64 ", %" PRIu64 ") (%" PRIu64
- " bytes) out of range; stream offset %" PRIu64
- " and length %" PRIu64 " insufficient",
- stream->offset, stream->offset + buffer_capacity,
- (iree_io_stream_pos_t)buffer_capacity, stream->offset,
- stream->length));
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+ "read of range [%" PRIu64 ", %" PRIu64 ") (%" PRIu64
+ " bytes) out of range; stream offset %" PRIu64
+ " and length %" PRIu64 " insufficient",
+ stream->offset, stream->offset + buffer_capacity,
+ (iree_io_stream_pos_t)buffer_capacity,
+ stream->offset, stream->length);
}
memcpy(buffer, stream->contents + stream->offset,
diff --git a/runtime/src/iree/schemas/amdgpu_executable_def.fbs b/runtime/src/iree/schemas/amdgpu_executable_def.fbs
index 43efdb0..7c0510c 100644
--- a/runtime/src/iree/schemas/amdgpu_executable_def.fbs
+++ b/runtime/src/iree/schemas/amdgpu_executable_def.fbs
@@ -28,6 +28,7 @@
// Information about an exported function on the executable.
table ExportDef {
// String name of the exported function symbol in the module.
+ // Includes a `.kd` suffix as that's what HSA expects.
symbol_name:string;
// Workgroup size for the export.
diff --git a/runtime/src/iree/tooling/context_util.c b/runtime/src/iree/tooling/context_util.c
index f088ee8..8dbd959 100644
--- a/runtime/src/iree/tooling/context_util.c
+++ b/runtime/src/iree/tooling/context_util.c
@@ -66,10 +66,10 @@
} else if (strcmp(FLAG_module_mode, "preload") == 0) {
read_flags |= IREE_FILE_READ_FLAG_PRELOAD;
} else {
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
- "unrecognized --module_mode= value '%s'",
- FLAG_module_mode));
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+ "unrecognized --module_mode= value '%s'",
+ FLAG_module_mode);
}
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_file_read_contents(path_str, read_flags, host_allocator,