Tracing improvements for the VM, executables, and iree-benchmark-module. (#3560)
diff --git a/iree/base/api.h b/iree/base/api.h
index d733293..61df05c 100644
--- a/iree/base/api.h
+++ b/iree/base/api.h
@@ -540,6 +540,13 @@
if (IREE_UNLIKELY(var)) { \
return IREE_STATUS_IMPL_ANNOTATE_SWITCH_(var, __VA_ARGS__); \
}
+#define IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_(tail_expr, var, ...) \
+ iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_( \
+ IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_GET_EXPR_)(__VA_ARGS__))); \
+ if (IREE_UNLIKELY(var)) { \
+ (tail_expr); \
+ return IREE_STATUS_IMPL_ANNOTATE_SWITCH_(var, __VA_ARGS__); \
+ }
#define IREE_STATUS_IMPL_IGNORE_ERROR_(var, expr) \
iree_status_t var = (expr); \
if (IREE_UNLIKELY(var)) iree_status_ignore(var);
@@ -552,6 +559,14 @@
#define IREE_STATUS_IMPL_RETURN_IF_API_ERROR_(var, expr, ...) \
iree_status_t var = (expr); \
if (IREE_UNLIKELY(var)) return var;
+#undef IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_
+#define IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_(tail_expr, var, expr, \
+ ...) \
+ iree_status_t var = (expr); \
+ if (IREE_UNLIKELY(var)) { \
+ (tail_expr); \
+ return var; \
+ }
#undef IREE_STATUS_IMPL_IGNORE_ERROR_
#define IREE_STATUS_IMPL_IGNORE_ERROR_(var, expr) \
iree_status_t var = (expr); \
@@ -593,6 +608,12 @@
IREE_STATUS_IMPL_CONCAT_(__status_, __COUNTER__), \
IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_IDENTITY_(__VA_ARGS__)))
+// IREE_RETURN_IF_ERROR that evaluates |tail_expr| before returning an error.
+#define IREE_RETURN_AND_EVAL_IF_ERROR(tail_expr, ...) \
+ IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_( \
+ tail_expr, IREE_STATUS_IMPL_CONCAT_(__status_, __COUNTER__), \
+ IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_IDENTITY_(__VA_ARGS__)))
+
// Ignores the status result of (expr) regardless of its value.
//
// Example:
diff --git a/iree/base/flatbuffer_util.cc b/iree/base/flatbuffer_util.cc
index 379be73..117aea2 100644
--- a/iree/base/flatbuffer_util.cc
+++ b/iree/base/flatbuffer_util.cc
@@ -66,8 +66,7 @@
std::function<void()> deleter,
size_t root_type_size,
VerifierFn verifier_fn) {
- IREE_TRACE_SCOPE("FlatBufferFileBase::FromBuffer:size", int)
- (static_cast<int>(buffer_data.size()));
+ IREE_TRACE_SCOPE();
// Sanity check buffer for the minimum size as FlatBuffers doesn't.
if (buffer_data.size() < 16) {
diff --git a/iree/base/tracing.h b/iree/base/tracing.h
index c8594de..a3226ad 100644
--- a/iree/base/tracing.h
+++ b/iree/base/tracing.h
@@ -263,12 +263,12 @@
// Begins a new zone with the given runtime dynamic string name.
// The |value| string will be copied into the trace buffer.
-#define IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(zone_id, name, name_length) \
- static const iree_tracing_location_t TracyConcat( \
- __tracy_source_location, __LINE__) = {NULL, __FUNCTION__, __FILE__, \
- (uint32_t)__LINE__, 0}; \
- iree_zone_id_t zone_id = iree_tracing_zone_begin_impl( \
- &TracyConcat(__tracy_source_location, __LINE__), name, name_length);
+#define IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(zone_id, name, name_length) \
+ static const iree_tracing_location_t TracyConcat( \
+ __tracy_source_location, __LINE__) = {0, __FUNCTION__, __FILE__, \
+ (uint32_t)__LINE__, 0}; \
+ iree_zone_id_t zone_id = iree_tracing_zone_begin_impl( \
+ &TracyConcat(__tracy_source_location, __LINE__), (name), (name_length));
// Begins an externally defined zone with a dynamic source location.
// The |file_name|, |function_name|, and optional |name| strings will be copied
@@ -280,6 +280,10 @@
file_name, file_name_length, line, function_name, function_name_length, \
name, name_length)
+// Appends an integer value to the parent zone. May be called multiple times.
+#define IREE_TRACE_ZONE_APPEND_VALUE(zone_id, value) \
+ ___tracy_emit_zone_value((struct ___tracy_c_zone_context){zone_id, 1}, value)
+
// Appends a string value to the parent zone. May be called multiple times.
// The |value| string will be copied into the trace buffer.
#define IREE_TRACE_ZONE_APPEND_TEXT(...) \
@@ -297,6 +301,11 @@
#define IREE_TRACE_ZONE_END(zone_id) \
___tracy_emit_zone_end((struct ___tracy_c_zone_context){zone_id, 1})
+// Ends the current zone before returning on a failure.
+// Sugar for IREE_TRACE_ZONE_END+IREE_RETURN_IF_ERROR.
+#define IREE_RETURN_AND_END_ZONE_IF_ERROR(zone_id, ...) \
+ IREE_RETURN_AND_EVAL_IF_ERROR(IREE_TRACE_ZONE_END(zone_id), __VA_ARGS__)
+
// Configures the named plot with an IREE_TRACING_PLOT_TYPE_* representation.
#define IREE_TRACE_SET_PLOT_TYPE(name_literal, plot_type) \
iree_tracing_set_plot_type_impl(name_literal, plot_type)
@@ -347,8 +356,11 @@
#define IREE_TRACE_ZONE_BEGIN_EXTERNAL( \
zone_id, file_name, file_name_length, line, function_name, \
function_name_length, name, name_length)
+#define IREE_TRACE_ZONE_APPEND_VALUE(zone_id, value)
#define IREE_TRACE_ZONE_APPEND_TEXT(zone_id, value, value_length)
#define IREE_TRACE_ZONE_END(zone_id)
+#define IREE_RETURN_AND_END_ZONE_IF_ERROR(zone_id, ...) \
+ IREE_RETURN_IF_ERROR(__VA_ARGS__)
#define IREE_TRACE_SET_PLOT_TYPE(name_literal, plot_type)
#define IREE_TRACE_PLOT_VALUE_I64(name_literal, value)
#define IREE_TRACE_PLOT_VALUE_F32(name_literal, value)
@@ -411,17 +423,20 @@
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
// TODO(#1886): update these to tracy and drop the 0.
-#define IREE_TRACE_SCOPE0(name_spec) ZoneScopedN(name_spec)
-#define IREE_TRACE_SCOPE(name_spec, ...)
-#define IREE_TRACE_EVENT0
+#define IREE_TRACE_SCOPE() ZoneScoped
+#define IREE_TRACE_SCOPE_DYNAMIC(name_cstr) \
+ ZoneTransientN(___tracy_scoped_zone, name_cstr, true)
+#define IREE_TRACE_SCOPE0(name_literal) ZoneScopedN(name_literal)
#define IREE_TRACE_EVENT
+#define IREE_TRACE_EVENT0
#else
#define IREE_TRACE_THREAD_ENABLE(name)
-#define IREE_TRACE_SCOPE0(name_spec)
-#define IREE_TRACE_SCOPE(name_spec, ...) (void)
-#define IREE_TRACE_EVENT0
+#define IREE_TRACE_SCOPE()
+#define IREE_TRACE_SCOPE_DYNAMIC(name_cstr)
+#define IREE_TRACE_SCOPE0(name_literal)
#define IREE_TRACE_EVENT(void)
+#define IREE_TRACE_EVENT0
#endif // IREE_TRACING_FEATURE_INSTRUMENTATION
// TODO(benvanik): macros for LockableCtx / Lockable mutex tracking.
diff --git a/iree/hal/device_manager.cc b/iree/hal/device_manager.cc
index d3c3301..5a527f3 100644
--- a/iree/hal/device_manager.cc
+++ b/iree/hal/device_manager.cc
@@ -108,8 +108,7 @@
MemoryTypeBitfield memory_type, BufferUsageBitfield buffer_usage,
device_size_t allocation_size,
absl::Span<const DevicePlacement> device_placements) {
- IREE_TRACE_SCOPE("DeviceManager::TryAllocateDeviceVisibleBuffer:size", int)
- (static_cast<int>(allocation_size));
+ IREE_TRACE_SCOPE0("DeviceManager::TryAllocateDeviceVisibleBuffer:size");
if (!AnyBitSet(memory_type & MemoryType::kHostLocal)) {
return InvalidArgumentErrorBuilder(IREE_LOC)
<< "Host-local buffers require the kHostLocal bit: "
@@ -138,8 +137,7 @@
MemoryTypeBitfield memory_type, BufferUsageBitfield buffer_usage,
device_size_t allocation_size,
absl::Span<const DevicePlacement> device_placements) {
- IREE_TRACE_SCOPE("DeviceManager::AllocateDeviceVisibleBuffer:size", int)
- (static_cast<int>(allocation_size));
+ IREE_TRACE_SCOPE0("DeviceManager::AllocateDeviceVisibleBuffer:size");
if (!AnyBitSet(memory_type & MemoryType::kHostLocal)) {
return InvalidArgumentErrorBuilder(IREE_LOC)
<< "Host-local buffers require the kHostLocal bit: "
@@ -160,8 +158,7 @@
MemoryTypeBitfield memory_type, BufferUsageBitfield buffer_usage,
device_size_t allocation_size,
absl::Span<const DevicePlacement> device_placements) {
- IREE_TRACE_SCOPE("DeviceManager::AllocateDeviceLocalBuffer:size", int)
- (static_cast<int>(allocation_size));
+ IREE_TRACE_SCOPE0("DeviceManager::AllocateDeviceLocalBuffer:size");
if (!AnyBitSet(memory_type & MemoryType::kDeviceLocal)) {
return InvalidArgumentErrorBuilder(IREE_LOC)
<< "Device-local buffers require the kDeviceLocal bit: "
diff --git a/iree/hal/dylib/dylib_executable.cc b/iree/hal/dylib/dylib_executable.cc
index def1786..10dbf29 100644
--- a/iree/hal/dylib/dylib_executable.cc
+++ b/iree/hal/dylib/dylib_executable.cc
@@ -16,7 +16,6 @@
#include "flatbuffers/flatbuffers.h"
#include "iree/base/file_io.h"
-#include "iree/base/tracing.h"
#include "iree/schemas/dylib_executable_def_generated.h"
namespace iree {
@@ -89,6 +88,10 @@
<< "Could not find symbol: " << entry_points[i];
}
entry_functions_[i] = symbol;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ entry_names_[i] = entry_points[i]->c_str();
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
}
return OkStatus();
@@ -96,6 +99,11 @@
struct DyLibDispatchState : public HostExecutable::DispatchState {
DyLibDispatchState() = default;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ const char* entry_name = nullptr;
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
void* entry_function = nullptr;
absl::InlinedVector<void*, 4> args;
absl::InlinedVector<int32_t, 4> push_constant;
@@ -111,6 +119,9 @@
}
auto dispatch_state = make_ref<DyLibDispatchState>();
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ dispatch_state->entry_name = entry_names_[params.entry_point];
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
dispatch_state->entry_function = entry_functions_[params.entry_point];
for (size_t set = 0; set < params.set_bindings.size(); ++set) {
@@ -135,8 +146,8 @@
Status DyLibExecutable::DispatchTile(DispatchState* state,
std::array<uint32_t, 3> workgroup_xyz) {
- IREE_TRACE_SCOPE0("DyLibExecutable::DispatchTile");
auto* dispatch_state = static_cast<DyLibDispatchState*>(state);
+ IREE_TRACE_SCOPE_DYNAMIC(dispatch_state->entry_name);
auto entry_function =
(void (*)(void**, int32_t*))dispatch_state->entry_function;
diff --git a/iree/hal/dylib/dylib_executable.h b/iree/hal/dylib/dylib_executable.h
index 63c4a26..210be15 100644
--- a/iree/hal/dylib/dylib_executable.h
+++ b/iree/hal/dylib/dylib_executable.h
@@ -21,6 +21,7 @@
#include "absl/container/inlined_vector.h"
#include "iree/base/dynamic_library.h"
#include "iree/base/status.h"
+#include "iree/base/tracing.h"
#include "iree/hal/executable_spec.h"
#include "iree/hal/host/host_executable.h"
@@ -50,6 +51,10 @@
std::string executable_library_temp_path_;
std::unique_ptr<DynamicLibrary> executable_library_;
absl::InlinedVector<void*, 4> entry_functions_;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ absl::InlinedVector<const char*, 4> entry_names_;
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
};
} // namespace dylib
diff --git a/iree/hal/host/BUILD b/iree/hal/host/BUILD
index c7b28aa..a26fa0f 100644
--- a/iree/hal/host/BUILD
+++ b/iree/hal/host/BUILD
@@ -55,6 +55,7 @@
deps = [
"//iree/base:logging",
"//iree/base:status",
+ "//iree/base:tracing",
"//iree/hal:buffer",
],
)
diff --git a/iree/hal/host/CMakeLists.txt b/iree/hal/host/CMakeLists.txt
index 803d7b6..953a777 100644
--- a/iree/hal/host/CMakeLists.txt
+++ b/iree/hal/host/CMakeLists.txt
@@ -55,6 +55,7 @@
DEPS
iree::base::logging
iree::base::status
+ iree::base::tracing
iree::hal::buffer
PUBLIC
)
diff --git a/iree/hal/host/host_buffer.cc b/iree/hal/host/host_buffer.cc
index c5016f3..265e048 100644
--- a/iree/hal/host/host_buffer.cc
+++ b/iree/hal/host/host_buffer.cc
@@ -20,6 +20,7 @@
#include "iree/base/logging.h"
#include "iree/base/status.h"
+#include "iree/base/tracing.h"
namespace iree {
namespace hal {
@@ -36,6 +37,7 @@
owns_data_(owns_data) {}
HostBuffer::~HostBuffer() {
+ IREE_TRACE_SCOPE();
if (owns_data_ && data_) {
std::free(data_);
data_ = nullptr;
diff --git a/iree/hal/vmla/vmla_executable.cc b/iree/hal/vmla/vmla_executable.cc
index 336c610..4cc447a 100644
--- a/iree/hal/vmla/vmla_executable.cc
+++ b/iree/hal/vmla/vmla_executable.cc
@@ -156,8 +156,9 @@
Status VMLAExecutable::DispatchTile(DispatchState* state,
std::array<uint32_t, 3> workgroup_xyz) {
- IREE_TRACE_SCOPE0("VMLAExecutable::DispatchTile");
auto* dispatch_state = static_cast<VMLADispatchState*>(state);
+ IREE_TRACE_SCOPE_DYNAMIC(
+ iree_vm_function_name(&dispatch_state->function).data);
auto* input_list_storage = alloca(dispatch_state->input_list_size);
iree_vm_list_t* input_list = nullptr;
diff --git a/iree/modules/hal/hal_module.cc b/iree/modules/hal/hal_module.cc
index c990cf0..be8a730 100644
--- a/iree/modules/hal/hal_module.cc
+++ b/iree/modules/hal/hal_module.cc
@@ -156,10 +156,13 @@
IREE_RETURN_IF_ERROR(iree_hal_semaphore_wait_with_deadline(
semaphore.get(), 1ull, IREE_TIME_INFINITE_FUTURE));
- for (auto& ref : deferred_releases_) {
- iree_vm_ref_release(&ref);
+ {
+ IREE_TRACE_SCOPE0("HALModuleState::DeferredReleases");
+ for (auto& ref : deferred_releases_) {
+ iree_vm_ref_release(&ref);
+ }
+ deferred_releases_.clear();
}
- deferred_releases_.clear();
return OkStatus();
}
diff --git a/iree/tools/iree-benchmark-module-main.cc b/iree/tools/iree-benchmark-module-main.cc
index 96dc0c9..af0ab9f 100644
--- a/iree/tools/iree-benchmark-module-main.cc
+++ b/iree/tools/iree-benchmark-module-main.cc
@@ -59,38 +59,39 @@
namespace iree {
namespace {
+static void BenchmarkFunction(
+ const std::string& benchmark_name, iree_vm_context_t* context,
+ iree_vm_function_t function, iree_vm_list_t* inputs,
+ const std::vector<RawSignatureParser::Description>& output_descs,
+ benchmark::State& state) {
+ IREE_TRACE_SCOPE_DYNAMIC(benchmark_name.c_str());
+ IREE_TRACE_FRAME_MARK();
+ // Benchmarking loop: every iteration creates a new output list and invokes
+ // |function| with |inputs|; IREE_CHECK_OK is applied to both calls.
+ for (auto _ : state) {
+ IREE_TRACE_SCOPE0("BenchmarkIteration");
+ IREE_TRACE_FRAME_MARK_NAMED("Iteration");
+ vm::ref<iree_vm_list_t> outputs;
+ IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr,
+ output_descs.size(),
+ iree_allocator_system(), &outputs));
+ IREE_CHECK_OK(iree_vm_invoke(context, function, /*policy=*/nullptr, inputs,
+ outputs.get(), iree_allocator_system()));
+ }
+}
+
void RegisterModuleBenchmarks(
const std::string& function_name, iree_vm_context_t* context,
iree_vm_function_t function, iree_vm_list_t* inputs,
const std::vector<RawSignatureParser::Description>& output_descs) {
auto benchmark_name = "BM_" + function_name;
- benchmark::RegisterBenchmark(
- benchmark_name.c_str(),
- [context, function, inputs,
- output_descs](benchmark::State& state) -> void {
- // Warmup run step.
- {
- vm::ref<iree_vm_list_t> outputs;
- IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr,
- output_descs.size(),
- iree_allocator_system(), &outputs));
- IREE_CHECK_OK(iree_vm_invoke(context, function, /*policy=*/nullptr,
- inputs, outputs.get(),
- iree_allocator_system()));
- }
- // Benchmarking loop.
- for (auto _ : state) {
- // No status conversions and conditional returns in the benchmarked
- // inner loop.
- vm::ref<iree_vm_list_t> outputs;
- IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr,
- output_descs.size(),
- iree_allocator_system(), &outputs));
- IREE_CHECK_OK(iree_vm_invoke(context, function, /*policy=*/nullptr,
- inputs, outputs.get(),
- iree_allocator_system()));
- }
- })
+ benchmark::RegisterBenchmark(benchmark_name.c_str(),
+ [benchmark_name, context, function, inputs,
+ output_descs](benchmark::State& state) -> void {
+ BenchmarkFunction(benchmark_name, context,
+ function, inputs,
+ output_descs, state);
+ })
// By default only the main thread is included in CPU time. Include all
// the threads instead.
->MeasureProcessCPUTime()
@@ -107,6 +108,7 @@
}
Status GetModuleContentsFromFlags(std::string& module_data) {
+ IREE_TRACE_SCOPE0("GetModuleContentsFromFlags");
auto module_file = absl::GetFlag(FLAGS_module_file);
IREE_ASSIGN_OR_RETURN(module_data, file_io::GetFileContents(module_file));
return iree::OkStatus();
@@ -127,6 +129,8 @@
context_(nullptr),
input_module_(nullptr){};
~IREEBenchmark() {
+ IREE_TRACE_SCOPE0("IREEBenchmark::dtor");
+
// Order matters.
inputs_.reset();
iree_vm_module_release(hal_module_);
@@ -137,6 +141,8 @@
};
Status Register() {
+ IREE_TRACE_SCOPE0("IREEBenchmark::Register");
+
if (!instance_ || !device_ || !hal_module_ || !context_ || !input_module_) {
IREE_RETURN_IF_ERROR(Init());
}
@@ -152,6 +158,9 @@
private:
Status Init() {
+ IREE_TRACE_SCOPE0("IREEBenchmark::Init");
+ IREE_TRACE_FRAME_MARK_BEGIN_NAMED("init");
+
IREE_RETURN_IF_ERROR(GetModuleContentsFromFlags(module_data_));
IREE_RETURN_IF_ERROR(iree_hal_module_register_types());
@@ -170,10 +179,14 @@
IREE_RETURN_IF_ERROR(iree_vm_context_create_with_modules(
instance_, modules.data(), modules.size(), iree_allocator_system(),
&context_));
+
+ IREE_TRACE_FRAME_MARK_END_NAMED("init");
return iree::OkStatus();
}
Status RegisterSpecificFunction(const std::string& function_name) {
+ IREE_TRACE_SCOPE0("IREEBenchmark::RegisterSpecificFunction");
+
iree_vm_function_t function;
IREE_RETURN_IF_ERROR(input_module_->lookup_function(
input_module_->self, IREE_VM_FUNCTION_LINKAGE_EXPORT,
@@ -203,6 +216,7 @@
}
Status RegisterAllExportedFunctions() {
+ IREE_TRACE_SCOPE0("IREEBenchmark::RegisterAllExportedFunctions");
iree_vm_function_t function;
iree_vm_module_signature_t signature =
input_module_->signature(input_module_->self);
@@ -239,6 +253,8 @@
} // namespace iree
int main(int argc, char** argv) {
+ IREE_TRACE_SCOPE0("main");
+
// We have to contend with two flag parsing libraries here: absl's and
// benchmark's. To make matters worse, both define the `--help` flag. To
// ensure that each is able to parse its own flags, we use an absl "internal"
diff --git a/iree/vm/BUILD b/iree/vm/BUILD
index 07f1324..3f3d93e 100644
--- a/iree/vm/BUILD
+++ b/iree/vm/BUILD
@@ -170,6 +170,7 @@
":builtin_types",
"//iree/base:api",
"//iree/base:atomics",
+ "//iree/base:tracing",
],
)
@@ -221,6 +222,7 @@
"//iree/base:alignment",
"//iree/base:api",
"//iree/base:atomics",
+ "//iree/base:tracing",
],
)
diff --git a/iree/vm/CMakeLists.txt b/iree/vm/CMakeLists.txt
index e17375e..48d6544 100644
--- a/iree/vm/CMakeLists.txt
+++ b/iree/vm/CMakeLists.txt
@@ -190,6 +190,7 @@
::builtin_types
iree::base::api
iree::base::atomics
+ iree::base::tracing
PUBLIC
)
@@ -251,6 +252,7 @@
iree::base::alignment
iree::base::api
iree::base::atomics
+ iree::base::tracing
PUBLIC
)
diff --git a/iree/vm/bytecode_dispatch.c b/iree/vm/bytecode_dispatch.c
index 72ba1ab..bb5fbbf 100644
--- a/iree/vm/bytecode_dispatch.c
+++ b/iree/vm/bytecode_dispatch.c
@@ -14,6 +14,7 @@
#include <string.h>
+#include "iree/base/tracing.h"
#include "iree/vm/bytecode_dispatch_util.h"
#include "iree/vm/list.h"
diff --git a/iree/vm/bytecode_module.c b/iree/vm/bytecode_module.c
index 8e564bd..caa89d3 100644
--- a/iree/vm/bytecode_module.c
+++ b/iree/vm/bytecode_module.c
@@ -16,6 +16,7 @@
#include "iree/base/alignment.h"
#include "iree/base/api.h"
+#include "iree/base/tracing.h"
#include "iree/vm/bytecode_module_impl.h"
#include "iree/vm/ref.h"
#include "iree/vm/stack.h"
@@ -79,15 +80,18 @@
// registered.
static iree_status_t iree_vm_bytecode_module_resolve_types(
iree_vm_TypeDef_vec_t type_defs, iree_vm_type_def_t* type_table) {
+ IREE_TRACE_ZONE_BEGIN(z0);
for (size_t i = 0; i < iree_vm_TypeDef_vec_len(type_defs); ++i) {
iree_vm_TypeDef_table_t type_def = iree_vm_TypeDef_vec_at(type_defs, i);
type_table[i] = iree_vm_bytecode_module_resolve_type(type_def);
if (!iree_vm_type_def_is_valid(type_table[i])) {
+ IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_NOT_FOUND,
"no type registered with name '%s'",
iree_vm_TypeDef_full_name(type_def));
}
}
+ IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -249,6 +253,7 @@
static void iree_vm_bytecode_module_destroy(void* self) {
iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+ IREE_TRACE_ZONE_BEGIN(z0);
iree_allocator_free(module->flatbuffer_allocator,
(void*)module->flatbuffer_data.data);
@@ -256,6 +261,8 @@
module->flatbuffer_allocator = iree_allocator_null();
iree_allocator_free(module->allocator, module);
+
+ IREE_TRACE_ZONE_END(z0);
}
static iree_string_view_t iree_vm_bytecode_module_name(void* self) {
@@ -544,6 +551,7 @@
static iree_status_t iree_vm_bytecode_module_alloc_state(
void* self, iree_allocator_t allocator,
iree_vm_module_state_t** out_module_state) {
+ IREE_TRACE_ZONE_BEGIN(z0);
IREE_ASSERT_ARGUMENT(out_module_state);
*out_module_state = NULL;
@@ -556,8 +564,9 @@
// Allocate the storage for the structure and all its nested tables.
iree_vm_bytecode_module_state_t* state = NULL;
- IREE_RETURN_IF_ERROR(iree_allocator_malloc(allocator, total_state_struct_size,
- (void**)&state));
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc(allocator, total_state_struct_size,
+ (void**)&state));
state->allocator = allocator;
// Perform layout to get the pointers into the storage for each nested table.
@@ -577,12 +586,14 @@
}
*out_module_state = (iree_vm_module_state_t*)state;
+ IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
static void iree_vm_bytecode_module_free_state(
void* self, iree_vm_module_state_t* module_state) {
if (!module_state) return;
+ IREE_TRACE_ZONE_BEGIN(z0);
iree_vm_bytecode_module_state_t* state =
(iree_vm_bytecode_module_state_t*)module_state;
@@ -593,6 +604,8 @@
}
iree_allocator_free(state->allocator, module_state);
+
+ IREE_TRACE_ZONE_END(z0);
}
static iree_status_t iree_vm_bytecode_module_resolve_import(
@@ -645,7 +658,7 @@
// NOTE: any work here adds directly to the invocation time. Avoid doing too
// much work or touching too many unlikely-to-be-cached structures (such as
// walking the FlatBuffer, which may cause page faults).
-
+ IREE_TRACE_ZONE_BEGIN(z0);
IREE_ASSERT_ARGUMENT(out_result);
memset(out_result, 0, sizeof(iree_vm_execution_result_t));
@@ -653,12 +666,15 @@
// allow exports here as well to make things easier to call externally.
iree_vm_function_t function = call->function;
if (function.linkage != IREE_VM_FUNCTION_LINKAGE_INTERNAL) {
- IREE_RETURN_IF_ERROR(iree_vm_bytecode_module_get_function(
- self, function.linkage, function.ordinal, &function, NULL, NULL));
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
+ iree_vm_bytecode_module_get_function(
+ self, function.linkage, function.ordinal, &function, NULL, NULL));
}
iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
if (function.ordinal >= module->function_descriptor_count) {
+ IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"function ordinal out of range (0 < %u < %zu)",
function.ordinal,
@@ -688,28 +704,40 @@
flatbuffers_string_len(calling_convention);
iree_string_view_t cconv_arguments = iree_string_view_empty();
iree_string_view_t cconv_results = iree_string_view_empty();
- IREE_RETURN_IF_ERROR(iree_vm_function_call_get_cconv_fragments(
- &signature, &cconv_arguments, &cconv_results));
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_vm_function_call_get_cconv_fragments(
+ &signature, &cconv_arguments, &cconv_results));
// Jump into the dispatch routine to execute bytecode until the function
// either returns (synchronous) or yields (asynchronous).
- return iree_vm_bytecode_dispatch(stack, module, call, cconv_arguments,
- cconv_results, out_result);
+ iree_status_t status = iree_vm_bytecode_dispatch(
+ stack, module, call, cconv_arguments, cconv_results, out_result);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
}
IREE_API_EXPORT iree_status_t IREE_API_CALL iree_vm_bytecode_module_create(
iree_const_byte_span_t flatbuffer_data,
iree_allocator_t flatbuffer_allocator, iree_allocator_t allocator,
iree_vm_module_t** out_module) {
+ IREE_TRACE_ZONE_BEGIN(z0);
IREE_ASSERT_ARGUMENT(out_module);
*out_module = NULL;
- IREE_RETURN_IF_ERROR(
- iree_vm_bytecode_module_flatbuffer_verify(flatbuffer_data));
+ IREE_TRACE_ZONE_BEGIN_NAMED(z1, "iree_vm_bytecode_module_flatbuffer_verify");
+ iree_status_t status =
+ iree_vm_bytecode_module_flatbuffer_verify(flatbuffer_data);
+ if (!iree_status_is_ok(status)) {
+ IREE_TRACE_ZONE_END(z1);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
+ }
+ IREE_TRACE_ZONE_END(z1);
iree_vm_BytecodeModuleDef_table_t module_def =
iree_vm_BytecodeModuleDef_as_root(flatbuffer_data.data);
if (!module_def) {
+ IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"failed getting root from flatbuffer; expected identifier "
@@ -721,9 +749,10 @@
iree_vm_TypeDef_vec_len(type_defs) * sizeof(iree_vm_type_def_t);
iree_vm_bytecode_module_t* module = NULL;
- IREE_RETURN_IF_ERROR(iree_allocator_malloc(
- allocator, sizeof(iree_vm_bytecode_module_t) + type_table_size,
- (void**)&module));
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc(
+ allocator, sizeof(iree_vm_bytecode_module_t) + type_table_size,
+ (void**)&module));
module->allocator = allocator;
iree_vm_FunctionDescriptor_vec_t function_descriptors =
@@ -748,6 +777,7 @@
iree_vm_bytecode_module_resolve_types(type_defs, module->type_table);
if (!iree_status_is_ok(resolve_status)) {
iree_allocator_free(allocator, module);
+ IREE_TRACE_ZONE_END(z0);
return resolve_status;
}
@@ -765,5 +795,6 @@
iree_vm_bytecode_module_get_function_reflection_attr;
*out_module = &module->interface;
+ IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
diff --git a/iree/vm/context.c b/iree/vm/context.c
index e0d2b8b..e211adc 100644
--- a/iree/vm/context.c
+++ b/iree/vm/context.c
@@ -89,6 +89,8 @@
static iree_status_t iree_vm_context_resolve_module_imports(
iree_vm_context_t* context, iree_vm_module_t* module,
iree_vm_module_state_t* module_state) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
// NOTE: this has some bad characteristics, but the number of modules and the
// number of imported functions should be relatively small (even if the number
// of exported functions for particular modules is large).
@@ -96,7 +98,8 @@
for (int i = 0; i < module_signature.import_function_count; ++i) {
iree_string_view_t full_name;
iree_vm_function_signature_t expected_signature;
- IREE_RETURN_IF_ERROR(
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
module->get_function(module->self, IREE_VM_FUNCTION_LINKAGE_IMPORT, i,
/*out_function=*/NULL,
/*out_name=*/&full_name,
@@ -105,7 +108,8 @@
// Resolve the function to the module that contains it and return the
// information.
iree_vm_function_t import_function;
- IREE_RETURN_IF_ERROR(
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
iree_vm_context_resolve_function(context, full_name, &import_function));
// Query the function signature from the module that contains it; we don't
@@ -127,6 +131,7 @@
if (expected_signature.calling_convention.size &&
!iree_string_view_equal(import_signature.calling_convention,
expected_signature.calling_convention)) {
+ IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_INTERNAL,
"import function signature mismatch between %.*s "
@@ -141,9 +146,12 @@
import_signature.calling_convention.data);
}
- IREE_RETURN_IF_ERROR(module->resolve_import(
- module->self, module_state, i, &import_function, &import_signature));
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, module->resolve_import(module->self, module_state, i,
+ &import_function, &import_signature));
}
+
+ IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -199,6 +207,7 @@
iree_vm_instance_t* instance, iree_vm_module_t** modules,
iree_host_size_t module_count, iree_allocator_t allocator,
iree_vm_context_t** out_context) {
+ IREE_TRACE_ZONE_BEGIN(z0);
IREE_ASSERT_ARGUMENT(instance);
IREE_ASSERT_ARGUMENT(out_context);
*out_context = NULL;
@@ -231,10 +240,12 @@
iree_vm_context_register_modules(context, modules, module_count);
if (!iree_status_is_ok(register_status)) {
iree_vm_context_destroy(context);
+ IREE_TRACE_ZONE_END(z0);
return register_status;
}
*out_context = context;
+ IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -317,9 +328,12 @@
}
}
+ IREE_TRACE_ZONE_BEGIN(z0);
+
// Try growing both our storage lists first, if needed.
if (context->list.count + module_count > context->list.capacity) {
if (context->is_static) {
+ IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
"context was allocated as static and cannot "
"register modules after creation");
@@ -330,13 +344,16 @@
new_capacity = context->list.capacity * 2;
}
iree_vm_module_t** new_module_list;
- IREE_RETURN_IF_ERROR(iree_allocator_malloc(
- context->allocator, sizeof(iree_vm_module_t*) * new_capacity,
- (void**)&new_module_list));
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc(context->allocator,
+ sizeof(iree_vm_module_t*) * new_capacity,
+ (void**)&new_module_list));
iree_vm_module_state_t** new_module_state_list;
- IREE_RETURN_IF_ERROR(iree_allocator_malloc(
- context->allocator, sizeof(iree_vm_module_state_t*) * new_capacity,
- (void**)&new_module_state_list));
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0,
+ iree_allocator_malloc(context->allocator,
+ sizeof(iree_vm_module_state_t*) * new_capacity,
+ (void**)&new_module_state_list));
memcpy(new_module_list, context->list.modules,
sizeof(iree_vm_module_t*) * context->list.count);
memcpy(new_module_state_list, context->list.module_states,
@@ -409,12 +426,14 @@
context->list.count = original_count;
}
+ IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t IREE_API_CALL iree_vm_context_resolve_function(
const iree_vm_context_t* context, iree_string_view_t full_name,
iree_vm_function_t* out_function) {
+ IREE_TRACE_ZONE_BEGIN(z0);
IREE_ASSERT_ARGUMENT(out_function);
memset(out_function, 0, sizeof(iree_vm_function_t));
@@ -422,6 +441,7 @@
iree_string_view_t function_name;
if (iree_string_view_split(full_name, '.', &module_name, &function_name) ==
-1) {
+ IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"import name not fully-qualified (module.func): '%.*s'",
@@ -430,13 +450,15 @@
for (int i = (int)context->list.count - 1; i >= 0; --i) {
iree_vm_module_t* module = context->list.modules[i];
- if (iree_string_view_compare(module_name, iree_vm_module_name(module)) ==
- 0) {
- return iree_vm_module_lookup_function_by_name(
+ if (iree_string_view_equal(module_name, iree_vm_module_name(module))) {
+ iree_status_t status = iree_vm_module_lookup_function_by_name(
module, IREE_VM_FUNCTION_LINKAGE_EXPORT, function_name, out_function);
+ IREE_TRACE_ZONE_END(z0);
+ return status;
}
}
+ IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_NOT_FOUND,
"module '%.*s' required for import '%.*s' not "
"registered with the context",
diff --git a/iree/vm/instance.c b/iree/vm/instance.c
index 5cc92d8..1ca17b4 100644
--- a/iree/vm/instance.c
+++ b/iree/vm/instance.c
@@ -15,6 +15,7 @@
#include "iree/vm/instance.h"
#include "iree/base/atomics.h"
+#include "iree/base/tracing.h"
#include "iree/vm/builtin_types.h"
struct iree_vm_instance {
@@ -24,24 +25,29 @@
IREE_API_EXPORT iree_status_t IREE_API_CALL iree_vm_instance_create(
iree_allocator_t allocator, iree_vm_instance_t** out_instance) {
+ IREE_TRACE_ZONE_BEGIN(z0);
IREE_ASSERT_ARGUMENT(out_instance);
*out_instance = NULL;
- IREE_RETURN_IF_ERROR(iree_vm_register_builtin_types());
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(z0, iree_vm_register_builtin_types());
iree_vm_instance_t* instance = NULL;
- IREE_RETURN_IF_ERROR(iree_allocator_malloc(
- allocator, sizeof(iree_vm_instance_t), (void**)&instance));
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_allocator_malloc(allocator, sizeof(iree_vm_instance_t),
+ (void**)&instance));
instance->allocator = allocator;
iree_atomic_ref_count_init(&instance->ref_count);
*out_instance = instance;
+ IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
static void iree_vm_instance_destroy(iree_vm_instance_t* instance) {
+ IREE_TRACE_ZONE_BEGIN(z0);
IREE_ASSERT_ARGUMENT(instance);
iree_allocator_free(instance->allocator, instance);
+ IREE_TRACE_ZONE_END(z0);
}
IREE_API_EXPORT void IREE_API_CALL
diff --git a/iree/vm/module.c b/iree/vm/module.c
index 934896f..54e602f 100644
--- a/iree/vm/module.c
+++ b/iree/vm/module.c
@@ -17,6 +17,7 @@
#include <string.h>
#include "iree/base/atomics.h"
+#include "iree/base/tracing.h"
#include "iree/vm/ref.h"
IREE_API_EXPORT iree_status_t IREE_API_CALL
@@ -138,9 +139,11 @@
IREE_API_EXPORT iree_status_t IREE_API_CALL
iree_vm_module_initialize(iree_vm_module_t* module, void* self) {
+ IREE_TRACE_ZONE_BEGIN(z0);
memset(module, 0, sizeof(iree_vm_module_t));
module->self = self;
iree_atomic_ref_count_init(&module->ref_count);
+ IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -225,8 +228,10 @@
IREE_API_EXPORT iree_string_view_t IREE_API_CALL
iree_vm_function_reflection_attr(const iree_vm_function_t* function,
iree_string_view_t key) {
+ IREE_TRACE_ZONE_BEGIN(z0);
iree_vm_module_t* module = function->module;
if (!module->get_function_reflection_attr) {
+ IREE_TRACE_ZONE_END(z0);
return iree_string_view_empty();
}
for (int index = 0;; ++index) {
@@ -239,9 +244,11 @@
break;
}
if (iree_string_view_compare(key, index_key) == 0) {
+ IREE_TRACE_ZONE_END(z0);
return index_value;
}
}
+ IREE_TRACE_ZONE_END(z0);
return iree_string_view_empty();
}
diff --git a/iree/vm/stack.c b/iree/vm/stack.c
index 6fe9e36..8acdd88 100644
--- a/iree/vm/stack.c
+++ b/iree/vm/stack.c
@@ -419,6 +419,15 @@
stack->frame_storage_size = new_top;
stack->top = frame_header;
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ // TODO(benvanik): cache source location and query from module.
+ iree_string_view_t function_name = iree_vm_function_name(function);
+ IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, function_name.data,
+ function_name.size);
+ callee_frame->trace_zone = z0;
+ IREE_TRACE_ZONE_APPEND_VALUE(z0, frame_size);
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
if (out_callee_frame) *out_callee_frame = callee_frame;
return iree_ok_status();
}
@@ -435,6 +444,8 @@
stack->top->frame_cleanup_fn(&stack->top->frame);
}
+ IREE_TRACE_ZONE_END(stack->top->frame.trace_zone);
+
// Restore the frame pointer to the caller.
stack->frame_storage_size -= stack->top->frame_size;
stack->top = stack->top->parent;
diff --git a/iree/vm/stack.h b/iree/vm/stack.h
index 3f1c580..98fe693 100644
--- a/iree/vm/stack.h
+++ b/iree/vm/stack.h
@@ -20,6 +20,7 @@
#include "iree/base/alignment.h"
#include "iree/base/api.h"
+#include "iree/base/tracing.h"
#include "iree/vm/module.h"
#include "iree/vm/ref.h"
@@ -85,6 +86,10 @@
// offset (such as in the case of VM bytecode), a block identifier (compiled
// code), etc.
iree_vm_source_offset_t pc;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+ iree_zone_id_t trace_zone;
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
} iree_vm_stack_frame_t;
// Returns the implementation-defined frame storage associated with |frame|.