blob: a51301a4ffdc035505945f505a2a0e569d131874 [file] [log] [blame]
// Copyright 2019 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/base/tracing.h"
// Textually include the Tracy implementation.
// We do this here instead of relying on an external build target so that we can
// ensure our configuration specified in tracing.h is picked up.
#if IREE_TRACING_FEATURES != 0
#include "TracyClient.cpp"
#endif // IREE_TRACING_FEATURES
#if defined(TRACY_ENABLE) && IREE_TRACING_EXPERIMENTAL_CONTEXT_API
// HACK: tracy doesn't let us at this but we need it in order to create new
// queue contexts. It's an implementation detail we have to take a dependency on
// because tracy does not have an API for what we're doing (yet).
// Forward declaration of Tracy's accessor for its concurrent event queue. The
// experimental context API below uses it to create producer tokens and
// explicit producers.
namespace tracy {
moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
} // namespace tracy
#endif // TRACY_ENABLE && IREE_TRACING_EXPERIMENTAL_CONTEXT_API
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#if defined(TRACY_ENABLE) && defined(IREE_PLATFORM_WINDOWS)
// Win32 mutex handle shared by the IREEDbgHelp* functions below.
// NOTE(review): presumably used to serialize dbghelp access between Tracy and
// the rest of the process - confirm against the Tracy build configuration.
static HANDLE iree_dbghelp_mutex;
// Creates the mutex: default security attributes, not initially owned,
// unnamed. Must run before IREEDbgHelpLock/IREEDbgHelpUnlock are used.
void IREEDbgHelpInit(void) {
iree_dbghelp_mutex = CreateMutex(NULL, FALSE, NULL);
}
// Blocks with no timeout until the mutex is acquired.
void IREEDbgHelpLock(void) {
WaitForSingleObject(iree_dbghelp_mutex, INFINITE);
}
// Releases the mutex acquired by IREEDbgHelpLock.
void IREEDbgHelpUnlock(void) { ReleaseMutex(iree_dbghelp_mutex); }
#endif // TRACY_ENABLE && IREE_PLATFORM_WINDOWS
#if IREE_TRACING_FEATURES != 0
// A single published source file. iree_tracing_publish_source_file allocates
// the entry, the filename bytes, and the content bytes as one block, so both
// pointers below refer to storage that directly follows the struct.
typedef struct iree_tracing_source_file_t {
// Filename bytes; compared by length + memcmp, not NUL-terminated.
uint8_t* filename;
size_t filename_length;
// File content bytes, stored immediately after the filename bytes.
uint8_t* content;
size_t content_length;
} iree_tracing_source_file_t;
// Global registry of published source files. Entries are allocated using the
// Tracy allocator and live for the entire lifetime of the program, as Tracy
// may request the contents long past tear-down.
typedef struct iree_tracing_source_file_storage_t {
// Guards |capacity|, |count|, and |files|.
tracy::TracyMutex mutex;
// Number of slots allocated in |files|.
iree_host_size_t capacity;
// Number of slots in |files| that hold a published file.
iree_host_size_t count;
// Growable array of pointers to published entries.
iree_tracing_source_file_t** files;
} iree_tracing_source_file_storage_t;
// The single process-wide registry instance (zero-initialized static).
static iree_tracing_source_file_storage_t iree_tracing_source_file_storage;
static char* iree_tracing_tracy_source_file_callback(void* user_data,
const char* filename,
size_t& out_size) {
iree_tracing_source_file_storage_t* storage =
(iree_tracing_source_file_storage_t*)user_data;
const iree_host_size_t filename_length = strlen(filename);
char* content_copy = NULL;
iree_host_size_t content_length = 0;
storage->mutex.lock();
for (iree_host_size_t i = 0; i < storage->count; ++i) {
iree_tracing_source_file_t* source_file = storage->files[i];
if (filename_length != source_file->filename_length) continue;
// NOTE: no case-insensitive/fuzzy comparison (yet). The paths are
// generated by the compiler in the same place and they should always line
// up but if we start embedding arbitrary user files we may need to
// normalize paths.
if (memcmp(filename, source_file->filename, filename_length) == 0) {
content_copy =
(char*)tracy::tracy_malloc_fast(source_file->content_length);
memcpy(content_copy, source_file->content, source_file->content_length);
content_length = source_file->content_length;
break;
}
}
storage->mutex.unlock();
out_size = content_length;
return content_copy;
}
// Initializes the Tracy integration by installing the source file provider
// callback, with the global source file registry as its user data.
void iree_tracing_tracy_initialize() {
// Register a single source provider callback with Tracy. Tracy only supports
// one at a time and the callback must remain valid until program exit.
tracy::Profiler::SourceCallbackRegister(
iree_tracing_tracy_source_file_callback,
&iree_tracing_source_file_storage);
}
// Tears down the Tracy integration. Only Apple platforms do any work here; on
// all other platforms the body compiles to nothing.
void iree_tracing_tracy_deinitialize() {
#if defined(IREE_PLATFORM_APPLE)
// Synchronously shut down the profiler service.
// This is required on some platforms to support TRACY_NO_EXIT=1 such as
// MacOS and iOS. It should be harmless on other platforms as it returns
// quickly if TRACY_NO_EXIT=1 is not set.
// See: https://github.com/wolfpld/tracy/issues/8
tracy::GetProfiler().RequestShutdown();
// Poll in 10ms steps until the profiler reports that shutdown has finished.
while (!tracy::GetProfiler().HasShutdownFinished()) {
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
#endif // IREE_PLATFORM_*
}
// Publishes a source file so that Tracy can later fetch its contents through
// the registered source callback.
//
// |filename| (|filename_length| bytes, no NUL terminator required) and
// |content| (|content_length| bytes) are copied into a single allocation made
// with the Tracy allocator. The entry is appended to the global registry and
// is never freed.
void iree_tracing_publish_source_file(const void* filename,
                                      size_t filename_length,
                                      const void* content,
                                      size_t content_length) {
  iree_tracing_source_file_storage_t* storage =
      &iree_tracing_source_file_storage;

  // NOTE: this does not currently check to see whether the file has already
  // been published. We could but in most valid usage we don't need to.

  // Allocate storage for the file - we do this as a single alloc of the entry
  // with the filename and content tacked on.
  //
  // This uses tracy_malloc rather than tracy_malloc_fast. This function can be
  // called from any thread, and the _fast variant skips the per-thread
  // allocator initialization that tracy_malloc performs.
  const size_t total_size =
      sizeof(iree_tracing_source_file_t) + filename_length + content_length;
  uint8_t* entry_ptr = (uint8_t*)tracy::tracy_malloc(total_size);
  iree_tracing_source_file_t* source_file =
      (iree_tracing_source_file_t*)entry_ptr;
  source_file->filename = entry_ptr + sizeof(*source_file);
  source_file->filename_length = filename_length;
  // Skip zero-length copies: memcpy with a NULL source is undefined even when
  // the length is 0.
  if (filename_length) {
    memcpy(source_file->filename, filename, filename_length);
  }
  source_file->content = source_file->filename + filename_length;
  source_file->content_length = content_length;
  if (content_length) {
    memcpy(source_file->content, content, content_length);
  }

  storage->mutex.lock();

  // Grow capacity of the storage index if needed. The check fires one slot
  // before the index is actually full; that is conservative but harmless.
  if (storage->count + 1 >= storage->capacity) {
    storage->capacity = std::max((iree_host_size_t)32, storage->capacity * 2);
    storage->files = (iree_tracing_source_file_t**)tracy::tracy_realloc(
        storage->files, storage->capacity * sizeof(*storage->files));
  }

  // Append the file.
  storage->files[storage->count++] = source_file;

  storage->mutex.unlock();
}
// Begins a zone on the calling thread using a statically allocated source
// location and returns the new zone ID.
//
// |src_loc| is passed to Tracy by address. If |name_length| is non-zero,
// |name| is copied with the Tracy allocator and sent as a zone name override.
iree_zone_id_t iree_tracing_zone_begin_impl(
    const iree_tracing_location_t* src_loc, const char* name,
    size_t name_length) {
  const iree_zone_id_t zone_id = tracy::GetProfiler().GetNextZoneId();

#ifndef TRACY_NO_VERIFY
  {
    // Validation event carrying the zone ID precedes the zone begin event.
    TracyQueuePrepareC(tracy::QueueType::ZoneValidation);
    tracy::MemWrite(&item->zoneValidation.id, zone_id);
    TracyQueueCommitC(zoneValidationThread);
  }
#endif  // TRACY_NO_VERIFY

  {
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
    TracyQueuePrepareC(tracy::QueueType::ZoneBeginCallstack);
#else
    TracyQueuePrepareC(tracy::QueueType::ZoneBegin);
#endif  // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
    tracy::MemWrite(&item->zoneBegin.time, tracy::Profiler::GetTime());
    tracy::MemWrite(&item->zoneBegin.srcloc,
                    reinterpret_cast<uint64_t>(src_loc));
    TracyQueueCommitC(zoneBeginThread);
  }

#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
  tracy::GetProfiler().SendCallstack(IREE_TRACING_MAX_CALLSTACK_DEPTH);
#endif  // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS

  if (name_length) {
#ifndef TRACY_NO_VERIFY
    {
      // The name event gets its own validation event with the same zone ID.
      TracyQueuePrepareC(tracy::QueueType::ZoneValidation);
      tracy::MemWrite(&item->zoneValidation.id, zone_id);
      TracyQueueCommitC(zoneValidationThread);
    }
#endif  // TRACY_NO_VERIFY
    // The name is cloned with the Tracy allocator; the queue item carries the
    // pointer to the copy.
    auto name_ptr = reinterpret_cast<char*>(tracy::tracy_malloc(name_length));
    memcpy(name_ptr, name, name_length);
    TracyQueuePrepareC(tracy::QueueType::ZoneName);
    tracy::MemWrite(&item->zoneTextFat.text,
                    reinterpret_cast<uint64_t>(name_ptr));
    // MemWrite copies sizeof(T) bytes, so the value must have the width of the
    // size field. A 64-bit value here would write past the field. This matches
    // the uint16_t write used for the same field elsewhere in this file; names
    // longer than 65535 bytes are truncated by the cast.
    tracy::MemWrite(&item->zoneTextFat.size,
                    static_cast<uint16_t>(name_length));
    TracyQueueCommitC(zoneTextFatThread);
  }

  return zone_id;
}
// Begins a zone on the calling thread using a source location built at
// runtime from the provided file name, line, function name, and zone name
// (each string passed with an explicit length). Returns the new zone ID.
iree_zone_id_t iree_tracing_zone_begin_external_impl(
const char* file_name, size_t file_name_length, uint32_t line,
const char* function_name, size_t function_name_length, const char* name,
size_t name_length) {
// Have Tracy allocate the source location record; the returned value is
// written into the zone begin event below.
uint64_t src_loc = tracy::Profiler::AllocSourceLocation(
line, file_name, file_name_length, function_name, function_name_length,
name, name_length);
const iree_zone_id_t zone_id = tracy::GetProfiler().GetNextZoneId();
#ifndef TRACY_NO_VERIFY
{
// Validation event carrying the zone ID precedes the zone begin event.
TracyQueuePrepareC(tracy::QueueType::ZoneValidation);
tracy::MemWrite(&item->zoneValidation.id, zone_id);
TracyQueueCommitC(zoneValidationThread);
}
#endif // TRACY_NO_VERIFY
{
// The *AllocSrcLoc queue types are used because |src_loc| was allocated
// above rather than being a static location.
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
TracyQueuePrepareC(tracy::QueueType::ZoneBeginAllocSrcLocCallstack);
#else
TracyQueuePrepareC(tracy::QueueType::ZoneBeginAllocSrcLoc);
#endif // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
tracy::MemWrite(&item->zoneBegin.time, tracy::Profiler::GetTime());
tracy::MemWrite(&item->zoneBegin.srcloc, src_loc);
TracyQueueCommitC(zoneBeginThread);
}
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
tracy::GetProfiler().SendCallstack(IREE_TRACING_MAX_CALLSTACK_DEPTH);
#endif // IREE_TRACING_FEATURE_INSTRUMENTATION_CALLSTACKS
return zone_id;
}
// Ends the zone identified by |zone_id| by wrapping it in a Tracy zone
// context and forwarding to Tracy's C API.
void iree_tracing_zone_end(iree_zone_id_t zone_id) {
___tracy_emit_zone_end(iree_tracing_make_zone_ctx(zone_id));
}
// Configures how the plot named |name_literal| is displayed. |plot_type| is
// converted to tracy::PlotFormatType; |step|, |fill|, and |color| are
// forwarded unchanged.
void iree_tracing_set_plot_type_impl(const char* name_literal,
uint8_t plot_type, bool step, bool fill,
uint32_t color) {
tracy::Profiler::ConfigurePlot(name_literal,
static_cast<tracy::PlotFormatType>(plot_type),
step, fill, color);
}
// Records a signed 64-bit integer sample on the plot named |name_literal|.
void iree_tracing_plot_value_i64_impl(const char* name_literal, int64_t value) {
tracy::Profiler::PlotData(name_literal, value);
}
// Records a 32-bit float sample on the plot named |name_literal|.
void iree_tracing_plot_value_f32_impl(const char* name_literal, float value) {
tracy::Profiler::PlotData(name_literal, value);
}
// Records a 64-bit float sample on the plot named |name_literal|.
void iree_tracing_plot_value_f64_impl(const char* name_literal, double value) {
tracy::Profiler::PlotData(name_literal, value);
}
// Announces a new mutex to Tracy. Allocates a lock ID from Tracy's lock
// counter, stores it in |out_lock_id|, and emits a LockAnnounce event that
// references |src_loc| by address.
void iree_tracing_mutex_announce(const iree_tracing_location_t* src_loc,
uint32_t* out_lock_id) {
uint32_t lock_id =
tracy::GetLockCounter().fetch_add(1, std::memory_order_relaxed);
// Debug-only check that the 32-bit lock ID space has not been exhausted.
assert(lock_id != std::numeric_limits<uint32_t>::max());
*out_lock_id = lock_id;
// The event is written between QueueSerial() and QueueSerialFinish().
auto item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockAnnounce);
tracy::MemWrite(&item->lockAnnounce.id, lock_id);
tracy::MemWrite(&item->lockAnnounce.time, tracy::Profiler::GetTime());
tracy::MemWrite(&item->lockAnnounce.lckloc,
reinterpret_cast<uint64_t>(src_loc));
tracy::MemWrite(&item->lockAnnounce.type, tracy::LockType::Lockable);
tracy::Profiler::QueueSerialFinish();
}
// Emits a LockTerminate event, timestamped now, for the mutex |lock_id|.
void iree_tracing_mutex_terminate(uint32_t lock_id) {
auto item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockTerminate);
tracy::MemWrite(&item->lockTerminate.id, lock_id);
tracy::MemWrite(&item->lockTerminate.time, tracy::Profiler::GetTime());
tracy::Profiler::QueueSerialFinish();
}
// Emits a LockWait event recording that the calling thread is about to try
// to acquire the mutex |lock_id|.
void iree_tracing_mutex_before_lock(uint32_t lock_id) {
auto item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockWait);
tracy::MemWrite(&item->lockWait.thread, tracy::GetThreadHandle());
tracy::MemWrite(&item->lockWait.id, lock_id);
tracy::MemWrite(&item->lockWait.time, tracy::Profiler::GetTime());
tracy::Profiler::QueueSerialFinish();
}
// Emits a LockObtain event recording that the calling thread has acquired
// the mutex |lock_id|.
void iree_tracing_mutex_after_lock(uint32_t lock_id) {
auto item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockObtain);
tracy::MemWrite(&item->lockObtain.thread, tracy::GetThreadHandle());
tracy::MemWrite(&item->lockObtain.id, lock_id);
tracy::MemWrite(&item->lockObtain.time, tracy::Profiler::GetTime());
tracy::Profiler::QueueSerialFinish();
}
// Reports the outcome of a try-lock on the mutex |lock_id|. A successful
// acquisition is recorded exactly like a regular lock; a failed attempt emits
// nothing.
void iree_tracing_mutex_after_try_lock(uint32_t lock_id, bool was_acquired) {
  if (!was_acquired) return;
  iree_tracing_mutex_after_lock(lock_id);
}
// Emits a LockRelease event recording that the calling thread has released
// the mutex |lock_id|.
void iree_tracing_mutex_after_unlock(uint32_t lock_id) {
auto item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::LockRelease);
// NOTE(review): the thread is written through the lockReleaseShared member
// while the event type is LockRelease and id/time use lockRelease - confirm
// this matches the queue item layout of the Tracy version in use.
tracy::MemWrite(&item->lockReleaseShared.thread, tracy::GetThreadHandle());
tracy::MemWrite(&item->lockRelease.id, lock_id);
tracy::MemWrite(&item->lockRelease.time, tracy::Profiler::GetTime());
tracy::Profiler::QueueSerialFinish();
}
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
// Returns the current timestamp from Tracy's profiler clock.
int64_t iree_tracing_time(void) { return tracy::Profiler::GetTime(); }
// Returns the value of tracy::GetFrequencyQpc().
// NOTE(review): presumably the tick frequency matching iree_tracing_time() -
// confirm its availability/meaning on non-Windows platforms.
int64_t iree_tracing_frequency(void) { return tracy::GetFrequencyQpc(); }
// Allocates a GPU context ID and announces the context to Tracy.
//
// Emits a GpuNewContext event carrying the initial |cpu_timestamp| /
// |gpu_timestamp| pair, the |timestamp_period|, and the context |type|,
// followed by a GpuContextName event carrying a copy of |name|
// (|name_length| bytes). When |is_calibrated| is set the context is flagged
// as calibrated. Returns the allocated context ID.
uint8_t iree_tracing_gpu_context_allocate(iree_tracing_gpu_context_type_t type,
                                          const char* name, size_t name_length,
                                          bool is_calibrated,
                                          uint64_t cpu_timestamp,
                                          uint64_t gpu_timestamp,
                                          float timestamp_period) {
  // Allocate the process-unique GPU context ID. There's a max of 255 available;
  // if we are recreating devices a lot we may exceed that. Don't do that, or
  // wrap around and get weird (but probably still usable) numbers.
  uint8_t context_id =
      tracy::GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed);
  if (context_id >= 255) {
    context_id %= 255;
  }

  uint8_t context_flags = 0;
  if (is_calibrated) {
    // Tell tracy we'll be passing calibrated timestamps and not to mess with
    // the times. We'll periodically send GpuCalibration events in case the
    // times drift.
    context_flags |= tracy::GpuContextCalibration;
  }
  {
    auto* item = tracy::Profiler::QueueSerial();
    tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuNewContext);
    tracy::MemWrite(&item->gpuNewContext.cpuTime, cpu_timestamp);
    tracy::MemWrite(&item->gpuNewContext.gpuTime, gpu_timestamp);
    // The context is not associated with a thread; zero the thread field.
    memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
    tracy::MemWrite(&item->gpuNewContext.period, timestamp_period);
    tracy::MemWrite(&item->gpuNewContext.context, context_id);
    tracy::MemWrite(&item->gpuNewContext.flags, context_flags);
    tracy::MemWrite(&item->gpuNewContext.type, (tracy::GpuContextType)type);
    tracy::Profiler::QueueSerialFinish();
  }

  // Send the name of the context along.
  // NOTE: Tracy will unconditionally free the name so we must clone it here.
  // Since internally Tracy will use its own rpmalloc implementation we must
  // make sure we allocate from the same source.
  char* cloned_name = (char*)tracy::tracy_malloc(name_length);
  memcpy(cloned_name, name, name_length);
  {
    auto* item = tracy::Profiler::QueueSerial();
    tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuContextName);
    tracy::MemWrite(&item->gpuContextNameFat.context, context_id);
    tracy::MemWrite(&item->gpuContextNameFat.ptr, (uint64_t)cloned_name);
    // MemWrite copies sizeof(T) bytes, so the length must be narrowed to the
    // width of the 16-bit size field. Passing a size_t would write past it.
    // Names longer than 65535 bytes are truncated by the cast.
    tracy::MemWrite(&item->gpuContextNameFat.size,
                    static_cast<uint16_t>(name_length));
    tracy::Profiler::QueueSerialFinish();
  }

  return context_id;
}
// Emits a GpuCalibration event for |context_id| carrying a
// |cpu_timestamp| / |gpu_timestamp| pair and the |cpu_delta| value.
void iree_tracing_gpu_context_calibrate(uint8_t context_id, int64_t cpu_delta,
int64_t cpu_timestamp,
int64_t gpu_timestamp) {
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuCalibration);
tracy::MemWrite(&item->gpuCalibration.gpuTime, gpu_timestamp);
tracy::MemWrite(&item->gpuCalibration.cpuTime, cpu_timestamp);
tracy::MemWrite(&item->gpuCalibration.cpuDelta, cpu_delta);
tracy::MemWrite(&item->gpuCalibration.context, context_id);
tracy::Profiler::QueueSerialFinish();
}
// Emits a serial GPU zone begin event for |query_id| on |context_id|. The
// event records the current CPU time, the calling thread, and the address of
// the static source location |src_loc|.
void iree_tracing_gpu_zone_begin(uint8_t context_id, uint16_t query_id,
const iree_tracing_location_t* src_loc) {
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuZoneBeginSerial);
tracy::MemWrite(&item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime());
tracy::MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)src_loc);
tracy::MemWrite(&item->gpuZoneBegin.thread, tracy::GetThreadHandle());
tracy::MemWrite(&item->gpuZoneBegin.queryId, query_id);
tracy::MemWrite(&item->gpuZoneBegin.context, context_id);
tracy::Profiler::QueueSerialFinish();
}
// Same as iree_tracing_gpu_zone_begin but with a source location built at
// runtime from the provided file name, line, function name, and zone name
// (each string passed with an explicit length).
void iree_tracing_gpu_zone_begin_external(
uint8_t context_id, uint16_t query_id, const char* file_name,
size_t file_name_length, uint32_t line, const char* function_name,
size_t function_name_length, const char* name, size_t name_length) {
// Allocate the source location before starting the serial queue item.
const auto src_loc = tracy::Profiler::AllocSourceLocation(
line, file_name, file_name_length, function_name, function_name_length,
name, name_length);
auto* item = tracy::Profiler::QueueSerial();
// The AllocSrcLoc variant is used because |src_loc| was allocated above.
tracy::MemWrite(&item->hdr.type,
tracy::QueueType::GpuZoneBeginAllocSrcLocSerial);
tracy::MemWrite(&item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime());
tracy::MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)src_loc);
tracy::MemWrite(&item->gpuZoneBegin.thread, tracy::GetThreadHandle());
tracy::MemWrite(&item->gpuZoneBegin.queryId, query_id);
tracy::MemWrite(&item->gpuZoneBegin.context, context_id);
tracy::Profiler::QueueSerialFinish();
}
// Emits a serial GPU zone end event for |query_id| on |context_id|, recording
// the current CPU time and the calling thread.
void iree_tracing_gpu_zone_end(uint8_t context_id, uint16_t query_id) {
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuZoneEndSerial);
tracy::MemWrite(&item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime());
tracy::MemWrite(&item->gpuZoneEnd.thread, tracy::GetThreadHandle());
tracy::MemWrite(&item->gpuZoneEnd.queryId, query_id);
tracy::MemWrite(&item->gpuZoneEnd.context, context_id);
tracy::Profiler::QueueSerialFinish();
}
// Emits a GpuTime event reporting |gpu_timestamp| for the query |query_id|
// on |context_id|.
void iree_tracing_gpu_zone_notify(uint8_t context_id, uint16_t query_id,
int64_t gpu_timestamp) {
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuTime);
tracy::MemWrite(&item->gpuTime.gpuTime, gpu_timestamp);
tracy::MemWrite(&item->gpuTime.queryId, query_id);
tracy::MemWrite(&item->gpuTime.context, context_id);
tracy::Profiler::QueueSerialFinish();
}
#endif // IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING
// Returns |ptr| unchanged.
// NOTE(review): presumably an out-of-line identity function so the compiler
// cannot see through pointer uses at allocation tracking sites - confirm
// against the callers in tracing.h.
void* iree_tracing_obscure_ptr(void* ptr) { return ptr; }
#endif // IREE_TRACING_FEATURE_ALLOCATION_TRACKING
//===----------------------------------------------------------------------===//
// Experimental Tracing Interop API
//===----------------------------------------------------------------------===//
#if IREE_TRACING_EXPERIMENTAL_CONTEXT_API
// A tracing context that owns its own explicit producer on Tracy's queue and
// a synthetic thread ID, so events can be attributed to it rather than to the
// calling OS thread.
struct iree_tracing_context_t {
// Process-wide counter for synthetic thread IDs; starts at 0x80000000.
inline static std::atomic<uint32_t> next_tracing_thread_id{0x80000000u};
// Producer token created against Tracy's global queue.
tracy::moodycamel::ProducerToken token_detail;
// Wrapper holding the explicit producer obtained from |token_detail|.
tracy::ProducerWrapper token;
// Synthetic thread ID assigned to this context.
uint32_t thread_id = 0;
// Creates the producer, takes the next synthetic thread ID, and stamps that
// ID onto the producer.
iree_tracing_context_t()
: token_detail(tracy::GetQueue()),
token({tracy::GetQueue().get_explicit_producer(token_detail)}),
thread_id(iree_tracing_context_t::next_tracing_thread_id++) {
token.ptr->threadId = thread_id;
}
};
// Begins writing a queue item of |queue_type| on the explicit producer owned
// by |context|. Declares the locals __magic, __token, __tail, and item in the
// enclosing scope, so it can be used at most once per scope and must be
// paired with IREE_TRACING_CONTEXT_END_WRITE in that same scope. After this
// macro, `item` points at the queue item to populate and its header type has
// already been written.
#define IREE_TRACING_CONTEXT_BEGIN_WRITE(context, queue_type) \
tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
tracy::moodycamel::ConcurrentQueue<tracy::QueueItem>::ExplicitProducer* \
__token = (context)->token.ptr; \
auto& __tail = __token->get_tail_index(); \
auto item = __token->enqueue_begin(__magic); \
tracy::MemWrite(&item->hdr.type, (queue_type));
// Commits the item started by IREE_TRACING_CONTEXT_BEGIN_WRITE by storing the
// advanced tail index (__magic + 1) with release ordering. |context| is
// unused; the macro relies on the locals declared by the BEGIN macro.
#define IREE_TRACING_CONTEXT_END_WRITE(context) \
__tail.store(__magic + 1, std::memory_order_release);
// Allocates a new tracing context with operator new. The caller releases it
// with iree_tracing_context_free. |name| and |name_length| are currently
// unused (see TODO below).
iree_tracing_context_t* iree_tracing_context_allocate(
const char* name, iree_host_size_t name_length) {
iree_tracing_context_t* context = new iree_tracing_context_t();
// TODO(benvanik): upstream a tracy::Profiler::SetThreadNameWithHint that
// only updates the GetThreadNameData() linked list with a new entry. Today
// there's no way to set the thread name explicitly.
return context;
}
// Destroys a context created by iree_tracing_context_allocate. Passing NULL
// is allowed and does nothing.
void iree_tracing_context_free(iree_tracing_context_t* context) {
  // delete on a null pointer is a no-op, so no explicit check is required.
  delete context;
}
// Emits a GpuCalibration event on |context|'s producer for |executor_id|,
// carrying a |host_timestamp| / |executor_timestamp| pair and |cpu_delta|.
void iree_tracing_context_calibrate_executor(
iree_tracing_context_t* context, iree_tracing_executor_id_t executor_id,
int64_t cpu_delta, uint64_t host_timestamp, uint64_t executor_timestamp) {
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::GpuCalibration);
// The executor timestamp goes in the GPU time slot and the host timestamp in
// the CPU time slot of the calibration event.
tracy::MemWrite(&item->gpuCalibration.gpuTime, executor_timestamp);
tracy::MemWrite(&item->gpuCalibration.cpuTime, host_timestamp);
tracy::MemWrite(&item->gpuCalibration.cpuDelta, cpu_delta);
tracy::MemWrite(&item->gpuCalibration.context, executor_id);
IREE_TRACING_CONTEXT_END_WRITE(context);
}
// Emits a ZoneBegin event on |context|'s producer using the caller-provided
// |timestamp| and the address of the static source location |src_loc|.
void iree_tracing_context_zone_begin(iree_tracing_context_t* context,
uint64_t timestamp,
const iree_tracing_location_t* src_loc) {
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::ZoneBegin);
tracy::MemWrite(&item->zoneBegin.time, timestamp);
tracy::MemWrite(&item->zoneBegin.srcloc, reinterpret_cast<uint64_t>(src_loc));
IREE_TRACING_CONTEXT_END_WRITE(context);
}
// Emits a ZoneEnd event on |context|'s producer using the caller-provided
// |timestamp|.
void iree_tracing_context_zone_end(iree_tracing_context_t* context,
uint64_t timestamp) {
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::ZoneEnd);
tracy::MemWrite(&item->zoneEnd.time, timestamp);
IREE_TRACING_CONTEXT_END_WRITE(context);
}
// Emits a ZoneValue event carrying |value| on |context|'s producer.
// NOTE(review): the function name says i64 but the parameter is uint64_t -
// confirm against the declaration in tracing.h.
void iree_tracing_context_zone_value_i64(iree_tracing_context_t* context,
uint64_t value) {
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::ZoneValue);
tracy::MemWrite(&item->zoneValue.value, value);
IREE_TRACING_CONTEXT_END_WRITE(context);
}
// Attaches the NUL-terminated string |value| as text on |context|.
void iree_tracing_context_zone_value_text_literal(
    iree_tracing_context_t* context, const char* value) {
  // NOTE: no literal tracing support, have to use the slow path: measure the
  // string and forward it as dynamic text.
  const size_t value_length = strlen(value);
  iree_tracing_context_zone_value_text_dynamic(context, value, value_length);
}
// Emits a ZoneText event on |context|'s producer carrying a copy of |value|
// (|value_length| bytes, no NUL terminator required).
void iree_tracing_context_zone_value_text_dynamic(
iree_tracing_context_t* context, const char* value,
iree_host_size_t value_length) {
// Clone the text with the Tracy allocator; the event carries the pointer to
// the copy.
auto ptr = (char*)tracy::tracy_malloc(value_length);
memcpy(ptr, value, value_length);
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::ZoneText);
tracy::MemWrite(&item->zoneTextFat.text, (uint64_t)ptr);
// The length is narrowed to 16 bits; longer values are truncated by the cast.
tracy::MemWrite(&item->zoneTextFat.size, (uint16_t)value_length);
IREE_TRACING_CONTEXT_END_WRITE(context);
}
// TODO(benvanik): figure out why serial recording works with GPU zones and
// thread-local recording doesn't (sometimes?). May be timing related.
// When non-zero, the execution zone functions below record through Tracy's
// serial queue instead of through the context's own producer.
#define IREE_TRACING_CONTEXT_SERIAL_FALLBACK 1
// Emits a GPU zone begin event for |query_id| on |executor_id|, attributed to
// |context|'s synthetic thread ID. The event uses the caller-provided
// |timestamp| and references the static source location |src_loc| by address.
//
// With IREE_TRACING_CONTEXT_SERIAL_FALLBACK set, the event goes through
// Tracy's serial queue; otherwise it goes through |context|'s own producer.
void iree_tracing_context_execution_zone_begin(
    iree_tracing_context_t* context, uint64_t timestamp,
    const iree_tracing_location_t* src_loc,
    iree_tracing_executor_id_t executor_id, iree_tracing_query_id_t query_id) {
  // MemWrite copies sizeof(T) bytes, so the pointer must be widened to 64 bits
  // before it is written. A raw pointer would fill only part of the field on
  // 32-bit targets.
  const uint64_t src_loc_value = reinterpret_cast<uint64_t>(src_loc);
#if IREE_TRACING_CONTEXT_SERIAL_FALLBACK
  auto* item = tracy::Profiler::QueueSerial();
  tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuZoneBeginSerial);
  tracy::MemWrite(&item->gpuZoneBegin.cpuTime, timestamp);
  tracy::MemWrite(&item->gpuZoneBegin.srcloc, src_loc_value);
  tracy::MemWrite(&item->gpuZoneBegin.thread, context->thread_id);
  tracy::MemWrite(&item->gpuZoneBegin.queryId, query_id);
  tracy::MemWrite(&item->gpuZoneBegin.context, executor_id);
  tracy::Profiler::QueueSerialFinish();
#else
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::GpuZoneBegin);
  tracy::MemWrite(&item->gpuZoneBegin.cpuTime, timestamp);
  tracy::MemWrite(&item->gpuZoneBegin.thread, context->thread_id);
  tracy::MemWrite(&item->gpuZoneBegin.queryId, query_id);
  tracy::MemWrite(&item->gpuZoneBegin.context, executor_id);
  tracy::MemWrite(&item->gpuZoneBegin.srcloc, src_loc_value);
  IREE_TRACING_CONTEXT_END_WRITE(context);
#endif  // IREE_TRACING_CONTEXT_SERIAL_FALLBACK
}
// Emits a GPU zone end event for |query_id| on |executor_id|, attributed to
// |context|'s synthetic thread ID, using the caller-provided |timestamp|.
// With IREE_TRACING_CONTEXT_SERIAL_FALLBACK set, the event goes through
// Tracy's serial queue; otherwise it goes through |context|'s own producer.
void iree_tracing_context_execution_zone_end(
iree_tracing_context_t* context, uint64_t timestamp,
iree_tracing_executor_id_t executor_id, iree_tracing_query_id_t query_id) {
#if IREE_TRACING_CONTEXT_SERIAL_FALLBACK
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuZoneEndSerial);
tracy::MemWrite(&item->gpuZoneEnd.cpuTime, timestamp);
tracy::MemWrite(&item->gpuZoneEnd.thread, context->thread_id);
tracy::MemWrite(&item->gpuZoneEnd.queryId, query_id);
tracy::MemWrite(&item->gpuZoneEnd.context, executor_id);
tracy::Profiler::QueueSerialFinish();
#else
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::GpuZoneEnd);
tracy::MemWrite(&item->gpuZoneEnd.cpuTime, timestamp);
tracy::MemWrite(&item->gpuZoneEnd.thread, context->thread_id);
tracy::MemWrite(&item->gpuZoneEnd.queryId, query_id);
tracy::MemWrite(&item->gpuZoneEnd.context, executor_id);
IREE_TRACING_CONTEXT_END_WRITE(context);
#endif // IREE_TRACING_CONTEXT_SERIAL_FALLBACK
}
// Emits a single GpuTime event reporting |query_timestamp| for |query_id| on
// |executor_id|.
//
// With IREE_TRACING_CONTEXT_SERIAL_FALLBACK set, the event goes through
// Tracy's serial queue; otherwise it goes through |context|'s own producer.
void iree_tracing_context_execution_zone_notify(
    iree_tracing_context_t* context, iree_tracing_executor_id_t executor_id,
    iree_tracing_query_id_t query_id, uint64_t query_timestamp) {
#if IREE_TRACING_CONTEXT_SERIAL_FALLBACK
  // Emit the event once, inline. Calling iree_tracing_gpu_zone_notify as well
  // would queue the same GpuTime event a second time. It would also tie this
  // function to the device instrumentation feature that guards that function.
  auto* item = tracy::Profiler::QueueSerial();
  tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuTime);
  tracy::MemWrite(&item->gpuTime.gpuTime, query_timestamp);
  tracy::MemWrite(&item->gpuTime.queryId, query_id);
  tracy::MemWrite(&item->gpuTime.context, executor_id);
  tracy::Profiler::QueueSerialFinish();
#else
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::GpuTime);
  tracy::MemWrite(&item->gpuTime.gpuTime, query_timestamp);
  tracy::MemWrite(&item->gpuTime.queryId, query_id);
  tracy::MemWrite(&item->gpuTime.context, executor_id);
  IREE_TRACING_CONTEXT_END_WRITE(context);
#endif  // IREE_TRACING_CONTEXT_SERIAL_FALLBACK
}
// Placeholder for recording a named-pool allocation on |context|. Currently
// a no-op: all parameters are ignored and nothing is emitted.
void iree_tracing_context_memory_alloc(iree_tracing_context_t* context,
uint64_t timestamp, const char* pool,
uint64_t ptr, uint64_t size) {
// TODO(benvanik): add a thread override to MemAllocNamed - it does shady
// things with m_memNamePayload that we can't easily replicate outside of the
// tracy implementation.
}
// Placeholder for recording a named-pool free on |context|. Currently a
// no-op: all parameters are ignored and nothing is emitted.
void iree_tracing_context_memory_free(iree_tracing_context_t* context,
uint64_t timestamp, const char* pool,
uint64_t ptr) {
// TODO(benvanik): add a thread override to MemFreeNamed- it does shady
// things with m_memNamePayload that we can't easily replicate outside of the
// tracy implementation.
}
// Emits a MessageLiteral event on |context|'s producer with the
// caller-provided |timestamp|. The text is passed by address without being
// copied, so |value| must remain valid after this call returns.
void iree_tracing_context_message_literal(iree_tracing_context_t* context,
uint64_t timestamp,
const char* value) {
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::MessageLiteral);
tracy::MemWrite(&item->messageLiteral.time, timestamp);
tracy::MemWrite(&item->messageLiteral.text, (uint64_t)value);
IREE_TRACING_CONTEXT_END_WRITE(context);
}
// Emits a Message event on |context|'s producer with the caller-provided
// |timestamp|, carrying a copy of |value| (|value_length| bytes, no NUL
// terminator required).
void iree_tracing_context_message_dynamic(iree_tracing_context_t* context,
uint64_t timestamp, const char* value,
iree_host_size_t value_length) {
// Clone the text with the Tracy allocator; the event carries the pointer to
// the copy.
auto ptr = (char*)tracy::tracy_malloc(value_length);
memcpy(ptr, value, value_length);
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::Message);
tracy::MemWrite(&item->messageFat.time, timestamp);
tracy::MemWrite(&item->messageFat.text, (uint64_t)ptr);
// The length is narrowed to 16 bits; longer values are truncated by the cast.
tracy::MemWrite(&item->messageFat.size, (uint16_t)value_length);
IREE_TRACING_CONTEXT_END_WRITE(context);
}
// Emits a PlotConfig event on |context|'s producer for the plot named
// |name_literal|. The name is passed by address without being copied;
// |type|, |step|, and |fill| are each written as a single byte.
void iree_tracing_context_plot_config(iree_tracing_context_t* context,
const char* name_literal, uint8_t type,
bool step, bool fill, uint32_t color) {
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::PlotConfig);
tracy::MemWrite(&item->plotConfig.name, (uint64_t)name_literal);
tracy::MemWrite(&item->plotConfig.type, (uint8_t)type);
tracy::MemWrite(&item->plotConfig.step, (uint8_t)step);
tracy::MemWrite(&item->plotConfig.fill, (uint8_t)fill);
tracy::MemWrite(&item->plotConfig.color, color);
IREE_TRACING_CONTEXT_END_WRITE(context);
}
// Emits a PlotDataInt event on |context|'s producer, recording |value| at the
// caller-provided |timestamp| for the plot named |plot_name|. The name is
// passed by address without being copied.
void iree_tracing_context_plot_value_i64(iree_tracing_context_t* context,
uint64_t timestamp,
const char* plot_name, int64_t value) {
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::PlotDataInt);
tracy::MemWrite(&item->plotDataInt.name, (uint64_t)plot_name);
tracy::MemWrite(&item->plotDataInt.time, timestamp);
tracy::MemWrite(&item->plotDataInt.val, value);
IREE_TRACING_CONTEXT_END_WRITE(context);
}
#endif // IREE_TRACING_EXPERIMENTAL_CONTEXT_API
#endif // IREE_TRACING_FEATURES
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus