[hip][cuda] Added finer grained tracing options to hip. (#18180)
This allows us to specify the verbosity at which we want
device-side tracing. Command-buffer level tracing has
a significantly lower overhead if we do not need
individual kernel timing.
---------
Signed-off-by: Andrew Woloszyn <andrew.woloszyn@gmail.com>
diff --git a/runtime/src/iree/hal/drivers/cuda/api.h b/runtime/src/iree/hal/drivers/cuda/api.h
index a53ada0..d402422 100644
--- a/runtime/src/iree/hal/drivers/cuda/api.h
+++ b/runtime/src/iree/hal/drivers/cuda/api.h
@@ -77,15 +77,14 @@
// Specifies how command buffers are recorded and executed.
iree_hal_cuda_command_buffer_mode_t command_buffer_mode;
- // Enables tracing of command buffers when IREE tracing is enabled.
- // May take advantage of additional extensions for more accurate timing or
- // hardware-specific performance counters.
+ // Controls the verbosity of command buffer tracing when IREE
+ // tracing is enabled.
//
// NOTE: tracing has a non-trivial overhead and will skew the timing of
- // submissions and introduce false barriers between dispatches. Use this to
- // identify slow dispatches and refine from there; be wary of whole-program
- // tracing with this enabled.
- bool stream_tracing;
+ // submissions and may introduce false barriers between dispatches.
+ // Use this to identify slow dispatches and command buffers and refine
+ // from there; be wary of whole-program tracing with this enabled.
+ int32_t stream_tracing;
// Whether to use async allocations even if reported as available by the
// device. Defaults to true when the device supports it.
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_device.c b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
index a53f381..30cccca 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_device.c
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
@@ -277,7 +277,7 @@
out_params->event_pool_capacity = 32;
out_params->queue_count = 1;
out_params->command_buffer_mode = IREE_HAL_CUDA_COMMAND_BUFFER_MODE_GRAPH;
- out_params->stream_tracing = false;
+ out_params->stream_tracing = 0;
out_params->async_allocations = true;
}
@@ -346,9 +346,18 @@
// Enable tracing for the (currently only) stream - no-op if disabled.
if (iree_status_is_ok(status) && device->params.stream_tracing) {
+ if (device->params.stream_tracing >= IREE_HAL_CUDA_TRACING_VERBOSITY_MAX ||
+ device->params.stream_tracing < IREE_HAL_CUDA_TRACING_VERBOSITY_OFF) {
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "invalid stream_tracing argument: expected to be between %d and %d",
+ IREE_HAL_CUDA_TRACING_VERBOSITY_OFF,
+ IREE_HAL_CUDA_TRACING_VERBOSITY_MAX);
+ }
status = iree_hal_cuda_tracing_context_allocate(
device->cuda_symbols, device->identifier, dispatch_stream,
- &device->block_pool, host_allocator, &device->tracing_context);
+ device->params.stream_tracing, &device->block_pool, host_allocator,
+ &device->tracing_context);
}
// Memory pool support is conditional.
diff --git a/runtime/src/iree/hal/drivers/cuda/graph_command_buffer.c b/runtime/src/iree/hal/drivers/cuda/graph_command_buffer.c
index 68d4d34..e5b88df 100644
--- a/runtime/src/iree/hal/drivers/cuda/graph_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/cuda/graph_command_buffer.c
@@ -82,9 +82,10 @@
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
static void iree_cuda_graph_command_buffer_trace_zone_begin_external(
- iree_hal_cuda_graph_command_buffer_t* command_buffer, const char* file_name,
- size_t file_name_length, uint32_t line, const char* function_name,
- size_t function_name_length, const char* name, size_t name_length) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer, int32_t verbosity,
+ const char* file_name, size_t file_name_length, uint32_t line,
+ const char* function_name, size_t function_name_length, const char* name,
+ size_t name_length) {
// Make sure there are no new nodes after the last barrier.
// Work should start after the event.
if (IREE_UNLIKELY(command_buffer->graph_node_count != 0)) {
@@ -97,7 +98,7 @@
size_t dependency_count = command_buffer->cu_barrier_node ? 1 : 0;
IREE_CUDA_GRAPH_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- tracing_event_node, command_buffer->cu_graph,
+ tracing_event_node, command_buffer->cu_graph, verbosity,
&command_buffer->cu_barrier_node, dependency_count, file_name,
file_name_length, line, function_name, function_name_length, name,
name_length);
@@ -109,7 +110,7 @@
}
static void iree_cuda_graph_command_buffer_trace_zone_end(
- iree_hal_cuda_graph_command_buffer_t* command_buffer) {
+ iree_hal_cuda_graph_command_buffer_t* command_buffer, int32_t verbosity) {
// Make sure there are no new nodes after the last barrier.
// Prior work should end before the tracing event is recorded.
if (IREE_UNLIKELY(command_buffer->graph_node_count != 0)) {
@@ -124,7 +125,7 @@
"ending a zone should at least depend on the beginning");
IREE_CUDA_GRAPH_TRACE_ZONE_END(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- tracing_event_node, command_buffer->cu_graph,
+ tracing_event_node, command_buffer->cu_graph, verbosity,
&command_buffer->cu_barrier_node, dependency_count);
// We need to wait on the tracing end before other work starts.
@@ -132,27 +133,29 @@
command_buffer->cu_barrier_node = *tracing_event_node;
}
-#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
- command_buffer, file_name, file_name_length, line, function_name, \
- function_name_length, name, name_length) \
- iree_cuda_graph_command_buffer_trace_zone_begin_external( \
- command_buffer, file_name, file_name_length, line, function_name, \
- function_name_length, name, name_length)
-#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer) \
+#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
+ command_buffer, verbosity, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length) \
+ iree_cuda_graph_command_buffer_trace_zone_begin_external( \
+ command_buffer, verbosity, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length)
+#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer, \
+ verbosity) \
IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
- command_buffer, /*file_name=*/NULL, 0, /*line=*/0, __FUNCTION__, \
- strlen(__FUNCTION__), /*name=*/NULL, 0)
-#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer) \
- iree_cuda_graph_command_buffer_trace_zone_end(command_buffer)
+ command_buffer, verbosity, /*file_name=*/NULL, 0, /*line=*/0, \
+ __FUNCTION__, strlen(__FUNCTION__), /*name=*/NULL, 0)
+#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer, \
+ verbosity) \
+ iree_cuda_graph_command_buffer_trace_zone_end(command_buffer, verbosity)
#else // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
-#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
- command_buffer, file_name, file_name_length, line, function_name, \
- function_name_length, name, name_length)
-#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer)
-#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer)
-
+#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
+ command_buffer, verbosity, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length)
+#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer, \
+ verbosity)
+#define IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer, verbosity)
#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
iree_status_t iree_hal_cuda_graph_command_buffer_create(
@@ -335,7 +338,8 @@
command_buffer->symbols,
cuGraphCreate(&command_buffer->cu_graph, /*flags=*/0), "cuGraphCreate");
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE);
return iree_ok_status();
}
@@ -349,7 +353,8 @@
IREE_RETURN_IF_ERROR(
iree_hal_cuda_graph_command_buffer_flush_collectives(command_buffer));
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE);
// Reset state used during recording.
command_buffer->cu_barrier_node = NULL;
@@ -384,8 +389,9 @@
(void)command_buffer;
IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL(
- command_buffer, location ? location->file.data : NULL,
- location ? location->file.size : 0, location ? location->line : 0,
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE,
+ location ? location->file.data : NULL, location ? location->file.size : 0,
+ location ? location->line : 0,
/*func_name=*/NULL, 0, label.data, label.size);
}
@@ -394,7 +400,8 @@
iree_hal_cuda_graph_command_buffer_t* command_buffer =
iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
(void)command_buffer;
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE);
}
static iree_status_t
@@ -507,7 +514,8 @@
iree_hal_cuda_graph_command_buffer_t* command_buffer =
iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_cuda_graph_command_buffer_flush_collectives(command_buffer));
@@ -546,7 +554,8 @@
dependency_count, ¶ms, command_buffer->cu_context),
"cuGraphAddMemsetNode");
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -557,7 +566,8 @@
iree_hal_cuda_graph_command_buffer_t* command_buffer =
iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_cuda_graph_command_buffer_flush_collectives(command_buffer));
@@ -608,7 +618,8 @@
dependency_count, ¶ms, command_buffer->cu_context),
"cuGraphAddMemcpyNode");
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -619,7 +630,8 @@
iree_hal_cuda_graph_command_buffer_t* command_buffer =
iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_cuda_graph_command_buffer_flush_collectives(command_buffer));
@@ -666,7 +678,8 @@
dependency_count, ¶ms, command_buffer->cu_context),
"cuGraphAddMemcpyNode");
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -763,9 +776,10 @@
executable, entry_point, &kernel_info));
IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL(
- command_buffer, kernel_info.source_filename.data,
- kernel_info.source_filename.size, kernel_info.source_line,
- kernel_info.function_name.data, kernel_info.function_name.size,
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE,
+ kernel_info.source_filename.data, kernel_info.source_filename.size,
+ kernel_info.source_line, kernel_info.function_name.data,
+ kernel_info.function_name.size,
/*name=*/NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
@@ -865,7 +879,8 @@
dependency_count, ¶ms),
"cuGraphAddKernelNode");
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -898,10 +913,10 @@
executable, entry_point, &kernel_info));
IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL(
- command_buffer, kernel_info.source_filename.data,
- kernel_info.source_filename.size, kernel_info.source_line,
- kernel_info.function_name.data, kernel_info.function_name.size,
- /*name=*/NULL, 0);
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE,
+ kernel_info.source_filename.data, kernel_info.source_filename.size,
+ kernel_info.source_line, kernel_info.function_name.data,
+ kernel_info.function_name.size, /*name=*/NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_resource_set_insert(command_buffer->resource_set, 1,
@@ -990,7 +1005,8 @@
dependency_count, ¶ms),
"cuGraphAddKernelNode");
- IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_CUDA_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
diff --git a/runtime/src/iree/hal/drivers/cuda/nccl_channel.c b/runtime/src/iree/hal/drivers/cuda/nccl_channel.c
index e3eb31c..2f6eb3f 100644
--- a/runtime/src/iree/hal/drivers/cuda/nccl_channel.c
+++ b/runtime/src/iree/hal/drivers/cuda/nccl_channel.c
@@ -559,7 +559,8 @@
iree_string_view_t collective_str =
iree_hal_collective_op_format(&entry->op, &string_temp);
IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
- tracing_context, tracing_event_list, stream, __FILE__, strlen(__FILE__),
+ tracing_context, tracing_event_list, stream,
+ IREE_HAL_CUDA_TRACING_VERBOSITY_FINE, __FILE__, strlen(__FILE__),
(uint32_t)__LINE__, __FUNCTION__, strlen(__FUNCTION__),
collective_str.data, collective_str.size);
}
@@ -578,8 +579,8 @@
// End all zones we began above - note that these are just simply nested so
// order doesn't matter so long as we end the right number of zones.
for (iree_host_size_t i = 0; i < batch->count; ++i) {
- IREE_CUDA_STREAM_TRACE_ZONE_END(tracing_context, tracing_event_list,
- stream);
+ IREE_CUDA_STREAM_TRACE_ZONE_END(tracing_context, tracing_event_list, stream,
+ IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
}
#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
diff --git a/runtime/src/iree/hal/drivers/cuda/registration/driver_module.c b/runtime/src/iree/hal/drivers/cuda/registration/driver_module.c
index bea81bc..2e5bcff 100644
--- a/runtime/src/iree/hal/drivers/cuda/registration/driver_module.c
+++ b/runtime/src/iree/hal/drivers/cuda/registration/driver_module.c
@@ -26,10 +26,14 @@
"Enables CUDA asynchronous stream-ordered allocations when supported.");
IREE_FLAG(
- bool, cuda_tracing, true,
- "Enables tracing of stream events when Tracy instrumentation is enabled.\n"
- "Severely impacts benchmark timings and should only be used when\n"
- "analyzing dispatch timings.");
+ int32_t, cuda_tracing, 2,
+ "Controls the verbosity of tracing when Tracy instrumentation is enabled.\n"
+ "The impact to benchmark timing becomes more severe as the verbosity\n"
+ "increases, and thus should be only enabled when needed.\n"
+ "Permissible values are:\n"
+ " 0 : stream tracing disabled.\n"
+ " 1 : coarse command buffer level tracing enabled.\n"
+ " 2 : fine-grained kernel level tracing enabled.\n");
IREE_FLAG(int32_t, cuda_default_index, 0,
"Specifies the index of the default CUDA device to use");
diff --git a/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c b/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c
index a9b50fc..4b8a0b1 100644
--- a/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c
@@ -182,7 +182,7 @@
IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- command_buffer->cu_stream,
+ command_buffer->cu_stream, IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE,
/*file_name=*/NULL, 0, /*line=*/0, "iree_hal_cuda_stream_command_buffer",
strlen("iree_hal_cuda_stream_command_buffer"), /*name=*/NULL, 0);
@@ -217,9 +217,9 @@
command_buffer->resource_set,
&command_buffer->collective_batch);
- IREE_CUDA_STREAM_TRACE_ZONE_END(command_buffer->tracing_context,
- &command_buffer->tracing_event_list,
- command_buffer->cu_stream);
+ IREE_CUDA_STREAM_TRACE_ZONE_END(
+ command_buffer->tracing_context, &command_buffer->tracing_event_list,
+ command_buffer->cu_stream, IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
@@ -235,8 +235,9 @@
IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- command_buffer->cu_stream, location ? location->file.data : NULL,
- location ? location->file.size : 0, location ? location->line : 0,
+ command_buffer->cu_stream, IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE,
+ location ? location->file.data : NULL, location ? location->file.size : 0,
+ location ? location->line : 0,
/*func_name=*/NULL, 0, label.data, label.size);
// TODO: pass along to CUPTI if available.
@@ -250,9 +251,9 @@
// TODO: pass along to CUPTI if available.
- IREE_CUDA_STREAM_TRACE_ZONE_END(command_buffer->tracing_context,
- &command_buffer->tracing_event_list,
- command_buffer->cu_stream);
+ IREE_CUDA_STREAM_TRACE_ZONE_END(
+ command_buffer->tracing_context, &command_buffer->tracing_event_list,
+ command_buffer->cu_stream, IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE);
}
static iree_status_t iree_hal_cuda_stream_command_buffer_execution_barrier(
@@ -550,9 +551,10 @@
IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- command_buffer->cu_stream, kernel_info.source_filename.data,
- kernel_info.source_filename.size, kernel_info.source_line,
- kernel_info.function_name.data, kernel_info.function_name.size,
+ command_buffer->cu_stream, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE,
+ kernel_info.source_filename.data, kernel_info.source_filename.size,
+ kernel_info.source_line, kernel_info.function_name.data,
+ kernel_info.function_name.size,
/*name=*/NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
@@ -634,9 +636,9 @@
params_ptr, NULL),
"cuLaunchKernel");
- IREE_CUDA_STREAM_TRACE_ZONE_END(command_buffer->tracing_context,
- &command_buffer->tracing_event_list,
- command_buffer->cu_stream);
+ IREE_CUDA_STREAM_TRACE_ZONE_END(
+ command_buffer->tracing_context, &command_buffer->tracing_event_list,
+ command_buffer->cu_stream, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
@@ -672,10 +674,10 @@
IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- command_buffer->cu_stream, kernel_info.source_filename.data,
- kernel_info.source_filename.size, kernel_info.source_line,
- kernel_info.function_name.data, kernel_info.function_name.size,
- /*name=*/NULL, 0);
+ command_buffer->cu_stream, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE,
+ kernel_info.source_filename.data, kernel_info.source_filename.size,
+ kernel_info.source_line, kernel_info.function_name.data,
+ kernel_info.function_name.size, /*name=*/NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_resource_set_insert(command_buffer->resource_set, 1,
@@ -747,9 +749,9 @@
command_buffer->cu_stream, params_ptr, NULL),
"cuLaunchKernel");
- IREE_CUDA_STREAM_TRACE_ZONE_END(command_buffer->tracing_context,
- &command_buffer->tracing_event_list,
- command_buffer->cu_stream);
+ IREE_CUDA_STREAM_TRACE_ZONE_END(
+ command_buffer->tracing_context, &command_buffer->tracing_event_list,
+ command_buffer->cu_stream, IREE_HAL_CUDA_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
diff --git a/runtime/src/iree/hal/drivers/cuda/tracing.c b/runtime/src/iree/hal/drivers/cuda/tracing.c
index 913ca5d..057fdda 100644
--- a/runtime/src/iree/hal/drivers/cuda/tracing.c
+++ b/runtime/src/iree/hal/drivers/cuda/tracing.c
@@ -69,6 +69,8 @@
uint32_t query_capacity;
+ iree_hal_cuda_tracing_verbosity_t verbosity;
+
// Event pool reused to capture tracing timestamps.
// The lifetime of the events are as follows.
// 1) All events are allocated when the tracing context is created.
@@ -118,6 +120,7 @@
iree_status_t iree_hal_cuda_tracing_context_allocate(
const iree_hal_cuda_dynamic_symbols_t* symbols,
iree_string_view_t queue_name, CUstream stream,
+ iree_hal_cuda_tracing_verbosity_t stream_tracing_verbosity,
iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
iree_hal_cuda_tracing_context_t** out_context) {
IREE_TRACE_ZONE_BEGIN(z0);
@@ -138,6 +141,7 @@
context->query_capacity = IREE_ARRAYSIZE(context->event_pool);
context->submitted_event_list.head = NULL;
context->submitted_event_list.tail = NULL;
+ context->verbosity = stream_tracing_verbosity;
iree_slim_mutex_initialize(&context->event_mutex);
}
@@ -364,7 +368,8 @@
// event.
static uint16_t iree_hal_cuda_stream_tracing_context_insert_query(
iree_hal_cuda_tracing_context_t* context,
- iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream) {
+ iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream,
+ iree_hal_cuda_tracing_verbosity_t verbosity) {
iree_slim_mutex_lock(&context->event_mutex);
IREE_ASSERT_ARGUMENT(event_list);
@@ -392,7 +397,8 @@
static uint16_t iree_hal_cuda_graph_tracing_context_insert_query(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list,
- CUgraphNode* out_node, CUgraph graph, CUgraphNode* dependency_nodes,
+ CUgraphNode* out_node, CUgraph graph,
+ iree_hal_cuda_tracing_verbosity_t verbosity, CUgraphNode* dependency_nodes,
size_t dependency_nodes_count) {
IREE_ASSERT_ARGUMENT(event_list);
iree_slim_mutex_lock(&context->event_mutex);
@@ -426,22 +432,26 @@
void iree_hal_cuda_stream_tracing_zone_begin_impl(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream,
+ iree_hal_cuda_tracing_verbosity_t verbosity,
const iree_tracing_location_t* src_loc) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
+
uint16_t query_id = iree_hal_cuda_stream_tracing_context_insert_query(
- context, event_list, stream);
+ context, event_list, stream, verbosity);
iree_tracing_gpu_zone_begin(context->id, query_id, src_loc);
}
void iree_hal_cuda_stream_tracing_zone_begin_external_impl(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream,
- const char* file_name, size_t file_name_length, uint32_t line,
- const char* function_name, size_t function_name_length, const char* name,
- size_t name_length) {
+ iree_hal_cuda_tracing_verbosity_t verbosity, const char* file_name,
+ size_t file_name_length, uint32_t line, const char* function_name,
+ size_t function_name_length, const char* name, size_t name_length) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_cuda_stream_tracing_context_insert_query(
- context, event_list, stream);
+ context, event_list, stream, verbosity);
iree_tracing_gpu_zone_begin_external(context->id, query_id, file_name,
file_name_length, line, function_name,
function_name_length, name, name_length);
@@ -450,13 +460,15 @@
void iree_hal_cuda_graph_tracing_zone_begin_external_impl(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list,
- CUgraphNode* out_node, CUgraph graph, CUgraphNode* dependency_nodes,
+ CUgraphNode* out_node, CUgraph graph,
+ iree_hal_cuda_tracing_verbosity_t verbosity, CUgraphNode* dependency_nodes,
size_t dependency_nodes_count, const char* file_name,
size_t file_name_length, uint32_t line, const char* function_name,
size_t function_name_length, const char* name, size_t name_length) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_cuda_graph_tracing_context_insert_query(
- context, event_list, out_node, graph, dependency_nodes,
+ context, event_list, out_node, graph, verbosity, dependency_nodes,
dependency_nodes_count);
iree_tracing_gpu_zone_begin_external(context->id, query_id, file_name,
file_name_length, line, function_name,
@@ -465,21 +477,25 @@
void iree_hal_cuda_stream_tracing_zone_end_impl(
iree_hal_cuda_tracing_context_t* context,
- iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream) {
+ iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream,
+ iree_hal_cuda_tracing_verbosity_t verbosity) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_cuda_stream_tracing_context_insert_query(
- context, event_list, stream);
+ context, event_list, stream, verbosity);
iree_tracing_gpu_zone_end(context->id, query_id);
}
void iree_hal_cuda_graph_tracing_zone_end_impl(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list,
- CUgraphNode* out_node, CUgraph graph, CUgraphNode* dependency_nodes,
+ CUgraphNode* out_node, CUgraph graph,
+ iree_hal_cuda_tracing_verbosity_t verbosity, CUgraphNode* dependency_nodes,
size_t dependency_nodes_count) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_cuda_graph_tracing_context_insert_query(
- context, event_list, out_node, graph, dependency_nodes,
+ context, event_list, out_node, graph, verbosity, dependency_nodes,
dependency_nodes_count);
iree_tracing_gpu_zone_end(context->id, query_id);
}
@@ -489,6 +505,7 @@
iree_status_t iree_hal_cuda_tracing_context_allocate(
const iree_hal_cuda_dynamic_symbols_t* symbols,
iree_string_view_t queue_name, CUstream stream,
+ iree_hal_cuda_tracing_verbosity_t stream_tracing_verbosity,
iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
iree_hal_cuda_tracing_context_t** out_context) {
*out_context = NULL;
diff --git a/runtime/src/iree/hal/drivers/cuda/tracing.h b/runtime/src/iree/hal/drivers/cuda/tracing.h
index abe468f..1174f77 100644
--- a/runtime/src/iree/hal/drivers/cuda/tracing.h
+++ b/runtime/src/iree/hal/drivers/cuda/tracing.h
@@ -52,11 +52,19 @@
iree_hal_cuda_tracing_context_event_t* tail;
} iree_hal_cuda_tracing_context_event_list_t;
+typedef enum iree_hal_cuda_tracing_verbosity_e {
+ IREE_HAL_CUDA_TRACING_VERBOSITY_OFF = 0,
+ IREE_HAL_CUDA_TRACING_VERBOSITY_COARSE,
+ IREE_HAL_CUDA_TRACING_VERBOSITY_FINE,
+ IREE_HAL_CUDA_TRACING_VERBOSITY_MAX
+} iree_hal_cuda_tracing_verbosity_t;
+
// Allocates a tracing context for the given CUDA |stream|.
// Each context must only be used with the stream it was created for.
iree_status_t iree_hal_cuda_tracing_context_allocate(
const iree_hal_cuda_dynamic_symbols_t* symbols,
iree_string_view_t queue_name, CUstream stream,
+ iree_hal_cuda_tracing_verbosity_t stream_tracing_verbosity,
iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
iree_hal_cuda_tracing_context_t** out_context);
@@ -88,6 +96,7 @@
void iree_hal_cuda_stream_tracing_zone_begin_impl(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream,
+ iree_hal_cuda_tracing_verbosity_t verbosity,
const iree_tracing_location_t* src_loc);
// Begins an external zone using the given source information.
@@ -95,74 +104,80 @@
void iree_hal_cuda_stream_tracing_zone_begin_external_impl(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream,
- const char* file_name, size_t file_name_length, uint32_t line,
- const char* function_name, size_t function_name_length, const char* name,
- size_t name_length);
+ iree_hal_cuda_tracing_verbosity_t verbosity, const char* file_name,
+ size_t file_name_length, uint32_t line, const char* function_name,
+ size_t function_name_length, const char* name, size_t name_length);
void iree_hal_cuda_graph_tracing_zone_begin_external_impl(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list,
- CUgraphNode* out_node, CUgraph graph, CUgraphNode* dependency_nodes,
+ CUgraphNode* out_node, CUgraph graph,
+ iree_hal_cuda_tracing_verbosity_t verbosity, CUgraphNode* dependency_nodes,
size_t dependency_nodes_count, const char* file_name,
size_t file_name_length, uint32_t line, const char* function_name,
size_t function_name_length, const char* name, size_t name_length);
void iree_hal_cuda_stream_tracing_zone_end_impl(
iree_hal_cuda_tracing_context_t* context,
- iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream);
+ iree_hal_cuda_tracing_context_event_list_t* event_list, CUstream stream,
+ iree_hal_cuda_tracing_verbosity_t verbosity);
void iree_hal_cuda_graph_tracing_zone_end_impl(
iree_hal_cuda_tracing_context_t* context,
iree_hal_cuda_tracing_context_event_list_t* event_list,
- CUgraphNode* out_node, CUgraph graph, CUgraphNode* dependency_nodes,
+ CUgraphNode* out_node, CUgraph graph,
+ iree_hal_cuda_tracing_verbosity_t verbosity, CUgraphNode* dependency_nodes,
size_t dependency_nodes_count);
// Begins a new zone with the parent function name.
-#define IREE_CUDA_STREAM_TRACE_ZONE_BEGIN(context, event_list_begin, \
- event_list_end, stream) \
- static const iree_tracing_location_t TracyConcat( \
- __tracy_source_location, __LINE__) = {NULL, __FUNCTION__, __FILE__, \
- (uint32_t)__LINE__, 0}; \
- iree_hal_cuda_stream_tracing_zone_begin_impl( \
- context, event_list_begin, event_list_end, stream, \
+#define IREE_CUDA_STREAM_TRACE_ZONE_BEGIN(context, event_list_begin, \
+ event_list_end, stream, verbosity) \
+ static const iree_tracing_location_t TracyConcat( \
+ __tracy_source_location, __LINE__) = {NULL, __FUNCTION__, __FILE__, \
+ (uint32_t)__LINE__, 0}; \
+ iree_hal_cuda_stream_tracing_zone_begin_impl( \
+ context, event_list_begin, event_list_end, stream, verbosity, \
&TracyConcat(__tracy_source_location, __LINE__));
// Begins an externally defined zone with a dynamic source location.
// The |file_name|, |function_name|, and optional |name| strings will be copied
// into the trace buffer and do not need to persist.
-#define IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL( \
- context, event_list, stream, file_name, file_name_length, line, \
- function_name, function_name_length, name, name_length) \
- iree_hal_cuda_stream_tracing_zone_begin_external_impl( \
- context, event_list, stream, file_name, file_name_length, line, \
- function_name, function_name_length, name, name_length)
+#define IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL( \
+ context, event_list, stream, verbosity, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length) \
+ iree_hal_cuda_stream_tracing_zone_begin_external_impl( \
+ context, event_list, stream, verbosity, file_name, file_name_length, \
+ line, function_name, function_name_length, name, name_length)
#define IREE_CUDA_GRAPH_TRACE_ZONE_BEGIN_EXTERNAL( \
- context, event_list, out_node, graph, dependency_nodes, \
+ context, event_list, out_node, graph, verbosity, dependency_nodes, \
dependency_nodes_count, file_name, file_name_length, line, function_name, \
function_name_length, name, name_length) \
iree_hal_cuda_graph_tracing_zone_begin_external_impl( \
- context, event_list, out_node, graph, dependency_nodes, \
+ context, event_list, out_node, graph, verbosity, dependency_nodes, \
dependency_nodes_count, file_name, file_name_length, line, \
function_name, function_name_length, name, name_length)
-#define IREE_CUDA_STREAM_TRACE_ZONE_END(context, event_list, stream) \
- iree_hal_cuda_stream_tracing_zone_end_impl(context, event_list, stream)
+#define IREE_CUDA_STREAM_TRACE_ZONE_END(context, event_list, stream, \
+ verbosity) \
+ iree_hal_cuda_stream_tracing_zone_end_impl(context, event_list, stream, \
+ verbosity)
#define IREE_CUDA_GRAPH_TRACE_ZONE_END(context, event_list, out_node, graph, \
- dependency_nodes, \
+ verbosity, dependency_nodes, \
dependency_nodes_count) \
- iree_hal_cuda_graph_tracing_zone_end_impl(context, event_list, out_node, \
- graph, dependency_nodes, \
- dependency_nodes_count)
+ iree_hal_cuda_graph_tracing_zone_end_impl( \
+ context, event_list, out_node, graph, verbosity, dependency_nodes, \
+ dependency_nodes_count)
#else
-#define IREE_CUDA_STREAM_TRACE_ZONE_BEGIN(context, event_list, stream)
-#define IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL( \
- context, event_list, stream, file_name, file_name_length, line, \
+#define IREE_CUDA_STREAM_TRACE_ZONE_BEGIN(context, event_list, stream, \
+ verbosity)
+#define IREE_CUDA_STREAM_TRACE_ZONE_BEGIN_EXTERNAL( \
+ context, event_list, stream, verbosity, file_name, file_name_length, line, \
function_name, function_name_length, name, name_length)
#define IREE_CUDA_GRAPH_TRACE_ZONE_BEGIN_EXTERNAL( \
- context, event_list, out_node, graph, dependency_nodes, \
+ context, event_list, out_node, graph, verbosity, dependency_nodes, \
dependency_nodes_count, file_name, file_name_length, line, function_name, \
function_name_length, name, name_length)
-#define IREE_CUDA_STREAM_TRACE_ZONE_END(context, event_list, stream)
+#define IREE_CUDA_STREAM_TRACE_ZONE_END(context, event_list, stream, verbosity)
#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
diff --git a/runtime/src/iree/hal/drivers/hip/api.h b/runtime/src/iree/hal/drivers/hip/api.h
index a505535..2218b19 100644
--- a/runtime/src/iree/hal/drivers/hip/api.h
+++ b/runtime/src/iree/hal/drivers/hip/api.h
@@ -76,15 +76,16 @@
// Specifies how command buffers are recorded and executed.
iree_hal_hip_command_buffer_mode_t command_buffer_mode;
- // Enables tracing of command buffers when IREE tracing is enabled.
+ // Controls the verbosity of command buffer tracing when IREE
+ // tracing is enabled.
// May take advantage of additional extensions for more accurate timing or
// hardware-specific performance counters.
//
// NOTE: tracing has a non-trivial overhead and will skew the timing of
- // submissions and introduce false barriers between dispatches. Use this to
- // identify slow dispatches and refine from there; be wary of whole-program
- // tracing with this enabled.
- bool stream_tracing;
+ // submissions and may introduce false barriers between dispatches.
+ // Use this to identify slow dispatches and command buffers and refine
+ // from there; be wary of whole-program tracing with this enabled.
+ int32_t stream_tracing;
// Whether to use async allocations even if reported as available by the
// device. Defaults to true when the device supports it.
diff --git a/runtime/src/iree/hal/drivers/hip/graph_command_buffer.c b/runtime/src/iree/hal/drivers/hip/graph_command_buffer.c
index 99b3538..afade26 100644
--- a/runtime/src/iree/hal/drivers/hip/graph_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/hip/graph_command_buffer.c
@@ -83,7 +83,8 @@
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
static void iree_hip_graph_command_buffer_trace_zone_begin_external(
- iree_hal_hip_graph_command_buffer_t* command_buffer, const char* file_name,
+ iree_hal_hip_graph_command_buffer_t* command_buffer,
+ iree_hal_hip_tracing_verbosity_t verbosity, const char* file_name,
size_t file_name_length, uint32_t line, const char* function_name,
size_t function_name_length, const char* name, size_t name_length) {
// Make sure there are no new nodes after the last barrier.
@@ -98,7 +99,7 @@
size_t dependency_count = command_buffer->hip_barrier_node ? 1 : 0;
IREE_HIP_GRAPH_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- tracing_event_node, command_buffer->hip_graph,
+ tracing_event_node, command_buffer->hip_graph, verbosity,
&command_buffer->hip_barrier_node, dependency_count, file_name,
file_name_length, line, function_name, function_name_length, name,
name_length);
@@ -110,7 +111,8 @@
}
static void iree_hip_graph_command_buffer_trace_zone_end(
- iree_hal_hip_graph_command_buffer_t* command_buffer) {
+ iree_hal_hip_graph_command_buffer_t* command_buffer,
+ iree_hal_hip_tracing_verbosity_t verbosity) {
// Make sure there are no new nodes after the last barrier.
// Prior work should end before the tracing event is recorded.
if (IREE_UNLIKELY(command_buffer->graph_node_count != 0)) {
@@ -125,7 +127,7 @@
"ending a zone should at least depend on the beginning");
IREE_HIP_GRAPH_TRACE_ZONE_END(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- tracing_event_node, command_buffer->hip_graph,
+ tracing_event_node, command_buffer->hip_graph, verbosity,
&command_buffer->hip_barrier_node, dependency_count);
// We need to wait on the tracing end before other work starts.
@@ -133,26 +135,29 @@
command_buffer->hip_barrier_node = *tracing_event_node;
}
-#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
- command_buffer, file_name, file_name_length, line, function_name, \
- function_name_length, name, name_length) \
- iree_hip_graph_command_buffer_trace_zone_begin_external( \
- command_buffer, file_name, file_name_length, line, function_name, \
- function_name_length, name, name_length)
-#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer) \
+#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
+ command_buffer, verbosity, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length) \
+ iree_hip_graph_command_buffer_trace_zone_begin_external( \
+ command_buffer, verbosity, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length)
+#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer, \
+ verbosity) \
IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
- command_buffer, /*file_name=*/NULL, 0, /*line=*/0, __FUNCTION__, \
- strlen(__FUNCTION__), /*name=*/NULL, 0)
-#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer) \
- iree_hip_graph_command_buffer_trace_zone_end(command_buffer)
+ command_buffer, verbosity, /*file_name=*/NULL, 0, /*line=*/0, \
+ __FUNCTION__, strlen(__FUNCTION__), /*name=*/NULL, 0)
+#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer, \
+ verbosity) \
+ iree_hip_graph_command_buffer_trace_zone_end(command_buffer, verbosity)
#else // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
-#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
- command_buffer, file_name, file_name_length, line, function_name, \
- function_name_length, name, name_length)
-#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer)
-#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer)
+#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL( \
+ command_buffer, verbosity, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length)
+#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer, \
+ verbosity)
+#define IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer, verbosity)
#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
@@ -340,7 +345,7 @@
"hipGraphCreate");
IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL(
- command_buffer,
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_COARSE,
/*file_name=*/NULL, 0, /*line=*/0, "iree_hal_hip_graph_command_buffer",
strlen("iree_hal_hip_graph_command_buffer"),
/*name=*/NULL, 0);
@@ -357,7 +362,8 @@
IREE_RETURN_IF_ERROR(
iree_hal_hip_graph_command_buffer_flush_collectives(command_buffer));
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_COARSE);
// Reset state used during recording.
command_buffer->hip_barrier_node = NULL;
@@ -392,8 +398,9 @@
(void)command_buffer;
IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL(
- command_buffer, location ? location->file.data : NULL,
- location ? location->file.size : 0, location ? location->line : 0,
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_COARSE,
+ location ? location->file.data : NULL, location ? location->file.size : 0,
+ location ? location->line : 0,
/*func_name=*/NULL, 0, label.data, label.size);
}
@@ -402,7 +409,8 @@
iree_hal_hip_graph_command_buffer_t* command_buffer =
iree_hal_hip_graph_command_buffer_cast(base_command_buffer);
(void)command_buffer;
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_COARSE);
}
static iree_status_t
@@ -515,7 +523,8 @@
iree_hal_hip_graph_command_buffer_t* command_buffer =
iree_hal_hip_graph_command_buffer_cast(base_command_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_hip_graph_command_buffer_flush_collectives(command_buffer));
@@ -553,7 +562,8 @@
dependency_count, ¶ms),
"hipGraphAddMemsetNode");
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -569,7 +579,8 @@
"cannot use graph-based command buffer");
}
IREE_TRACE_ZONE_BEGIN(z0);
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_hip_graph_command_buffer_flush_collectives(command_buffer));
@@ -621,7 +632,8 @@
dependency_count, ¶ms, command_buffer->hip_context),
"hipDrvGraphAddMemcpyNode");
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -637,7 +649,8 @@
"cannot use graph-based command buffer");
}
IREE_TRACE_ZONE_BEGIN(z0);
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_hip_graph_command_buffer_flush_collectives(command_buffer));
@@ -683,7 +696,8 @@
dependency_count, ¶ms, command_buffer->hip_context),
"hipDrvGraphAddMemcpyNode");
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -787,9 +801,10 @@
executable, entry_point, &kernel_info));
IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL(
- command_buffer, kernel_info.source_filename.data,
- kernel_info.source_filename.size, kernel_info.source_line,
- kernel_info.function_name.data, kernel_info.function_name.size,
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE,
+ kernel_info.source_filename.data, kernel_info.source_filename.size,
+ kernel_info.source_line, kernel_info.function_name.data,
+ kernel_info.function_name.size,
/*name=*/NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
@@ -874,7 +889,8 @@
dependency_count, ¶ms),
"hipGraphAddKernelNode");
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
@@ -907,10 +923,10 @@
executable, entry_point, &kernel_info));
IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_BEGIN_EXTERNAL(
- command_buffer, kernel_info.source_filename.data,
- kernel_info.source_filename.size, kernel_info.source_line,
- kernel_info.function_name.data, kernel_info.function_name.size,
- /*name=*/NULL, 0);
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE,
+ kernel_info.source_filename.data, kernel_info.source_filename.size,
+ kernel_info.source_line, kernel_info.function_name.data,
+ kernel_info.function_name.size, /*name=*/NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_resource_set_insert(command_buffer->resource_set, 1,
@@ -990,7 +1006,8 @@
dependency_count, ¶ms),
"hipGraphAddKernelNode");
- IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(command_buffer);
+ IREE_HIP_GRAPH_COMMAND_BUFFER_TRACE_ZONE_END(
+ command_buffer, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
diff --git a/runtime/src/iree/hal/drivers/hip/hip_device.c b/runtime/src/iree/hal/drivers/hip/hip_device.c
index 133d3f5..f92c784 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_device.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_device.c
@@ -275,7 +275,7 @@
out_params->event_pool_capacity = 32;
out_params->queue_count = 1;
out_params->command_buffer_mode = IREE_HAL_HIP_COMMAND_BUFFER_MODE_STREAM;
- out_params->stream_tracing = false;
+ out_params->stream_tracing = 0;
out_params->async_allocations = true;
out_params->allow_inline_execution = false;
}
@@ -344,9 +344,18 @@
// Enable tracing for the (currently only) stream - no-op if disabled.
if (iree_status_is_ok(status) && device->params.stream_tracing) {
+ if (device->params.stream_tracing >= IREE_HAL_HIP_TRACING_VERBOSITY_MAX ||
+ device->params.stream_tracing < IREE_HAL_HIP_TRACING_VERBOSITY_OFF) {
+ return iree_make_status(  // NOTE(review): skips status-based cleanup? confirm
+ IREE_STATUS_INVALID_ARGUMENT,
+ "invalid stream_tracing argument: expected to be between %d and %d",
+ IREE_HAL_HIP_TRACING_VERBOSITY_OFF,
+ IREE_HAL_HIP_TRACING_VERBOSITY_MAX - 1);  // _MAX is an exclusive bound.
+ }
status = iree_hal_hip_tracing_context_allocate(
device->hip_symbols, device->identifier, dispatch_stream,
- &device->block_pool, host_allocator, &device->tracing_context);
+ device->params.stream_tracing, &device->block_pool, host_allocator,
+ &device->tracing_context);
}
// Memory pool support is conditional.
diff --git a/runtime/src/iree/hal/drivers/hip/rccl_channel.c b/runtime/src/iree/hal/drivers/hip/rccl_channel.c
index e3c38a2..84e592c 100644
--- a/runtime/src/iree/hal/drivers/hip/rccl_channel.c
+++ b/runtime/src/iree/hal/drivers/hip/rccl_channel.c
@@ -593,7 +593,8 @@
iree_string_view_t collective_str =
iree_hal_collective_op_format(&entry->op, &string_temp);
IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
- tracing_context, tracing_event_list, stream, __FILE__, strlen(__FILE__),
+ tracing_context, tracing_event_list, stream,
+ IREE_HAL_HIP_TRACING_VERBOSITY_FINE, __FILE__, strlen(__FILE__),
(uint32_t)__LINE__, __FUNCTION__, strlen(__FUNCTION__),
collective_str.data, collective_str.size);
}
@@ -613,7 +614,8 @@
IREE_TRACE({
for (iree_host_size_t i = 0; i < batch->count; ++i) {
IREE_HIP_STREAM_TRACE_ZONE_END(tracing_context, tracing_event_list,
- stream);
+ stream,
+ IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
}
});
diff --git a/runtime/src/iree/hal/drivers/hip/registration/driver_module.c b/runtime/src/iree/hal/drivers/hip/registration/driver_module.c
index cabb1da..1998cfc 100644
--- a/runtime/src/iree/hal/drivers/hip/registration/driver_module.c
+++ b/runtime/src/iree/hal/drivers/hip/registration/driver_module.c
@@ -36,10 +36,14 @@
"Enables HIP asynchronous stream-ordered allocations when supported.");
IREE_FLAG(
- bool, hip_tracing, true,
- "Enables tracing of stream events when Tracy instrumentation is enabled.\n"
- "Severely impacts benchmark timings and should only be used when\n"
- "analyzing dispatch timings.");
+ int32_t, hip_tracing, 2,
+ "Controls the verbosity of tracing when Tracy instrumentation is enabled.\n"
+ "The impact to benchmark timing becomes more severe as the verbosity\n"
+ "increases, and thus should be only enabled when needed.\n"
+ "Permissible values are:\n"
+ " 0 : stream tracing disabled.\n"
+ " 1 : coarse command buffer level tracing enabled.\n"
+ " 2 : fine-grained kernel level tracing enabled.\n");
IREE_FLAG(int32_t, hip_default_index, 0,
"Specifies the index of the default HIP device to use");
@@ -181,7 +185,7 @@
"Option 'hip_tracing' expected to be int. Got: '%.*s'",
(int)value.size, value.data);
}
- device_params->stream_tracing = ivalue ? true : false;
+ device_params->stream_tracing = ivalue;
} else if (iree_string_view_equal(key, key_hip_default_index)) {
if (!iree_string_view_atoi_int32(value, &ivalue)) {
return iree_make_status(
diff --git a/runtime/src/iree/hal/drivers/hip/stream_command_buffer.c b/runtime/src/iree/hal/drivers/hip/stream_command_buffer.c
index e4ffac2..1b8b6b6 100644
--- a/runtime/src/iree/hal/drivers/hip/stream_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/hip/stream_command_buffer.c
@@ -183,7 +183,7 @@
IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- command_buffer->hip_stream,
+ command_buffer->hip_stream, IREE_HAL_HIP_TRACING_VERBOSITY_COARSE,
/*file_name=*/NULL, 0, /*line=*/0, "iree_hal_hip_stream_command_buffer",
strlen("iree_hal_hip_stream_command_buffer"),
/*name=*/NULL, 0);
@@ -212,9 +212,9 @@
z0, iree_hal_resource_set_allocate(command_buffer->arena.block_pool,
&command_buffer->resource_set));
- IREE_HIP_STREAM_TRACE_ZONE_END(command_buffer->tracing_context,
- &command_buffer->tracing_event_list,
- command_buffer->hip_stream);
+ IREE_HIP_STREAM_TRACE_ZONE_END(
+ command_buffer->tracing_context, &command_buffer->tracing_event_list,
+ command_buffer->hip_stream, IREE_HAL_HIP_TRACING_VERBOSITY_COARSE);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
@@ -230,8 +230,9 @@
IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- command_buffer->hip_stream, location ? location->file.data : NULL,
- location ? location->file.size : 0, location ? location->line : 0,
+ command_buffer->hip_stream, IREE_HAL_HIP_TRACING_VERBOSITY_COARSE,
+ location ? location->file.data : NULL, location ? location->file.size : 0,
+ location ? location->line : 0,
/*func_name=*/NULL, 0, label.data, label.size);
}
@@ -241,9 +242,9 @@
iree_hal_hip_stream_command_buffer_cast(base_command_buffer);
(void)command_buffer;
- IREE_HIP_STREAM_TRACE_ZONE_END(command_buffer->tracing_context,
- &command_buffer->tracing_event_list,
- command_buffer->hip_stream);
+ IREE_HIP_STREAM_TRACE_ZONE_END(
+ command_buffer->tracing_context, &command_buffer->tracing_event_list,
+ command_buffer->hip_stream, IREE_HAL_HIP_TRACING_VERBOSITY_COARSE);
}
static iree_status_t iree_hal_hip_stream_command_buffer_execution_barrier(
@@ -542,9 +543,10 @@
IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- command_buffer->hip_stream, kernel_info.source_filename.data,
- kernel_info.source_filename.size, kernel_info.source_line,
- kernel_info.function_name.data, kernel_info.function_name.size,
+ command_buffer->hip_stream, IREE_HAL_HIP_TRACING_VERBOSITY_FINE,
+ kernel_info.source_filename.data, kernel_info.source_filename.size,
+ kernel_info.source_line, kernel_info.function_name.data,
+ kernel_info.function_name.size,
/*name=*/NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
@@ -615,9 +617,9 @@
command_buffer->hip_stream, params_ptr, NULL),
"hipModuleLaunchKernel");
- IREE_HIP_STREAM_TRACE_ZONE_END(command_buffer->tracing_context,
- &command_buffer->tracing_event_list,
- command_buffer->hip_stream);
+ IREE_HIP_STREAM_TRACE_ZONE_END(
+ command_buffer->tracing_context, &command_buffer->tracing_event_list,
+ command_buffer->hip_stream, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return status;
@@ -652,10 +654,10 @@
IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL(
command_buffer->tracing_context, &command_buffer->tracing_event_list,
- command_buffer->hip_stream, kernel_info.source_filename.data,
- kernel_info.source_filename.size, kernel_info.source_line,
- kernel_info.function_name.data, kernel_info.function_name.size,
- /*name=*/NULL, 0);
+ command_buffer->hip_stream, IREE_HAL_HIP_TRACING_VERBOSITY_FINE,
+ kernel_info.source_filename.data, kernel_info.source_filename.size,
+ kernel_info.source_line, kernel_info.function_name.data,
+ kernel_info.function_name.size, /*name=*/NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_resource_set_insert(command_buffer->resource_set, 1,
@@ -718,9 +720,9 @@
params_ptr, NULL),
"hipModuleLaunchKernel");
- IREE_HIP_STREAM_TRACE_ZONE_END(command_buffer->tracing_context,
- &command_buffer->tracing_event_list,
- command_buffer->hip_stream);
+ IREE_HIP_STREAM_TRACE_ZONE_END(
+ command_buffer->tracing_context, &command_buffer->tracing_event_list,
+ command_buffer->hip_stream, IREE_HAL_HIP_TRACING_VERBOSITY_FINE);
IREE_TRACE_ZONE_END(z0);
return status;
diff --git a/runtime/src/iree/hal/drivers/hip/tracing.c b/runtime/src/iree/hal/drivers/hip/tracing.c
index f1a7007..62b15ef 100644
--- a/runtime/src/iree/hal/drivers/hip/tracing.c
+++ b/runtime/src/iree/hal/drivers/hip/tracing.c
@@ -67,6 +67,8 @@
// Submitted events
iree_hal_hip_tracing_context_event_list_t submitted_event_list;
+ int32_t verbosity;
+
uint32_t query_capacity;
// Event pool reused to capture tracing timestamps.
@@ -119,6 +121,7 @@
iree_status_t iree_hal_hip_tracing_context_allocate(
const iree_hal_hip_dynamic_symbols_t* symbols,
iree_string_view_t queue_name, hipStream_t stream,
+ iree_hal_hip_tracing_verbosity_t stream_tracing_verbosity,
iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
iree_hal_hip_tracing_context_t** out_context) {
IREE_TRACE_ZONE_BEGIN(z0);
@@ -139,6 +142,7 @@
context->query_capacity = IREE_ARRAYSIZE(context->event_pool);
context->submitted_event_list.head = NULL;
context->submitted_event_list.tail = NULL;
+ context->verbosity = stream_tracing_verbosity;
iree_slim_mutex_initialize(&context->event_mutex);
}
@@ -425,8 +429,10 @@
void iree_hal_hip_stream_tracing_zone_begin_impl(
iree_hal_hip_tracing_context_t* context,
iree_hal_hip_tracing_context_event_list_t* event_list, hipStream_t stream,
+ iree_hal_hip_tracing_verbosity_t verbosity,
const iree_tracing_location_t* src_loc) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_hip_stream_tracing_context_insert_query(
context, event_list, stream);
iree_tracing_gpu_zone_begin(context->id, query_id, src_loc);
@@ -435,10 +441,11 @@
void iree_hal_hip_stream_tracing_zone_begin_external_impl(
iree_hal_hip_tracing_context_t* context,
iree_hal_hip_tracing_context_event_list_t* event_list, hipStream_t stream,
- const char* file_name, size_t file_name_length, uint32_t line,
- const char* function_name, size_t function_name_length, const char* name,
- size_t name_length) {
+ iree_hal_hip_tracing_verbosity_t verbosity, const char* file_name,
+ size_t file_name_length, uint32_t line, const char* function_name,
+ size_t function_name_length, const char* name, size_t name_length) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_hip_stream_tracing_context_insert_query(
context, event_list, stream);
iree_tracing_gpu_zone_begin_external(context->id, query_id, file_name,
@@ -450,11 +457,13 @@
iree_hal_hip_tracing_context_t* context,
iree_hal_hip_tracing_context_event_list_t* event_list,
hipGraphNode_t* out_node, hipGraph_t graph,
+ iree_hal_hip_tracing_verbosity_t verbosity,
hipGraphNode_t* dependency_nodes, size_t dependency_nodes_count,
const char* file_name, size_t file_name_length, uint32_t line,
const char* function_name, size_t function_name_length, const char* name,
size_t name_length) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_hip_graph_tracing_context_insert_query(
context, event_list, out_node, graph, dependency_nodes,
dependency_nodes_count);
@@ -465,8 +474,10 @@
void iree_hal_hip_stream_tracing_zone_end_impl(
iree_hal_hip_tracing_context_t* context,
- iree_hal_hip_tracing_context_event_list_t* event_list, hipStream_t stream) {
+ iree_hal_hip_tracing_context_event_list_t* event_list, hipStream_t stream,
+ iree_hal_hip_tracing_verbosity_t verbosity) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_hip_stream_tracing_context_insert_query(
context, event_list, stream);
iree_tracing_gpu_zone_end(context->id, query_id);
@@ -476,8 +487,10 @@
iree_hal_hip_tracing_context_t* context,
iree_hal_hip_tracing_context_event_list_t* event_list,
hipGraphNode_t* out_node, hipGraph_t graph,
+ iree_hal_hip_tracing_verbosity_t verbosity,
hipGraphNode_t* dependency_nodes, size_t dependency_nodes_count) {
if (!context) return;
+ if (verbosity > context->verbosity) return;
uint16_t query_id = iree_hal_hip_graph_tracing_context_insert_query(
context, event_list, out_node, graph, dependency_nodes,
dependency_nodes_count);
@@ -489,6 +502,7 @@
iree_status_t iree_hal_hip_tracing_context_allocate(
const iree_hal_hip_dynamic_symbols_t* symbols,
iree_string_view_t queue_name, hipStream_t stream,
+ iree_hal_hip_tracing_verbosity_t stream_tracing_verbosity,
iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
iree_hal_hip_tracing_context_t** out_context) {
*out_context = NULL;
diff --git a/runtime/src/iree/hal/drivers/hip/tracing.h b/runtime/src/iree/hal/drivers/hip/tracing.h
index 24e12b8..8323fd7 100644
--- a/runtime/src/iree/hal/drivers/hip/tracing.h
+++ b/runtime/src/iree/hal/drivers/hip/tracing.h
@@ -52,11 +52,19 @@
iree_hal_hip_tracing_context_event_t* tail;
} iree_hal_hip_tracing_context_event_list_t;
+typedef enum iree_hal_hip_tracing_verbosity_e {
+ IREE_HAL_HIP_TRACING_VERBOSITY_OFF = 0,
+ IREE_HAL_HIP_TRACING_VERBOSITY_COARSE,
+ IREE_HAL_HIP_TRACING_VERBOSITY_FINE,
+ IREE_HAL_HIP_TRACING_VERBOSITY_MAX
+} iree_hal_hip_tracing_verbosity_t;
+
// Allocates a tracing context for the given HIP |stream|.
// Each context must only be used with the stream it was created for.
iree_status_t iree_hal_hip_tracing_context_allocate(
const iree_hal_hip_dynamic_symbols_t* symbols,
iree_string_view_t queue_name, hipStream_t stream,
+ iree_hal_hip_tracing_verbosity_t stream_tracing_verbosity,
iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
iree_hal_hip_tracing_context_t** out_context);
@@ -87,6 +95,7 @@
void iree_hal_hip_stream_tracing_zone_begin_impl(
iree_hal_hip_tracing_context_t* context,
iree_hal_hip_tracing_context_event_list_t* event_list, hipStream_t stream,
+ iree_hal_hip_tracing_verbosity_t verbosity,
const iree_tracing_location_t* src_loc);
// Begins an external zone using the given source information.
@@ -94,14 +103,15 @@
void iree_hal_hip_stream_tracing_zone_begin_external_impl(
iree_hal_hip_tracing_context_t* context,
iree_hal_hip_tracing_context_event_list_t* event_list, hipStream_t stream,
- const char* file_name, size_t file_name_length, uint32_t line,
- const char* function_name, size_t function_name_length, const char* name,
- size_t name_length);
+ iree_hal_hip_tracing_verbosity_t verbosity, const char* file_name,
+ size_t file_name_length, uint32_t line, const char* function_name,
+ size_t function_name_length, const char* name, size_t name_length);
void iree_hal_hip_graph_tracing_zone_begin_external_impl(
iree_hal_hip_tracing_context_t* context,
iree_hal_hip_tracing_context_event_list_t* event_list,
hipGraphNode_t* out_node, hipGraph_t graph,
+ iree_hal_hip_tracing_verbosity_t verbosity,
hipGraphNode_t* dependency_nodes, size_t dependency_nodes_count,
const char* file_name, size_t file_name_length, uint32_t line,
const char* function_name, size_t function_name_length, const char* name,
@@ -109,61 +119,67 @@
void iree_hal_hip_stream_tracing_zone_end_impl(
iree_hal_hip_tracing_context_t* context,
- iree_hal_hip_tracing_context_event_list_t* event_list, hipStream_t stream);
+ iree_hal_hip_tracing_context_event_list_t* event_list, hipStream_t stream,
+ iree_hal_hip_tracing_verbosity_t verbosity);
void iree_hal_hip_graph_tracing_zone_end_impl(
iree_hal_hip_tracing_context_t* context,
iree_hal_hip_tracing_context_event_list_t* event_list,
hipGraphNode_t* out_node, hipGraph_t graph,
+ iree_hal_hip_tracing_verbosity_t verbosity,
hipGraphNode_t* dependency_nodes, size_t dependency_nodes_count);
// Begins a new zone with the parent function name.
-#define IREE_HIP_STREAM_TRACE_ZONE_BEGIN(context, event_list, stream) \
+#define IREE_HIP_STREAM_TRACE_ZONE_BEGIN(context, event_list, stream, \
+ verbosity) \
static const iree_tracing_location_t TracyConcat( \
__tracy_source_location, __LINE__) = {NULL, __FUNCTION__, __FILE__, \
(uint32_t)__LINE__, 0}; \
iree_hal_hip_stream_tracing_zone_begin_impl( \
- context, event_list, stream, \
+ context, event_list, stream, verbosity, \
&TracyConcat(__tracy_source_location, __LINE__));
// Begins an externally defined zone with a dynamic source location.
// The |file_name|, |function_name|, and optional |name| strings will be copied
// into the trace buffer and do not need to persist.
-#define IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL( \
- context, event_list, stream, file_name, file_name_length, line, \
- function_name, function_name_length, name, name_length) \
- iree_hal_hip_stream_tracing_zone_begin_external_impl( \
- context, event_list, stream, file_name, file_name_length, line, \
- function_name, function_name_length, name, name_length)
+#define IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL( \
+ context, event_list, stream, verbosity, file_name, file_name_length, line, \
+ function_name, function_name_length, name, name_length) \
+ iree_hal_hip_stream_tracing_zone_begin_external_impl( \
+ context, event_list, stream, verbosity, file_name, file_name_length, \
+ line, function_name, function_name_length, name, name_length)
+
#define IREE_HIP_GRAPH_TRACE_ZONE_BEGIN_EXTERNAL( \
- context, event_list, out_node, graph, dependency_nodes, \
+ context, event_list, out_node, graph, verbosity, dependency_nodes, \
dependency_nodes_count, file_name, file_name_length, line, function_name, \
function_name_length, name, name_length) \
iree_hal_hip_graph_tracing_zone_begin_external_impl( \
- context, event_list, out_node, graph, dependency_nodes, \
+ context, event_list, out_node, graph, verbosity, dependency_nodes, \
dependency_nodes_count, file_name, file_name_length, line, \
function_name, function_name_length, name, name_length)
-#define IREE_HIP_STREAM_TRACE_ZONE_END(context, event_list, stream) \
- iree_hal_hip_stream_tracing_zone_end_impl(context, event_list, stream)
-#define IREE_HIP_GRAPH_TRACE_ZONE_END(context, event_list, out_node, graph, \
- dependency_nodes, \
- dependency_nodes_count) \
- iree_hal_hip_graph_tracing_zone_end_impl(context, event_list, out_node, \
- graph, dependency_nodes, \
+#define IREE_HIP_STREAM_TRACE_ZONE_END(context, event_list, stream, verbosity) \
+ iree_hal_hip_stream_tracing_zone_end_impl(context, event_list, stream, \
+ verbosity)
+
+#define IREE_HIP_GRAPH_TRACE_ZONE_END(context, event_list, out_node, graph, \
+ verbosity, dependency_nodes, \
+ dependency_nodes_count) \
+ iree_hal_hip_graph_tracing_zone_end_impl(context, event_list, out_node, \
+ graph, verbosity, dependency_nodes, \
dependency_nodes_count)
#else
-#define IREE_HIP_STREAM_TRACE_ZONE_BEGIN(context, event_list, stream)
-#define IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL( \
- context, event_list, stream, file_name, file_name_length, line, \
+#define IREE_HIP_STREAM_TRACE_ZONE_BEGIN(context, event_list, stream, verbosity)
+#define IREE_HIP_STREAM_TRACE_ZONE_BEGIN_EXTERNAL( \
+ context, event_list, stream, verbosity, file_name, file_name_length, line, \
function_name, function_name_length, name, name_length)
#define IREE_HIP_GRAPH_TRACE_ZONE_BEGIN_EXTERNAL( \
- context, event_list, out_node, graph, dependency_nodes, \
+ context, event_list, out_node, graph, verbosity, dependency_nodes, \
dependency_nodes_count, file_name, file_name_length, line, function_name, \
function_name_length, name, name_length)
-#define IREE_HIP_STREAM_TRACE_ZONE_END(context, evnet_list, stream)
+#define IREE_HIP_STREAM_TRACE_ZONE_END(context, event_list, stream, verbosity)
#define IREE_HIP_GRAPH_TRACE_ZONE_END(context, event_list, out_node, graph, \
- dependency_nodes, \
+ verbosity, dependency_nodes, \
dependency_nodes_count)
#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE