Adding support for the RenderDoc capture API for Vulkan devices. It is only enabled when the `-DIREE_ENABLE_RENDERDOC_PROFILING=ON` cmake option is set. Once enabled, passing `--device_profiling_mode=queue` to the various `iree-*-module` tools will perform a single capture when launched from within RenderDoc or via its remoting mechanism. The capture file name can be prepopulated with `--device_profiling_file=foo.rdc`.
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9596f68..6d6039f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -47,6 +47,7 @@ option(IREE_ENABLE_RUNTIME_TRACING "Enables instrumented runtime tracing." OFF) option(IREE_ENABLE_COMPILER_TRACING "Enables instrumented compiler tracing." OFF) +option(IREE_ENABLE_RENDERDOC_PROFILING "Enables profiling HAL devices with the RenderDoc tool." OFF) option(IREE_ENABLE_THREADING "Builds IREE in with thread library support." ON) option(IREE_ENABLE_CLANG_TIDY "Builds IREE in with clang tidy enabled on IREE's libraries." OFF)
diff --git a/docs/developers/developing_iree/profiling_vulkan_gpu.md b/docs/developers/developing_iree/profiling_vulkan_gpu.md index 294c69b..2cba22a 100644 --- a/docs/developers/developing_iree/profiling_vulkan_gpu.md +++ b/docs/developers/developing_iree/profiling_vulkan_gpu.md
@@ -8,6 +8,33 @@ (TODO: add some pictures for each tool) +## RenderDoc + +Support for [RenderDoc](https://github.com/baldurk/renderdoc) can be enabled by +configuring cmake with `-DIREE_ENABLE_RENDERDOC_PROFILING=ON`. When built into +IREE, the profiling functionality is available for programmatic use via the +`iree_hal_device_profiling_begin` and `iree_hal_device_profiling_end` APIs. + +When using one of the standard IREE tools (`iree-run-module`, +`iree-benchmark-module`, etc.), the `--device_profiling_mode=queue` flag can be +passed to enable capture around the entire invocation (be careful when +benchmarking, as the recordings can be quite large!). The default capture file +name can be specified with `--device_profiling_file=foo.rdc`. + +Capturing in the RenderDoc UI can be done by specifying the IREE tool or +embedding application (`iree-run-module`, etc.) as the launch executable and +adding all arguments as normal. + +Capturing from the command line can be done using `renderdoccmd` with the +specified file appearing (by default) in the executable directory: + +```shell +$ renderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=foo.rdc ... +$ stat tools/foo.rdc +$ renderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=/some/path/foo.rdc ... +$ stat /some/path/foo.rdc +``` + ## Android GPUs There are multiple GPU vendors for the Android platforms, each offering their
diff --git a/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt b/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt index 7df232e..c90219e 100644 --- a/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
@@ -118,3 +118,12 @@ ) ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### + +# If renderdoc support is enabled we can make use of it in the device. +# Note that we disable this by default as it introduces a backdoor. +if(IREE_ENABLE_RENDERDOC_PROFILING) + target_compile_definitions(iree_hal_drivers_vulkan_vulkan + PUBLIC + "IREE_HAL_VULKAN_HAVE_RENDERDOC=1" + ) +endif()
diff --git a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc index 26c8f10..63ad55a 100644 --- a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc +++ b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
@@ -37,6 +37,96 @@ using namespace iree::hal::vulkan; //===----------------------------------------------------------------------===// +// RenderDoc integration +//===----------------------------------------------------------------------===// + +// Configure cmake with -DIREE_ENABLE_RENDERDOC_PROFILING=ON in order to +// enable profiling support. This should be left off in production builds to +// avoid introducing a backdoor. +#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC) + +// NOTE: C API, see https://renderdoc.org/docs/in_application_api.html. +// When compiled in the API will no-op itself if not running under a RenderDoc +// capture context (renderdoc.dll/so already loaded). +#include "third_party/renderdoc/renderdoc_app.h" + +typedef RENDERDOC_API_1_5_0 RENDERDOC_API_LATEST; + +// Returns a handle to the RenderDoc API when it is hooking the process. +// Returns NULL when RenderDoc is not present (or valid). +static RENDERDOC_API_LATEST* iree_hal_vulkan_query_renderdoc_api( + VkInstance instance) { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = NULL; +#if defined(IREE_PLATFORM_WINDOWS) + + // NOTE: RenderDoc only supports hooking so we can't use LoadLibrary - if + // we're going to use RenderDoc its library must already be loaded. + if (HMODULE hook_module = GetModuleHandleA("renderdoc.dll")) { + RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)GetProcAddress(hook_module, "RENDERDOC_GetAPI"); + } + +#else + + // dlopen/dlsym on posix-like systems. Note that each platform has its own + // naming for the injected module. Because RenderDoc only supports hooking + // (where the hosting process loads the library in magic ways for us) we use + // RTLD_NOLOAD to ensure we don't accidentally try to load it when not hooked. 
+ void* hook_module = NULL; +#if defined(IREE_PLATFORM_ANDROID) + hook_module = dlopen("libVkLayer_GLES_RenderDoc.so", RTLD_NOW | RTLD_NOLOAD); +#elif defined(IREE_PLATFORM_APPLE) + hook_module = dlopen("librenderdoc.dylib", RTLD_NOW | RTLD_NOLOAD); +#elif defined(IREE_PLATFORM_LINUX) + hook_module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD); +#else +#error "RenderDoc profiling not supported on this platform" +#endif // IREE_PLATFORM_* + if (hook_module) { + RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)dlsym(hook_module, "RENDERDOC_GetAPI"); + } + +#endif // IREE_PLATFORM_WINDOWS + + if (!RENDERDOC_GetAPI) return NULL; // not found, no-op + + RENDERDOC_API_LATEST* api = NULL; + int query_result = + RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_5_0, (void**)&api); + if (query_result != 1) { + // Failed to initialize API (old version, etc). No-op. + return NULL; + } + + return api; +} + +// Begins a new RenderDoc capture. +static void iree_hal_vulkan_begin_renderdoc_capture( + RENDERDOC_API_LATEST* renderdoc_api, VkInstance instance, + const iree_hal_device_profiling_options_t* options) { + if (!renderdoc_api) return; + if (options->file_path) { + renderdoc_api->SetCaptureFilePathTemplate(options->file_path); + } + renderdoc_api->StartFrameCapture( + RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), NULL); +} + +// Ends the active RenderDoc capture, if any active. 
+static void iree_hal_vulkan_end_renderdoc_capture( + RENDERDOC_API_LATEST* renderdoc_api, VkInstance instance) { + if (!renderdoc_api) return; + if (renderdoc_api->IsFrameCapturing()) { + renderdoc_api->EndFrameCapture( + RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), NULL); + } +} + +#endif // IREE_HAL_VULKAN_HAVE_RENDERDOC + +//===----------------------------------------------------------------------===// // iree_hal_vulkan_device_t extensibility util //===----------------------------------------------------------------------===// @@ -382,6 +472,10 @@ iree_arena_block_pool_t block_pool; BuiltinExecutables* builtin_executables; + +#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC) + RENDERDOC_API_LATEST* renderdoc_api; +#endif // IREE_HAL_VULKAN_HAVE_RENDERDOC } iree_hal_vulkan_device_t; namespace { @@ -570,6 +664,10 @@ device->logical_device = logical_device; device->logical_device->AddReference(); +#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC) + device->renderdoc_api = iree_hal_vulkan_query_renderdoc_api(instance); +#endif // IREE_HAL_VULKAN_HAVE_RENDERDOC + iree_arena_block_pool_initialize(32 * 1024, host_allocator, &device->block_pool); @@ -1129,18 +1227,33 @@ } static iree_status_t iree_hal_vulkan_device_profiling_begin( - iree_hal_device_t* device, + iree_hal_device_t* base_device, const iree_hal_device_profiling_options_t* options) { - // Unimplemented (and that's ok). If counters are requested we'd use + // For now we only support RenderDoc. As much as possible we should try to use + // standardized Vulkan layers to do profiling configuration/control like // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_performance_query.html - // to acquire a lock. For most other cases we can use something like the - // RenderDoc API to directly tell an attached tool that we want to capture. + // to avoid the combinatorial explosion of vendor tooling hooks. 
+ // Since RenderDoc is fairly simple, cross-platform, and cross-vendor we + // support it here. If this grows beyond a few lines of code we should shuffle + // it off to another file. + if (iree_all_bits_set(options->mode, + IREE_HAL_DEVICE_PROFILING_MODE_QUEUE_OPERATIONS)) { +#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC) + iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device); + iree_hal_vulkan_begin_renderdoc_capture(device->renderdoc_api, + device->instance, options); +#endif // IREE_HAL_VULKAN_HAVE_RENDERDOC + } return iree_ok_status(); } static iree_status_t iree_hal_vulkan_device_profiling_end( - iree_hal_device_t* device) { - // Unimplemented (and that's ok). + iree_hal_device_t* base_device) { +#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC) + iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device); + iree_hal_vulkan_end_renderdoc_capture(device->renderdoc_api, + device->instance); +#endif // IREE_HAL_VULKAN_HAVE_RENDERDOC return iree_ok_status(); }