Adds support for the RenderDoc capture API for Vulkan devices.
This is only enabled when the `-DIREE_ENABLE_RENDERDOC_PROFILING=ON` cmake
option is set. Once enabled, passing `--device_profiling_mode=queue` to
the various `iree-*-module` tools will perform a single capture when
launched from within RenderDoc or its remoting mechanism. The capture
file name can be prepopulated with `--device_profiling_file=foo.rdc`.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9596f68..6d6039f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,6 +47,7 @@
 
 option(IREE_ENABLE_RUNTIME_TRACING "Enables instrumented runtime tracing." OFF)
 option(IREE_ENABLE_COMPILER_TRACING "Enables instrumented compiler tracing." OFF)
+option(IREE_ENABLE_RENDERDOC_PROFILING "Enables profiling HAL devices with the RenderDoc tool." OFF)
 option(IREE_ENABLE_THREADING "Builds IREE in with thread library support." ON)
 option(IREE_ENABLE_CLANG_TIDY "Builds IREE in with clang tidy enabled on IREE's libraries." OFF)
 
diff --git a/docs/developers/developing_iree/profiling_vulkan_gpu.md b/docs/developers/developing_iree/profiling_vulkan_gpu.md
index 294c69b..2cba22a 100644
--- a/docs/developers/developing_iree/profiling_vulkan_gpu.md
+++ b/docs/developers/developing_iree/profiling_vulkan_gpu.md
@@ -8,6 +8,33 @@
 
 (TODO: add some pictures for each tool)
 
+## RenderDoc
+
+Support for [RenderDoc](https://github.com/baldurk/renderdoc) can be enabled by
+configuring cmake with `-DIREE_ENABLE_RENDERDOC_PROFILING=ON`. When built into
+IREE, the profiling functionality is available for programmatic use via the
+`iree_hal_device_profiling_begin` and `iree_hal_device_profiling_end` APIs.
+
+When using one of the standard IREE tools (`iree-run-module`,
+`iree-benchmark-module`, etc.), the `--device_profiling_mode=queue` flag can be
+passed to enable capture around the entire invocation (be careful when
+benchmarking, as the recordings can be quite large!). The default capture file
+name can be specified with `--device_profiling_file=foo.rdc`.
+
+Capturing in the RenderDoc UI can be done by specifying the IREE tool or
+embedding application (`iree-run-module`, etc) as the launch executable and
+adding all arguments as normal.
+
+Capturing from the command line can be done using `renderdoccmd` with the
+specified file appearing (by default) in the executable directory:
+
+```shell
+$ renderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=foo.rdc ...
+$ stat tools/foo.rdc
+$ renderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=/some/path/foo.rdc ...
+$ stat /some/path/foo.rdc
+```
+
 ## Android GPUs
 
 There are multiple GPU vendors for the Android platforms, each offering their
diff --git a/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt b/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
index 7df232e..c90219e 100644
--- a/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
+++ b/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
@@ -118,3 +118,12 @@
 )
 
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
+
+# If renderdoc support is enabled we can make use of it in the device.
+# Note that we disable this by default as it introduces a backdoor.
+if(IREE_ENABLE_RENDERDOC_PROFILING)
+  target_compile_definitions(iree_hal_drivers_vulkan_vulkan
+    PUBLIC
+      "IREE_HAL_VULKAN_HAVE_RENDERDOC=1"
+  )
+endif()
diff --git a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
index 26c8f10..63ad55a 100644
--- a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
@@ -37,6 +37,96 @@
 using namespace iree::hal::vulkan;
 
 //===----------------------------------------------------------------------===//
+// RenderDoc integration
+//===----------------------------------------------------------------------===//
+
+// Configure cmake with -DIREE_ENABLE_RENDERDOC_PROFILING=ON in order to
+// enable profiling support. This should be left off in production builds to
+// avoid introducing a backdoor.
+#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC)
+
+// NOTE: C API, see https://renderdoc.org/docs/in_application_api.html.
+// When compiled in the API will no-op itself if not running under a RenderDoc
+// capture context (renderdoc.dll/so already loaded).
+#include "third_party/renderdoc/renderdoc_app.h"
+
+typedef RENDERDOC_API_1_5_0 RENDERDOC_API_LATEST;
+
+// Returns the RenderDoc 1.5.0 API if RenderDoc has hooked this process.
+// Returns NULL if it is not loaded or the query fails; |instance| is unused.
+static RENDERDOC_API_LATEST* iree_hal_vulkan_query_renderdoc_api(
+    VkInstance instance) {
+  pRENDERDOC_GetAPI RENDERDOC_GetAPI = NULL;
+#if defined(IREE_PLATFORM_WINDOWS)
+
+  // NOTE: RenderDoc only works by hooking, so LoadLibrary must not be used:
+  // GetModuleHandleA only finds renderdoc.dll if it is already loaded.
+  if (HMODULE hook_module = GetModuleHandleA("renderdoc.dll")) {
+    RENDERDOC_GetAPI =
+        (pRENDERDOC_GetAPI)GetProcAddress(hook_module, "RENDERDOC_GetAPI");
+  }
+
+#else
+
+  // dlopen/dlsym on POSIX-like systems. Each platform names the injected
+  // module differently. RenderDoc only works by hooking (the hosting process
+  // loads the library for us), so RTLD_NOLOAD is used to avoid loading the
+  // library ourselves when the process is not hooked.
+  void* hook_module = NULL;
+#if defined(IREE_PLATFORM_ANDROID)
+  hook_module = dlopen("libVkLayer_GLES_RenderDoc.so", RTLD_NOW | RTLD_NOLOAD);
+#elif defined(IREE_PLATFORM_APPLE)
+  hook_module = dlopen("librenderdoc.dylib", RTLD_NOW | RTLD_NOLOAD);
+#elif defined(IREE_PLATFORM_LINUX)
+  hook_module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD);
+#else
+#error "RenderDoc profiling not supported on this platform"
+#endif  // IREE_PLATFORM_*
+  if (hook_module) {
+    RENDERDOC_GetAPI =
+        (pRENDERDOC_GetAPI)dlsym(hook_module, "RENDERDOC_GetAPI");
+  }
+
+#endif  // IREE_PLATFORM_WINDOWS
+
+  if (!RENDERDOC_GetAPI) return NULL;  // entry point not found; no-op
+
+  RENDERDOC_API_LATEST* api = NULL;
+  int query_result =
+      RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_5_0, (void**)&api);
+  if (query_result != 1) {
+    // API query failed (e.g. RenderDoc is older than 1.5.0). No-op.
+    return NULL;
+  }
+
+  return api;
+}
+
+// Begins a new RenderDoc capture. No-op when |renderdoc_api| is NULL.
+static void iree_hal_vulkan_begin_renderdoc_capture(
+    RENDERDOC_API_LATEST* renderdoc_api, VkInstance instance,
+    const iree_hal_device_profiling_options_t* options) {
+  if (!renderdoc_api) return;
+  if (options->file_path) {  // optional capture file path template
+    renderdoc_api->SetCaptureFilePathTemplate(options->file_path);
+  }
+  renderdoc_api->StartFrameCapture(
+      RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), NULL);
+}
+
+// Ends the active RenderDoc capture, if any. No-op when the API is NULL.
+static void iree_hal_vulkan_end_renderdoc_capture(
+    RENDERDOC_API_LATEST* renderdoc_api, VkInstance instance) {
+  if (!renderdoc_api) return;
+  if (renderdoc_api->IsFrameCapturing()) {
+    renderdoc_api->EndFrameCapture(
+        RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), NULL);
+  }
+}
+
+#endif  // IREE_HAL_VULKAN_HAVE_RENDERDOC
+
+//===----------------------------------------------------------------------===//
 // iree_hal_vulkan_device_t extensibility util
 //===----------------------------------------------------------------------===//
 
@@ -382,6 +472,10 @@
   iree_arena_block_pool_t block_pool;
 
   BuiltinExecutables* builtin_executables;
+
+#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC)
+  RENDERDOC_API_LATEST* renderdoc_api;
+#endif  // IREE_HAL_VULKAN_HAVE_RENDERDOC
 } iree_hal_vulkan_device_t;
 
 namespace {
@@ -570,6 +664,10 @@
   device->logical_device = logical_device;
   device->logical_device->AddReference();
 
+#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC)
+  device->renderdoc_api = iree_hal_vulkan_query_renderdoc_api(instance);
+#endif  // IREE_HAL_VULKAN_HAVE_RENDERDOC
+
   iree_arena_block_pool_initialize(32 * 1024, host_allocator,
                                    &device->block_pool);
 
@@ -1129,18 +1227,33 @@
 }
 
 static iree_status_t iree_hal_vulkan_device_profiling_begin(
-    iree_hal_device_t* device,
+    iree_hal_device_t* base_device,
     const iree_hal_device_profiling_options_t* options) {
-  // Unimplemented (and that's ok). If counters are requested we'd use
+  // For now we only support RenderDoc. As much as possible we should try to use
+  // standardized Vulkan layers to do profiling configuration/control like
   // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_performance_query.html
-  // to acquire a lock. For most other cases we can use something like the
-  // RenderDoc API to directly tell an attached tool that we want to capture.
+  // to avoid the combinatorial explosion of vendor tooling hooks.
+  // Since RenderDoc is fairly simple, cross-platform, and cross-vendor we
+  // support it here. If this grows beyond a few lines of code we should shuffle
+  // it off to another file.
+  if (iree_all_bits_set(options->mode,
+                        IREE_HAL_DEVICE_PROFILING_MODE_QUEUE_OPERATIONS)) {
+#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC)
+    iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+    iree_hal_vulkan_begin_renderdoc_capture(device->renderdoc_api,
+                                            device->instance, options);
+#endif  // IREE_HAL_VULKAN_HAVE_RENDERDOC
+  }
   return iree_ok_status();
 }
 
 static iree_status_t iree_hal_vulkan_device_profiling_end(
-    iree_hal_device_t* device) {
-  // Unimplemented (and that's ok).
+    iree_hal_device_t* base_device) {
+#if defined(IREE_HAL_VULKAN_HAVE_RENDERDOC)
+  iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
+  iree_hal_vulkan_end_renderdoc_capture(device->renderdoc_api,
+                                        device->instance);
+#endif  // IREE_HAL_VULKAN_HAVE_RENDERDOC
   return iree_ok_status();
 }