[HAL/AMDGPU] Abstract profile device clock sampling

Move profiling clock-correlation sampling behind util/device_clock. The
system object now owns a platform clock source instead of a raw KFD
descriptor, logical devices sample through that source, and physical
devices carry the HSA driver_uid without baking the Linux KFD name into
the core device identity.

Keep util/kfd as the Linux ioctl transport. It now returns raw
AMDKFD_IOC_GET_CLOCK_COUNTERS values, while device_clock owns the generic
validation and source-type dispatch so future Windows or macOS HSA
support has a named unavailable/source boundary instead of spreading
platform branches through profiling consumers.

Emit clock-correlation chunks only for profiling data families that
consume HSA/device timestamps. Host-only queue events, memory events,
executable metadata, and device metrics no longer require a device clock
source, which keeps unrelated profiling modes alive on platforms without
an equivalent clock-correlation API.

diff --git a/runtime/src/iree/hal/drivers/amdgpu/BUILD.bazel b/runtime/src/iree/hal/drivers/amdgpu/BUILD.bazel index ba0a6c5..4784e75 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/BUILD.bazel +++ b/runtime/src/iree/hal/drivers/amdgpu/BUILD.bazel
@@ -167,6 +167,7 @@ "//runtime/src/iree/hal/drivers/amdgpu/device/binaries:toc", "//runtime/src/iree/hal/drivers/amdgpu/util:block_pool", "//runtime/src/iree/hal/drivers/amdgpu/util:code_object_target", + "//runtime/src/iree/hal/drivers/amdgpu/util:device_clock", "//runtime/src/iree/hal/drivers/amdgpu/util:device_library", "//runtime/src/iree/hal/drivers/amdgpu/util:hsaco_metadata", "//runtime/src/iree/hal/drivers/amdgpu/util:info",
diff --git a/runtime/src/iree/hal/drivers/amdgpu/CMakeLists.txt b/runtime/src/iree/hal/drivers/amdgpu/CMakeLists.txt index ac10d0f..0e92cf1 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/amdgpu/CMakeLists.txt
@@ -173,6 +173,7 @@ iree::hal::drivers::amdgpu::device::timestamp iree::hal::drivers::amdgpu::util::block_pool iree::hal::drivers::amdgpu::util::code_object_target + iree::hal::drivers::amdgpu::util::device_clock iree::hal::drivers::amdgpu::util::device_library iree::hal::drivers::amdgpu::util::hsaco_metadata iree::hal::drivers::amdgpu::util::info
diff --git a/runtime/src/iree/hal/drivers/amdgpu/host_queue_command_buffer_test.cc b/runtime/src/iree/hal/drivers/amdgpu/host_queue_command_buffer_test.cc index 0dbd202..0f1344a 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/host_queue_command_buffer_test.cc +++ b/runtime/src/iree/hal/drivers/amdgpu/host_queue_command_buffer_test.cc
@@ -1730,7 +1730,7 @@ ASSERT_EQ(physical_device_count, sink.device_records.size()); ASSERT_EQ(physical_device_count * queue_count_per_physical_device, sink.queue_records.size()); - EXPECT_GE(sink.clock_correlations.size(), 2 * physical_device_count); + EXPECT_TRUE(sink.clock_correlations.empty()); for (const auto& device_record : sink.device_records) { EXPECT_LT(device_record.physical_device_ordinal, physical_device_count); @@ -1740,9 +1740,6 @@ EXPECT_LT(queue_record.physical_device_ordinal, physical_device_count); EXPECT_LT(queue_record.queue_ordinal, queue_count_per_physical_device); } - for (const auto& clock_correlation : sink.clock_correlations) { - EXPECT_LT(clock_correlation.physical_device_ordinal, physical_device_count); - } } TEST_F(HostQueueCommandBufferTest, SinklessProfilingBeginFails) { @@ -1899,7 +1896,7 @@ const iree_host_size_t event_capacity = test_device.logical_device() - ->profiling.event_streams.queue.stream.capacity; + ->profiling.event_streams.queue.stream.ring.capacity; ASSERT_GT(event_capacity, 0u); for (iree_host_size_t i = 0; i <= event_capacity; ++i) { iree_hal_profile_queue_event_t event = @@ -1938,7 +1935,7 @@ const iree_host_size_t event_capacity = test_device.logical_device() - ->profiling.event_streams.memory.stream.capacity; + ->profiling.event_streams.memory.stream.ring.capacity; ASSERT_GT(event_capacity, 0u); iree_host_size_t recorded_count = 0; for (iree_host_size_t i = 0; i <= event_capacity; ++i) {
diff --git a/runtime/src/iree/hal/drivers/amdgpu/logical_device.c b/runtime/src/iree/hal/drivers/amdgpu/logical_device.c index a9b2f2f..9098d84 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/logical_device.c +++ b/runtime/src/iree/hal/drivers/amdgpu/logical_device.c
@@ -489,10 +489,11 @@ physical_device->device_ordinal); } - iree_hal_amdgpu_clock_counters_t counters = {0}; + iree_hal_amdgpu_device_clock_counters_t counters = {0}; const iree_time_t host_time_begin_ns = iree_time_now(); - iree_status_t status = iree_hal_amdgpu_kfd_get_clock_counters( - logical_device->system->kfd_fd, physical_device->kfd_gpu_uid, &counters); + iree_status_t status = iree_hal_amdgpu_device_clock_source_sample( + &logical_device->system->device_clock_source, physical_device->driver_uid, + &counters); const iree_time_t host_time_end_ns = iree_time_now(); if (iree_status_is_ok(status)) { @@ -506,18 +507,18 @@ (uint32_t)physical_device->device_ordinal; out_record->sample_id = logical_device->profiling.next_clock_correlation_sample_id++; - out_record->device_tick = counters.gpu_clock_counter; - out_record->host_cpu_timestamp_ns = counters.cpu_clock_counter; - out_record->host_system_timestamp = counters.system_clock_counter; - out_record->host_system_frequency_hz = counters.system_clock_freq; + out_record->device_tick = counters.device_clock_counter; + out_record->host_cpu_timestamp_ns = counters.host_cpu_timestamp_ns; + out_record->host_system_timestamp = counters.host_system_timestamp; + out_record->host_system_frequency_hz = counters.host_system_frequency_hz; out_record->host_time_begin_ns = host_time_begin_ns; out_record->host_time_end_ns = host_time_end_ns; } else { status = iree_status_annotate_f( status, "sampling profile clock correlation for physical_device_ordinal=%zu " - "gpu_uid=%" PRIu32, - physical_device->device_ordinal, physical_device->kfd_gpu_uid); + "driver_uid=%" PRIu32, + physical_device->device_ordinal, physical_device->driver_uid); } return status; } @@ -746,8 +747,12 @@ &logical_device->profile_metadata, sink, session_id, logical_device->identifier, emit_executable_artifacts, &logical_device->profiling.metadata_cursor)); - return iree_hal_amdgpu_logical_device_write_profile_clock_correlations( - logical_device, sink, 
session_id); + if (iree_hal_amdgpu_logical_device_profiling_needs_hsa_timestamps( + data_families)) { + return iree_hal_amdgpu_logical_device_write_profile_clock_correlations( + logical_device, sink, session_id); + } + return iree_ok_status(); } static iree_status_t iree_hal_amdgpu_logical_device_write_profile_events( @@ -2643,9 +2648,12 @@ emit_executable_artifacts, &logical_device->profiling.metadata_cursor)); IREE_RETURN_IF_ERROR(iree_hal_amdgpu_logical_device_write_profile_events( logical_device, sink, logical_device->profiling.session_id)); - IREE_RETURN_IF_ERROR( - iree_hal_amdgpu_logical_device_write_profile_clock_correlations( - logical_device, sink, logical_device->profiling.session_id)); + if (iree_hal_amdgpu_logical_device_profiling_needs_hsa_timestamps( + options->data_families)) { + IREE_RETURN_IF_ERROR( + iree_hal_amdgpu_logical_device_write_profile_clock_correlations( + logical_device, sink, logical_device->profiling.session_id)); + } return iree_hal_amdgpu_profile_device_metrics_session_sample_and_write( logical_device->profiling.device_metrics_session, sink, logical_device->profiling.session_id, logical_device->identifier); @@ -2686,7 +2694,9 @@ status = iree_hal_amdgpu_logical_device_write_profile_events( logical_device, sink, session_id); } - if (iree_status_is_ok(status)) { + if (iree_status_is_ok(status) && + iree_hal_amdgpu_logical_device_profiling_needs_hsa_timestamps( + data_families)) { status = iree_hal_amdgpu_logical_device_write_profile_clock_correlations( logical_device, sink, session_id); }
diff --git a/runtime/src/iree/hal/drivers/amdgpu/physical_device.c b/runtime/src/iree/hal/drivers/amdgpu/physical_device.c index 5a5c347..d7f26dc 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/physical_device.c +++ b/runtime/src/iree/hal/drivers/amdgpu/physical_device.c
@@ -412,7 +412,7 @@ IREE_RETURN_IF_ERROR( iree_hsa_agent_get_info(IREE_LIBHSA(libhsa), device_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_UID, - &out_physical_device->kfd_gpu_uid)); + &out_physical_device->driver_uid)); IREE_RETURN_IF_ERROR(iree_hal_amdgpu_query_agent_pci_identity( libhsa, device_agent, out_physical_device)); bool has_physical_device_uuid = false;
diff --git a/runtime/src/iree/hal/drivers/amdgpu/physical_device.h b/runtime/src/iree/hal/drivers/amdgpu/physical_device.h index 26124b0..83204c3 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/physical_device.h +++ b/runtime/src/iree/hal/drivers/amdgpu/physical_device.h
@@ -172,8 +172,8 @@ hsa_agent_t device_agent; // Ordinal of the GPU agent within the topology. iree_host_size_t device_ordinal; - // KFD GPU identifier used when querying per-device clock counters. - uint32_t kfd_gpu_uid; + // HSA driver identifier used when querying per-device clock counters. + uint32_t driver_uid; // PCI domain from HSA_AMD_AGENT_INFO_DOMAIN. uint32_t pci_domain; // PCI bus decoded from HSA_AMD_AGENT_INFO_BDFID.
diff --git a/runtime/src/iree/hal/drivers/amdgpu/system.c b/runtime/src/iree/hal/drivers/amdgpu/system.c index 1956f91..a201cec 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/system.c +++ b/runtime/src/iree/hal/drivers/amdgpu/system.c
@@ -7,7 +7,6 @@ #include "iree/hal/drivers/amdgpu/system.h" #include "iree/hal/drivers/amdgpu/executable.h" -#include "iree/hal/drivers/amdgpu/util/kfd.h" #include "iree/hal/drivers/amdgpu/util/topology.h" //===----------------------------------------------------------------------===// @@ -240,9 +239,10 @@ IREE_RETURN_AND_END_ZONE_IF_ERROR( z0, iree_hal_amdgpu_system_info_query(libhsa, &out_system->info)); - // Open /dev/kfd so that we can issue ioctls directly. + // Initialize the platform source used for profile clock correlation. IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, iree_hal_amdgpu_kfd_open(&out_system->kfd_fd)); + z0, iree_hal_amdgpu_device_clock_source_initialize( + &out_system->device_clock_source)); // Copy the libhsa symbol table and retain HSA for the lifetime of the system. // The caller may destroy the provided libhsa after this call returns. @@ -295,8 +295,9 @@ // Unload the device library - no references to it should remain. iree_hal_amdgpu_device_library_deinitialize(&system->device_library); - // Close our handle to /dev/kfd prior to (potentially) unloading HSA. - iree_hal_amdgpu_kfd_close(system->kfd_fd); + // Release platform clock-sampling state before unloading HSA. + iree_hal_amdgpu_device_clock_source_deinitialize( + &system->device_clock_source); // This may unload HSA if we were the last retainer in the process. iree_hal_amdgpu_libhsa_deinitialize(&system->libhsa);
diff --git a/runtime/src/iree/hal/drivers/amdgpu/system.h b/runtime/src/iree/hal/drivers/amdgpu/system.h index 297ca72..a2ec00f 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/system.h +++ b/runtime/src/iree/hal/drivers/amdgpu/system.h
@@ -8,6 +8,7 @@ #define IREE_HAL_DRIVERS_AMDGPU_SYSTEM_H_ #include "iree/base/api.h" +#include "iree/hal/drivers/amdgpu/util/device_clock.h" #include "iree/hal/drivers/amdgpu/util/device_library.h" #include "iree/hal/drivers/amdgpu/util/info.h" #include "iree/hal/drivers/amdgpu/util/libhsa.h" @@ -68,9 +69,8 @@ // HSA API handle. iree_hal_amdgpu_libhsa_t libhsa; - // /dev/kfd handle, or -1 when unavailable on the platform. - // TODO(benvanik): drop this when HSA supports all of the ioctls we need. - int kfd_fd; + // Platform source used for device/host clock-correlation sampling. + iree_hal_amdgpu_device_clock_source_t device_clock_source; // System topology as visible to the HAL device. This may be a subset of // the devices available in the system.
diff --git a/runtime/src/iree/hal/drivers/amdgpu/util/BUILD.bazel b/runtime/src/iree/hal/drivers/amdgpu/util/BUILD.bazel index dc5947f..86249fd 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/util/BUILD.bazel +++ b/runtime/src/iree/hal/drivers/amdgpu/util/BUILD.bazel
@@ -234,6 +234,16 @@ ) iree_runtime_cc_library( + name = "device_clock", + srcs = ["device_clock.c"], + hdrs = ["device_clock.h"], + deps = [ + ":kfd", + "//runtime/src/iree/base", + ], +) + +iree_runtime_cc_library( name = "kfd", srcs = ["kfd.c"], hdrs = ["kfd.h"], @@ -479,6 +489,17 @@ ) iree_runtime_cc_test( + name = "device_clock_test", + srcs = ["device_clock_test.cc"], + deps = [ + ":device_clock", + "//runtime/src/iree/base", + "//runtime/src/iree/testing:gtest", + "//runtime/src/iree/testing:gtest_main", + ], +) + +iree_runtime_cc_test( name = "kfd_test", srcs = ["kfd_test.cc"], group = "iree-hal-drivers-amdgpu-tests",
diff --git a/runtime/src/iree/hal/drivers/amdgpu/util/CMakeLists.txt b/runtime/src/iree/hal/drivers/amdgpu/util/CMakeLists.txt index e9bcf70..cde6837 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/util/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/amdgpu/util/CMakeLists.txt
@@ -239,6 +239,19 @@ iree_cc_library( NAME + device_clock + HDRS + "device_clock.h" + SRCS + "device_clock.c" + DEPS + ::kfd + iree::base + PUBLIC +) + +iree_cc_library( + NAME kfd HDRS "kfd.h" @@ -512,6 +525,18 @@ iree_cc_test( NAME + device_clock_test + SRCS + "device_clock_test.cc" + DEPS + ::device_clock + iree::base + iree::testing::gtest + iree::testing::gtest_main +) + +iree_cc_test( + NAME kfd_test SRCS "kfd_test.cc"
diff --git a/runtime/src/iree/hal/drivers/amdgpu/util/device_clock.c b/runtime/src/iree/hal/drivers/amdgpu/util/device_clock.c new file mode 100644 index 0000000..68464f1 --- /dev/null +++ b/runtime/src/iree/hal/drivers/amdgpu/util/device_clock.c
@@ -0,0 +1,97 @@ +// Copyright 2026 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree/hal/drivers/amdgpu/util/device_clock.h" + +#include <inttypes.h> +#include <string.h> + +#include "iree/hal/drivers/amdgpu/util/kfd.h" + +iree_status_t iree_hal_amdgpu_device_clock_counters_validate( + uint32_t driver_uid, + const iree_hal_amdgpu_device_clock_counters_t* counters) { + IREE_ASSERT_ARGUMENT(counters); + if (IREE_UNLIKELY(counters->device_clock_counter == 0)) { + return iree_make_status( + IREE_STATUS_FAILED_PRECONDITION, + "device clock source returned an invalid zero device_clock_counter for " + "driver_uid=%" PRIu32, + driver_uid); + } + if (IREE_UNLIKELY(counters->host_cpu_timestamp_ns == 0)) { + return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, + "device clock source returned an invalid zero " + "host_cpu_timestamp_ns for driver_uid=%" PRIu32, + driver_uid); + } + if (IREE_UNLIKELY(counters->host_system_timestamp == 0)) { + return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, + "device clock source returned an invalid zero " + "host_system_timestamp for driver_uid=%" PRIu32, + driver_uid); + } + if (IREE_UNLIKELY(counters->host_system_frequency_hz == 0)) { + return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, + "device clock source returned an invalid zero " + "host_system_frequency_hz for driver_uid=%" PRIu32, + driver_uid); + } + return iree_ok_status(); +} + +iree_status_t iree_hal_amdgpu_device_clock_source_initialize( + iree_hal_amdgpu_device_clock_source_t* out_source) { + IREE_ASSERT_ARGUMENT(out_source); + memset(out_source, 0, sizeof(*out_source)); + out_source->platform_handle = -1; + +#if defined(IREE_PLATFORM_LINUX) + int kfd = -1; + IREE_RETURN_IF_ERROR(iree_hal_amdgpu_kfd_open(&kfd)); + out_source->platform_handle = (intptr_t)kfd; + out_source->type = 
IREE_HAL_AMDGPU_DEVICE_CLOCK_SOURCE_TYPE_LINUX_KFD; +#endif // IREE_PLATFORM_LINUX + + return iree_ok_status(); +} + +void iree_hal_amdgpu_device_clock_source_deinitialize( + iree_hal_amdgpu_device_clock_source_t* source) { + if (!source) return; + if (source->type == IREE_HAL_AMDGPU_DEVICE_CLOCK_SOURCE_TYPE_LINUX_KFD) { + iree_hal_amdgpu_kfd_close((int)source->platform_handle); + } + memset(source, 0, sizeof(*source)); + source->platform_handle = -1; +} + +iree_status_t iree_hal_amdgpu_device_clock_source_sample( + const iree_hal_amdgpu_device_clock_source_t* source, uint32_t driver_uid, + iree_hal_amdgpu_device_clock_counters_t* out_counters) { + IREE_ASSERT_ARGUMENT(source); + IREE_ASSERT_ARGUMENT(out_counters); + memset(out_counters, 0, sizeof(*out_counters)); + + switch (source->type) { + case IREE_HAL_AMDGPU_DEVICE_CLOCK_SOURCE_TYPE_LINUX_KFD: { + iree_hal_amdgpu_kfd_clock_counters_t kfd_counters = {0}; + IREE_RETURN_IF_ERROR(iree_hal_amdgpu_kfd_get_clock_counters( + (int)source->platform_handle, driver_uid, &kfd_counters)); + out_counters->device_clock_counter = kfd_counters.gpu_clock_counter; + out_counters->host_cpu_timestamp_ns = kfd_counters.cpu_clock_counter; + out_counters->host_system_timestamp = kfd_counters.system_clock_counter; + out_counters->host_system_frequency_hz = kfd_counters.system_clock_freq; + return iree_hal_amdgpu_device_clock_counters_validate(driver_uid, + out_counters); + } + case IREE_HAL_AMDGPU_DEVICE_CLOCK_SOURCE_TYPE_UNAVAILABLE: + default: + return iree_make_status( + IREE_STATUS_UNIMPLEMENTED, + "AMDGPU device clock sampling is unavailable on this platform"); + } +}
diff --git a/runtime/src/iree/hal/drivers/amdgpu/util/device_clock.h b/runtime/src/iree/hal/drivers/amdgpu/util/device_clock.h new file mode 100644 index 0000000..b98a765 --- /dev/null +++ b/runtime/src/iree/hal/drivers/amdgpu/util/device_clock.h
@@ -0,0 +1,72 @@ +// Copyright 2026 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_HAL_DRIVERS_AMDGPU_UTIL_DEVICE_CLOCK_H_ +#define IREE_HAL_DRIVERS_AMDGPU_UTIL_DEVICE_CLOCK_H_ + +#include "iree/base/api.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Device and host clock counters sampled by one platform source. +typedef struct iree_hal_amdgpu_device_clock_counters_t { + // Device clock counter sampled for the requested GPU. + uint64_t device_clock_counter; + + // Host CPU timestamp sampled near the device clock read. + uint64_t host_cpu_timestamp_ns; + + // Host system clock counter sampled near the device clock read. + uint64_t host_system_timestamp; + + // Frequency in Hz for |host_system_timestamp|. + uint64_t host_system_frequency_hz; +} iree_hal_amdgpu_device_clock_counters_t; + +// Validates that |counters| contains a usable clock-correlation sample. +iree_status_t iree_hal_amdgpu_device_clock_counters_validate( + uint32_t driver_uid, + const iree_hal_amdgpu_device_clock_counters_t* counters); + +// Platform implementation used for device/host clock-correlation sampling. +typedef enum iree_hal_amdgpu_device_clock_source_type_e { + IREE_HAL_AMDGPU_DEVICE_CLOCK_SOURCE_TYPE_UNAVAILABLE = 0, + IREE_HAL_AMDGPU_DEVICE_CLOCK_SOURCE_TYPE_LINUX_KFD = 1, +} iree_hal_amdgpu_device_clock_source_type_t; + +// Platform device-clock sampling source. +// +// Linux currently backs this with KFD's AMDKFD_IOC_GET_CLOCK_COUNTERS ioctl. +// Other platforms keep the source unavailable until their HSA runtime exposes +// equivalent device/host clock correlation. +typedef struct iree_hal_amdgpu_device_clock_source_t { + // Active platform sampling implementation. 
+ iree_hal_amdgpu_device_clock_source_type_t type; + + // Opaque platform handle for the active clock source, or -1 when unavailable. + intptr_t platform_handle; +} iree_hal_amdgpu_device_clock_source_t; + +// Initializes a platform device-clock source. +iree_status_t iree_hal_amdgpu_device_clock_source_initialize( + iree_hal_amdgpu_device_clock_source_t* out_source); + +// Deinitializes |source| and releases its platform handle, if any. +void iree_hal_amdgpu_device_clock_source_deinitialize( + iree_hal_amdgpu_device_clock_source_t* source); + +// Samples clock counters for the GPU with HSA driver UID |driver_uid|. +iree_status_t iree_hal_amdgpu_device_clock_source_sample( + const iree_hal_amdgpu_device_clock_source_t* source, uint32_t driver_uid, + iree_hal_amdgpu_device_clock_counters_t* out_counters); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // IREE_HAL_DRIVERS_AMDGPU_UTIL_DEVICE_CLOCK_H_
diff --git a/runtime/src/iree/hal/drivers/amdgpu/util/device_clock_test.cc b/runtime/src/iree/hal/drivers/amdgpu/util/device_clock_test.cc new file mode 100644 index 0000000..f96b3aa --- /dev/null +++ b/runtime/src/iree/hal/drivers/amdgpu/util/device_clock_test.cc
@@ -0,0 +1,71 @@ +// Copyright 2026 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree/hal/drivers/amdgpu/util/device_clock.h" + +#include "iree/testing/gtest.h" +#include "iree/testing/status_matchers.h" + +namespace iree::hal::amdgpu { +namespace { + +TEST(DeviceClockTest, ValidateCounters) { + iree_hal_amdgpu_device_clock_counters_t counters = { + /*.device_clock_counter=*/1, + /*.host_cpu_timestamp_ns=*/2, + /*.host_system_timestamp=*/3, + /*.host_system_frequency_hz=*/4, + }; + IREE_EXPECT_OK( + iree_hal_amdgpu_device_clock_counters_validate(1234, &counters)); + + counters.device_clock_counter = 0; + IREE_EXPECT_STATUS_IS( + IREE_STATUS_FAILED_PRECONDITION, + iree_hal_amdgpu_device_clock_counters_validate(1234, &counters)); + counters.device_clock_counter = 1; + + counters.host_cpu_timestamp_ns = 0; + IREE_EXPECT_STATUS_IS( + IREE_STATUS_FAILED_PRECONDITION, + iree_hal_amdgpu_device_clock_counters_validate(1234, &counters)); + counters.host_cpu_timestamp_ns = 2; + + counters.host_system_timestamp = 0; + IREE_EXPECT_STATUS_IS( + IREE_STATUS_FAILED_PRECONDITION, + iree_hal_amdgpu_device_clock_counters_validate(1234, &counters)); + counters.host_system_timestamp = 3; + + counters.host_system_frequency_hz = 0; + IREE_EXPECT_STATUS_IS( + IREE_STATUS_FAILED_PRECONDITION, + iree_hal_amdgpu_device_clock_counters_validate(1234, &counters)); +} + +TEST(DeviceClockTest, UnavailableSourceSampleFailsExplicitly) { + iree_hal_amdgpu_device_clock_source_t source = { + /*.type=*/IREE_HAL_AMDGPU_DEVICE_CLOCK_SOURCE_TYPE_UNAVAILABLE, + /*.platform_handle=*/-1, + }; + iree_hal_amdgpu_device_clock_counters_t counters = { + /*.device_clock_counter=*/1, + /*.host_cpu_timestamp_ns=*/2, + /*.host_system_timestamp=*/3, + /*.host_system_frequency_hz=*/4, + }; + + IREE_EXPECT_STATUS_IS( + IREE_STATUS_UNIMPLEMENTED, 
+ iree_hal_amdgpu_device_clock_source_sample(&source, 1234, &counters)); + EXPECT_EQ(counters.device_clock_counter, 0); + EXPECT_EQ(counters.host_cpu_timestamp_ns, 0); + EXPECT_EQ(counters.host_system_timestamp, 0); + EXPECT_EQ(counters.host_system_frequency_hz, 0); +} + +} // namespace +} // namespace iree::hal::amdgpu
diff --git a/runtime/src/iree/hal/drivers/amdgpu/util/kfd.c b/runtime/src/iree/hal/drivers/amdgpu/util/kfd.c index 8e79330..9b1af7a 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/util/kfd.c +++ b/runtime/src/iree/hal/drivers/amdgpu/util/kfd.c
@@ -89,36 +89,6 @@ // AMDKFD_IOC_GET_CLOCK_COUNTERS //===----------------------------------------------------------------------===// -iree_status_t iree_hal_amdgpu_kfd_validate_clock_counters( - uint32_t gpu_uid, const iree_hal_amdgpu_clock_counters_t* counters) { - IREE_ASSERT_ARGUMENT(counters); - if (IREE_UNLIKELY(counters->gpu_clock_counter == 0)) { - return iree_make_status( - IREE_STATUS_FAILED_PRECONDITION, - "KFD returned an invalid zero gpu_clock_counter for gpu_uid=%" PRIu32, - gpu_uid); - } - if (IREE_UNLIKELY(counters->cpu_clock_counter == 0)) { - return iree_make_status( - IREE_STATUS_FAILED_PRECONDITION, - "KFD returned an invalid zero cpu_clock_counter for gpu_uid=%" PRIu32, - gpu_uid); - } - if (IREE_UNLIKELY(counters->system_clock_counter == 0)) { - return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, - "KFD returned an invalid zero " - "system_clock_counter for gpu_uid=%" PRIu32, - gpu_uid); - } - if (IREE_UNLIKELY(counters->system_clock_freq == 0)) { - return iree_make_status( - IREE_STATUS_FAILED_PRECONDITION, - "KFD returned an invalid zero system_clock_freq for gpu_uid=%" PRIu32, - gpu_uid); - } - return iree_ok_status(); -} - #if defined(IREE_PLATFORM_LINUX) #define IREE_AMDKFD_IOC_GET_CLOCK_COUNTERS \ @@ -145,41 +115,43 @@ }; iree_status_t iree_hal_amdgpu_kfd_get_clock_counters( - int fd, uint32_t gpu_uid, iree_hal_amdgpu_clock_counters_t* out_counters) { + int fd, uint32_t driver_uid, + iree_hal_amdgpu_kfd_clock_counters_t* out_counters) { IREE_ASSERT_ARGUMENT(out_counters); memset(out_counters, 0, sizeof(*out_counters)); if (IREE_UNLIKELY(fd < 0)) { return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "invalid /dev/kfd file descriptor for " - "AMDKFD_IOC_GET_CLOCK_COUNTERS(gpu_uid=%" PRIu32 + "AMDKFD_IOC_GET_CLOCK_COUNTERS(driver_uid=%" PRIu32 ")", - gpu_uid); + driver_uid); } struct iree_kfd_ioctl_get_clock_counters_args args = {0}; - args.gpu_id = gpu_uid; + args.gpu_id = driver_uid; int kmt_err = iree_hal_amdgpu_ioctl(fd, 
IREE_AMDKFD_IOC_GET_CLOCK_COUNTERS, &args); if (IREE_UNLIKELY(kmt_err < 0)) { const int errsv = errno; return iree_make_status(IREE_STATUS_INTERNAL, - "AMDKFD_IOC_GET_CLOCK_COUNTERS(gpu_uid=%" PRIu32 + "AMDKFD_IOC_GET_CLOCK_COUNTERS(driver_uid=%" PRIu32 ") failed with %d; errno=%d (%s)", - gpu_uid, kmt_err, errsv, strerror(errsv)); + driver_uid, kmt_err, errsv, strerror(errsv)); } out_counters->gpu_clock_counter = args.gpu_clock_counter; out_counters->cpu_clock_counter = args.cpu_clock_counter; out_counters->system_clock_counter = args.system_clock_counter; out_counters->system_clock_freq = args.system_clock_freq; - return iree_hal_amdgpu_kfd_validate_clock_counters(gpu_uid, out_counters); + return iree_ok_status(); } #else iree_status_t iree_hal_amdgpu_kfd_get_clock_counters( - int fd, uint32_t gpu_uid, iree_hal_amdgpu_clock_counters_t* out_counters) { + int fd, uint32_t driver_uid, + iree_hal_amdgpu_kfd_clock_counters_t* out_counters) { (void)fd; - (void)gpu_uid; + (void)driver_uid; memset(out_counters, 0, sizeof(*out_counters)); return iree_make_status( IREE_STATUS_UNIMPLEMENTED,
diff --git a/runtime/src/iree/hal/drivers/amdgpu/util/kfd.h b/runtime/src/iree/hal/drivers/amdgpu/util/kfd.h index cfabbf3..416e637 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/util/kfd.h +++ b/runtime/src/iree/hal/drivers/amdgpu/util/kfd.h
@@ -42,7 +42,7 @@ // Tracking for adding AMDKFD_IOC_GET_CLOCK_COUNTERS to the API: // https://github.com/ROCm/ROCR-Runtime/issues/278 -typedef struct iree_hal_amdgpu_clock_counters_t { +typedef struct iree_hal_amdgpu_kfd_clock_counters_t { // GPU clock counter sampled by KFD for the requested GPU. uint64_t gpu_clock_counter; @@ -54,21 +54,14 @@ // Frequency in Hz for system_clock_counter. uint64_t system_clock_freq; -} iree_hal_amdgpu_clock_counters_t; - -// Validates that |counters| contains a usable KFD clock-counter sample. -// -// AMDKFD_IOC_GET_CLOCK_COUNTERS can succeed while returning zeroed counters -// for an invalid GPU UID. Callers must validate the sample before publishing -// any clock-correlation flags derived from it. -iree_status_t iree_hal_amdgpu_kfd_validate_clock_counters( - uint32_t gpu_uid, const iree_hal_amdgpu_clock_counters_t* counters); +} iree_hal_amdgpu_kfd_clock_counters_t; // Equivalent to `hsaKmtGetClockCounters` in the ROCR KMT. // |fd| must be an open /dev/kfd file handle. -// |gpu_uid| must be the HSA_AMD_AGENT_INFO_DRIVER_UID of the node to query. +// |driver_uid| must be the HSA_AMD_AGENT_INFO_DRIVER_UID of the node to query. iree_status_t iree_hal_amdgpu_kfd_get_clock_counters( - int fd, uint32_t gpu_uid, iree_hal_amdgpu_clock_counters_t* out_counters); + int fd, uint32_t driver_uid, + iree_hal_amdgpu_kfd_clock_counters_t* out_counters); #ifdef __cplusplus } // extern "C"
diff --git a/runtime/src/iree/hal/drivers/amdgpu/util/kfd_test.cc b/runtime/src/iree/hal/drivers/amdgpu/util/kfd_test.cc index 2afdd98..7f87f1f 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/util/kfd_test.cc +++ b/runtime/src/iree/hal/drivers/amdgpu/util/kfd_test.cc
@@ -40,41 +40,8 @@ }; #endif // IREE_PLATFORM_LINUX -TEST(KFDStandaloneTest, ValidateClockCounters) { - iree_hal_amdgpu_clock_counters_t counters = { - /*.gpu_clock_counter=*/1, - /*.cpu_clock_counter=*/2, - /*.system_clock_counter=*/3, - /*.system_clock_freq=*/4, - }; - IREE_EXPECT_OK(iree_hal_amdgpu_kfd_validate_clock_counters(1234, &counters)); - - counters.gpu_clock_counter = 0; - IREE_EXPECT_STATUS_IS( - IREE_STATUS_FAILED_PRECONDITION, - iree_hal_amdgpu_kfd_validate_clock_counters(1234, &counters)); - counters.gpu_clock_counter = 1; - - counters.cpu_clock_counter = 0; - IREE_EXPECT_STATUS_IS( - IREE_STATUS_FAILED_PRECONDITION, - iree_hal_amdgpu_kfd_validate_clock_counters(1234, &counters)); - counters.cpu_clock_counter = 2; - - counters.system_clock_counter = 0; - IREE_EXPECT_STATUS_IS( - IREE_STATUS_FAILED_PRECONDITION, - iree_hal_amdgpu_kfd_validate_clock_counters(1234, &counters)); - counters.system_clock_counter = 3; - - counters.system_clock_freq = 0; - IREE_EXPECT_STATUS_IS( - IREE_STATUS_FAILED_PRECONDITION, - iree_hal_amdgpu_kfd_validate_clock_counters(1234, &counters)); -} - TEST(KFDStandaloneTest, GetClockCountersFailsForInvalidDescriptor) { - iree_hal_amdgpu_clock_counters_t counters = { + iree_hal_amdgpu_kfd_clock_counters_t counters = { /*.gpu_clock_counter=*/1, /*.cpu_clock_counter=*/2, /*.system_clock_counter=*/3, @@ -190,7 +157,7 @@ int kfd = -1; IREE_ASSERT_OK(iree_hal_amdgpu_kfd_open(&kfd)); - iree_hal_amdgpu_clock_counters_t counters = {0}; + iree_hal_amdgpu_kfd_clock_counters_t counters = {0}; IREE_ASSERT_OK( iree_hal_amdgpu_kfd_get_clock_counters(kfd, *gpu_uid, &counters));