[HAL/AMDGPU] Fix patched-template profile metadata

Patched-template command-buffer dispatches store only the dynamic binding patch sources in the block sidecar. The operation metadata path was walking binding_count entries from that compact sidecar, which is only valid for full HAL binding-source lists. Mixed static/dynamic dispatches could therefore read past the block allocation when profiling forced retained command-buffer metadata.

Classify patched-template binding flags directly from the dispatch strategy and patch-source count instead of treating binding_count as a sidecar length. This preserves static/dynamic operation attribution without touching the queue hot path.

Extend the mixed dynamic dispatch test to retain profile metadata and assert that the registered dispatch operation reports both static and dynamic bindings, covering the sidecar shape that exposed the ASAN failure.
diff --git a/runtime/src/iree/hal/drivers/amdgpu/aql_command_buffer_profile.c b/runtime/src/iree/hal/drivers/amdgpu/aql_command_buffer_profile.c index 3203989..1ecf3fb 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/aql_command_buffer_profile.c +++ b/runtime/src/iree/hal/drivers/amdgpu/aql_command_buffer_profile.c
@@ -54,12 +54,28 @@ if (dispatch_command->binding_count == 0) { return IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_NONE; } + + iree_hal_profile_command_operation_flags_t flags = + IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_NONE; + switch (dispatch_command->kernarg_strategy) { + case IREE_HAL_AMDGPU_COMMAND_BUFFER_KERNARG_STRATEGY_PREPUBLISHED: + return IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_STATIC_BINDINGS; + case IREE_HAL_AMDGPU_COMMAND_BUFFER_KERNARG_STRATEGY_PATCHED_TEMPLATE: + if (dispatch_command->payload.patch_source_count != 0) { + flags |= IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_DYNAMIC_BINDINGS; + } + if (dispatch_command->payload.patch_source_count < + dispatch_command->binding_count) { + flags |= IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_STATIC_BINDINGS; + } + return flags; + default: + break; + } if (dispatch_command->binding_source_offset == 0) { return IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_STATIC_BINDINGS; } - iree_hal_profile_command_operation_flags_t flags = - IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_NONE; const uint8_t* block_base = (const uint8_t*)block; const uint32_t binding_source_offset = dispatch_command->binding_source_offset;
diff --git a/runtime/src/iree/hal/drivers/amdgpu/host_queue_command_buffer_test.cc b/runtime/src/iree/hal/drivers/amdgpu/host_queue_command_buffer_test.cc index 2b3e523..48d316f 100644 --- a/runtime/src/iree/hal/drivers/amdgpu/host_queue_command_buffer_test.cc +++ b/runtime/src/iree/hal/drivers/amdgpu/host_queue_command_buffer_test.cc
@@ -1706,7 +1706,9 @@ Ref<iree_hal_command_buffer_t> command_buffer; IREE_ASSERT_OK(iree_hal_command_buffer_create( - test_device.base_device(), IREE_HAL_COMMAND_BUFFER_MODE_DEFAULT, + test_device.base_device(), + IREE_HAL_COMMAND_BUFFER_MODE_DEFAULT | + IREE_HAL_COMMAND_BUFFER_MODE_RETAIN_PROFILE_METADATA, IREE_HAL_COMMAND_CATEGORY_DISPATCH, IREE_HAL_QUEUE_AFFINITY_ANY, /*binding_capacity=*/1, command_buffer.out())); IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer)); @@ -1737,6 +1739,28 @@ EXPECT_EQ(dispatch_command->kernarg_strategy, IREE_HAL_AMDGPU_COMMAND_BUFFER_KERNARG_STRATEGY_PATCHED_TEMPLATE); EXPECT_EQ(dispatch_command->payload.patch_source_count, 1u); + const iree_hal_amdgpu_profile_metadata_registry_t& profile_metadata = + test_device.logical_device()->profile_metadata; + ASSERT_EQ(profile_metadata.command_operation_record_count, 2u); + const iree_hal_profile_command_operation_record_t* dispatch_operation = + nullptr; + for (iree_host_size_t i = 0; + i < profile_metadata.command_operation_record_count; ++i) { + const iree_hal_profile_command_operation_record_t& operation = + profile_metadata.command_operation_records[i]; + if (operation.type == IREE_HAL_PROFILE_COMMAND_OPERATION_TYPE_DISPATCH) { + dispatch_operation = &operation; + break; + } + } + ASSERT_NE(dispatch_operation, nullptr); + EXPECT_EQ(dispatch_operation->binding_count, 2u); + EXPECT_NE(dispatch_operation->flags & + IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_STATIC_BINDINGS, + 0u); + EXPECT_NE(dispatch_operation->flags & + IREE_HAL_PROFILE_COMMAND_OPERATION_FLAG_DYNAMIC_BINDINGS, + 0u); const uint32_t kernarg_length = (uint32_t)dispatch_command->kernarg_length_qwords * 8u; EXPECT_NE(