[rocm] Print GPU information when dumping device info (#15230)

Example output on an AMD Radeon RX 7900 XTX:

```
- gpu-compute-capability: 11.0
- gpu-arch-name: gfx1100

- launch-max-block-dims: (1024, 1024, 1024)

- block-max-thread-count: 1024
- block-max-32-bit-register-count: 65536
- block-max-shared-memory: 64 KB

- memory-is-integrated-memory: 0
- memory-supports-managed-memory: 1
- memory-total-const-memory-size: 2047 MB
- memory-total-global-memory-size: 24560 MB
- memory-l2-cache-size: 6291456 bytes

- gpu-compute-unit-count: 48
- gpu-compute-max-clock-rate: 2304 mHz
- gpu-memory-max-clock-rate: 1249 mHz
- gpu-warp-size: 32
```
diff --git a/experimental/rocm/dynamic_symbol_tables.h b/experimental/rocm/dynamic_symbol_tables.h
index 0f710d2..58a1c49 100644
--- a/experimental/rocm/dynamic_symbol_tables.h
+++ b/experimental/rocm/dynamic_symbol_tables.h
@@ -8,6 +8,7 @@
 RC_PFN_DECL(hipCtxDestroy, hipCtx_t)
 RC_PFN_DECL(hipDeviceGet, hipDevice_t *, int)  // No direct, need to modify
 RC_PFN_DECL(hipGetDeviceCount, int *)
+RC_PFN_DECL(hipGetDeviceProperties, hipDeviceProp_t *, int)
 RC_PFN_DECL(hipDeviceGetName, char *, int,
             hipDevice_t)  // No direct, need to modify
 RC_PFN_STR_DECL(
diff --git a/experimental/rocm/rocm_driver.c b/experimental/rocm/rocm_driver.c
index e284554..1273e9e 100644
--- a/experimental/rocm/rocm_driver.c
+++ b/experimental/rocm/rocm_driver.c
@@ -156,10 +156,61 @@
     iree_string_builder_t* builder) {
   iree_hal_rocm_driver_t* driver = iree_hal_rocm_driver_cast(base_driver);
   hipDevice_t device = (hipDevice_t)device_id;
-  if (!device) return iree_ok_status();
-  // TODO: dump detailed device info.
-  (void)driver;
-  (void)device;
+
+  hipDeviceProp_t prop;
+  ROCM_RETURN_IF_ERROR(&driver->syms, hipGetDeviceProperties(&prop, device),
+                       "hipGetDeviceProperties");
+
+  // GPU capabilities and architecture.
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- gpu-compute-capability: %d.%d", prop.major, prop.minor));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- gpu-arch-name: %s", prop.gcnArchName));
+
+  // Launch configuration limits.
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n"));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- launch-max-block-dims: (%d, %d, %d)", prop.maxThreadsDim[0],
+      prop.maxThreadsDim[1], prop.maxThreadsDim[2]));
+
+  int shared_memory_kb = prop.sharedMemPerBlock / 1024;
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n"));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- block-max-thread-count: %d", prop.maxThreadsPerBlock));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- block-max-32-bit-register-count: %d", prop.regsPerBlock));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- block-max-shared-memory: %d KB", shared_memory_kb));
+
+  // Memory hierarchy related information.
+  int const_memory_mb = prop.totalConstMem / 1024 / 1024;
+  int global_memory_mb = prop.totalGlobalMem / 1024 / 1024;
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n"));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- memory-is-integrated-memory: %d", prop.integrated));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- memory-supports-managed-memory: %d", prop.managedMemory));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- memory-total-const-memory-size: %d MB", const_memory_mb));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- memory-total-global-memory-size: %d MB", global_memory_mb));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- memory-l2-cache-size: %d bytes", prop.l2CacheSize));
+
+  // GPU related information.
+  int compute_clock_mhz = prop.clockRate / 1000;
+  int memory_clock_mhz = prop.memoryClockRate / 1000;
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n"));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- gpu-compute-unit-count: %d", prop.multiProcessorCount));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- gpu-compute-max-clock-rate: %d mHz", compute_clock_mhz));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- gpu-memory-max-clock-rate: %d mHz", memory_clock_mhz));
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+      builder, "\n- gpu-warp-size: %d", prop.warpSize));
+
+  IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n"));
   return iree_ok_status();
 }