Refactor vulkan device creation to allow hidden devices and make lavapipe hidden. (#9621)

This updates enumeration and selection by index to respect whether a device is hidden, based on some characteristics. Since we've had continuous problems with lavapipe as a compute device (and since, when accidentally using since it is often installed by default, it spews stderr warnings about only being for testing), I opted to make this the first heuristic for hiding a device.

Broken out of #9330.
diff --git a/runtime/src/iree/hal/drivers/vulkan/vulkan_driver.cc b/runtime/src/iree/hal/drivers/vulkan/vulkan_driver.cc
index 14b4dfd..6730394 100644
--- a/runtime/src/iree/hal/drivers/vulkan/vulkan_driver.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/vulkan_driver.cc
@@ -347,16 +347,14 @@
          strlen(physical_device_properties.deviceName);
 }
 
-// Populates device information from the given Vulkan physical device handle.
-// |out_device_info| must point to valid memory and additional data will be
-// appended to |buffer_ptr| and the new pointer is returned.
-static uint8_t* iree_hal_vulkan_populate_device_info(
-    uint32_t physical_device_index, VkPhysicalDevice physical_device,
-    DynamicSymbols* syms, uint8_t* buffer_ptr,
-    iree_hal_device_info_t* out_device_info) {
-  memset(out_device_info, 0, sizeof(*out_device_info));
-  out_device_info->device_id = (iree_hal_device_id_t)physical_device;
-
+// Checks whether a physical device should be considered visible. Devices
+// are considered invisible if they do not satisfy various checks for minimal
+// compliance with this implementation.
+static bool iree_hal_vulkan_is_device_visible(
+    VkPhysicalDevice physical_device,
+    VkPhysicalDeviceFeatures* physical_device_features,
+    VkPhysicalDeviceProperties* physical_device_properties) {
+  // TODO(benvanik): check and optionally require reasonable limits.
   // TODO(benvanik): check and optionally require these features:
   // VkPhysicalDeviceFeatures physical_device_features;
   // syms->vkGetPhysicalDeviceFeatures(physical_device,
@@ -366,6 +364,32 @@
   // - physical_device_features.shaderInt64
   // - physical_device_features.shaderFloat64
 
+  // Deny some devices by name match.
+  if (strstr(physical_device_properties->deviceName, "llvmpipe") ==
+      physical_device_properties->deviceName) {
+    // When creating this device it spews to stderr "for testing use only"
+    // and seems quite unstable in practice (often failing to even succeed
+    // through our initiation sequence). Since it installs by default on
+    // many Linux systems, we just hide it.
+    // These device names report like:
+    //   llvmpipe (LLVM 13.0.1, 256 bits)
+    return false;
+  }
+
+  return true;
+}
+
+// Populates device information from the given Vulkan physical device handle.
+// |out_device_info| must point to valid memory and additional data will be
+// appended to |buffer_ptr| and the new pointer is returned.
+// If the device is not visible, then no modifications are made and nullptr is
+// returned.
+static uint8_t* iree_hal_vulkan_populate_device_info_if_visible(
+    VkPhysicalDevice physical_device, DynamicSymbols* syms, uint8_t* buffer_ptr,
+    iree_hal_device_info_t* out_device_info) {
+  // Early exit if device is not visible.
+  VkPhysicalDeviceFeatures physical_device_features;
+  syms->vkGetPhysicalDeviceFeatures(physical_device, &physical_device_features);
   VkPhysicalDeviceIDProperties device_id_props = {};
   device_id_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
   device_id_props.pNext = NULL;
@@ -373,6 +397,15 @@
   device_props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
   device_props2.pNext = &device_id_props;
   syms->vkGetPhysicalDeviceProperties2(physical_device, &device_props2);
+  if (!iree_hal_vulkan_is_device_visible(physical_device,
+                                         &physical_device_features,
+                                         &device_props2.properties)) {
+    return nullptr;
+  }
+
+  // Device is visible: Populate.
+  memset(out_device_info, 0, sizeof(*out_device_info));
+  out_device_info->device_id = (iree_hal_device_id_t)physical_device;
 
   // Use the deviceUUID - which is _mostly_ persistent - as the primary path.
   const uint8_t* device_uuid = device_id_props.deviceUUID;
@@ -415,6 +448,8 @@
       &physical_device_count, &physical_devices));
 
   // Allocate the return infos and populate with the devices.
+  // We allocate space for all of them, even though we may filter some out
+  // in the following step.
   iree_hal_device_info_t* device_infos = NULL;
   iree_host_size_t total_size =
       physical_device_count * sizeof(iree_hal_device_info_t);
@@ -428,12 +463,18 @@
     uint8_t* buffer_ptr =
         (uint8_t*)device_infos +
         physical_device_count * sizeof(iree_hal_device_info_t);
+    uint32_t visible_device_count = 0;
     for (uint32_t i = 0; i < physical_device_count; ++i) {
-      buffer_ptr = iree_hal_vulkan_populate_device_info(
-          i, physical_devices[i], driver->syms.get(), buffer_ptr,
-          &device_infos[i]);
+      uint8_t* new_buffer_ptr = iree_hal_vulkan_populate_device_info_if_visible(
+          physical_devices[i], driver->syms.get(), buffer_ptr,
+          &device_infos[visible_device_count]);
+      if (new_buffer_ptr) {
+        // Device is visible.
+        visible_device_count += 1;
+        buffer_ptr = new_buffer_ptr;
+      }
     }
-    *out_device_info_count = physical_device_count;
+    *out_device_info_count = visible_device_count;
     *out_device_infos = device_infos;
   }
 
@@ -450,26 +491,68 @@
   return iree_ok_status();
 }
 
-static iree_status_t iree_hal_vulkan_driver_select_default_device(
-    iree::hal::vulkan::DynamicSymbols* instance_syms, VkInstance instance,
-    int default_device_index, iree_allocator_t host_allocator,
-    VkPhysicalDevice* out_physical_device) {
+static iree_status_t iree_hal_vulkan_driver_find_device_by_index(
+    iree_hal_driver_t* base_driver, uint32_t device_index,
+    iree_allocator_t host_allocator, VkPhysicalDevice* found_physical_device) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)device_index);
+
+  iree_hal_vulkan_driver_t* driver = iree_hal_vulkan_driver_cast(base_driver);
+
+  // Query all devices from the Vulkan instance.
   uint32_t physical_device_count = 0;
   VkPhysicalDevice* physical_devices = NULL;
-  IREE_RETURN_IF_ERROR(iree_hal_vulkan_driver_enumerate_physical_devices(
-      instance_syms, instance, host_allocator, &physical_device_count,
-      &physical_devices));
-  iree_status_t status = iree_ok_status();
-  if (physical_device_count == 0 ||
-      default_device_index >= physical_device_count) {
-    status = iree_make_status(IREE_STATUS_NOT_FOUND,
-                              "default device %d not found (of %d enumerated)",
-                              default_device_index, physical_device_count);
-  } else {
-    *out_physical_device = physical_devices[default_device_index];
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_vulkan_driver_enumerate_physical_devices(
+              driver->syms.get(), driver->instance, host_allocator,
+              &physical_device_count, &physical_devices));
+
+  // Loop through devices to find the |device_index|'d visible device.
+  VkPhysicalDevice physical_device = VK_NULL_HANDLE;
+  bool found = false;
+  uint32_t probe_device_index = device_index;
+  uint32_t visible_physical_devices = 0;
+  if (device_index >= 0) {
+    for (uint32_t i = 0; i < physical_device_count; ++i) {
+      physical_device = physical_devices[i];
+      VkPhysicalDeviceFeatures physical_device_features;
+      driver->syms.get()->vkGetPhysicalDeviceFeatures(
+          physical_device, &physical_device_features);
+      VkPhysicalDeviceProperties physical_device_properties;
+      driver->syms.get()->vkGetPhysicalDeviceProperties(
+          physical_device, &physical_device_properties);
+
+      if (!iree_hal_vulkan_is_device_visible(physical_device,
+                                             &physical_device_features,
+                                             &physical_device_properties)) {
+        continue;
+      }
+
+      // Break or advance.
+      if (probe_device_index == 0) {
+        found = true;
+        break;
+      }
+      probe_device_index -= 1;
+      visible_physical_devices += 1;
+    }
   }
+
   iree_allocator_free(host_allocator, physical_devices);
-  return status;
+
+  if (!found) {
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(IREE_STATUS_NOT_FOUND,
+                            "physical device %u invalid; %u physical devices "
+                            "available; %u visible",
+                            device_index, physical_device_count,
+                            visible_physical_devices);
+  }
+
+  *found_physical_device = physical_device;
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
 }
 
 static iree_status_t iree_hal_vulkan_driver_create_device_by_id(
@@ -484,10 +567,9 @@
   VkPhysicalDevice physical_device = (VkPhysicalDevice)device_id;
   if (physical_device == VK_NULL_HANDLE) {
     IREE_RETURN_AND_END_ZONE_IF_ERROR(
-        z0,
-        iree_hal_vulkan_driver_select_default_device(
-            driver->syms.get(), driver->instance, driver->default_device_index,
-            host_allocator, &physical_device));
+        z0, iree_hal_vulkan_driver_find_device_by_index(
+                base_driver, driver->default_device_index, host_allocator,
+                &physical_device));
   }
 
   // TODO(benvanik): remove HAL module dependence on the identifier for matching
@@ -514,47 +596,6 @@
   return status;
 }
 
-static iree_status_t iree_hal_vulkan_driver_create_device_by_index(
-    iree_hal_driver_t* base_driver, iree_string_view_t driver_name,
-    uint32_t device_index, iree_host_size_t param_count,
-    const iree_string_pair_t* params, iree_allocator_t host_allocator,
-    iree_hal_device_t** out_device) {
-  iree_hal_vulkan_driver_t* driver = iree_hal_vulkan_driver_cast(base_driver);
-  IREE_TRACE_ZONE_BEGIN(z0);
-  IREE_TRACE_ZONE_APPEND_VALUE(z0, (uint64_t)device_index);
-
-  // Query all devices from the Vulkan instance.
-  uint32_t physical_device_count = 0;
-  VkPhysicalDevice* physical_devices = NULL;
-  IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, iree_hal_vulkan_driver_enumerate_physical_devices(
-              driver->syms.get(), driver->instance, host_allocator,
-              &physical_device_count, &physical_devices));
-
-  // Get the device at the requested index.
-  VkPhysicalDevice physical_device = VK_NULL_HANDLE;
-  if (device_index >= 0 && device_index < physical_device_count) {
-    physical_device = physical_devices[device_index];
-  }
-
-  iree_allocator_free(host_allocator, physical_devices);
-
-  if (physical_device == VK_NULL_HANDLE) {
-    IREE_TRACE_ZONE_END(z0);
-    return iree_make_status(IREE_STATUS_NOT_FOUND,
-                            "physical device %u invalid; %u physical devices "
-                            "available",
-                            device_index, physical_device_count);
-  }
-
-  iree_status_t status = iree_hal_vulkan_driver_create_device_by_id(
-      base_driver, (iree_hal_device_id_t)physical_device, param_count, params,
-      host_allocator, out_device);
-
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
 static iree_status_t iree_hal_vulkan_driver_create_device_by_uuid(
     iree_hal_driver_t* base_driver, iree_string_view_t driver_name,
     const uint8_t* device_uuid, iree_host_size_t param_count,
@@ -640,8 +681,11 @@
   // Fallback and try to parse as a device index.
   uint32_t device_index = 0;
   if (iree_string_view_atoi_uint32(device_path, &device_index)) {
-    return iree_hal_vulkan_driver_create_device_by_index(
-        base_driver, driver_name, device_index, param_count, params,
+    VkPhysicalDevice physical_device;
+    IREE_RETURN_IF_ERROR(iree_hal_vulkan_driver_find_device_by_index(
+        base_driver, device_index, host_allocator, &physical_device));
+    return iree_hal_vulkan_driver_create_device_by_id(
+        base_driver, (iree_hal_device_id_t)physical_device, param_count, params,
         host_allocator, out_device);
   }