Supporting CUDA importable buffers in compatibility check. (#12093)

Folds in the fix from https://github.com/iree-org/iree/pull/12066 and
adds some nice textual error messages for when the buffer compatibility
check fails during allocation or import.

This also fixes the aliasing of IREE_HAL_MEMORY_TYPE_HOST_LOCAL with
IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL that made it impossible to
differentiate the two based solely on HOST_VISIBLE/DEVICE_VISIBLE. I
don't recall why the bits aliased and can't find the reason. It's
possible that, since the compiler was never producing the values with
the bit set, there was other code compensating for this as part of the
change to use OPTIMAL bits and it went unnoticed.

In order to avoid breaking the binary format, the bit is now added, but
existing vmfb files should continue to work as expected. Whenever a full
breaking change is introduced, we can clean up the bit ordering in the
bitfield.

Fixes #12066.
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/allocator_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/allocator_ops.mlir
index 1bb2f40..0558a83 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/allocator_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/allocator_ops.mlir
@@ -4,7 +4,7 @@
 func.func @allocatorAllocate(%arg0 : !hal.allocator) -> !hal.buffer {
   // CHECK: %[[SIZE:.+]] = vm.const.i64 1024
   %c1024 = arith.constant 1024 : index
-  // CHECK: %ref = vm.call @hal.allocator.allocate(%arg0, %c6, %c3075, %[[SIZE]]) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer>
+  // CHECK: %ref = vm.call @hal.allocator.allocate(%arg0, %c70, %c3075, %[[SIZE]]) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer>
   %0 = hal.allocator.allocate<%arg0 : !hal.allocator> type("HostLocal") usage("DispatchStorage|Transfer") : !hal.buffer{%c1024}
   return %0 : !hal.buffer
 }
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALBase.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALBase.td
index b0c85f2..57c3b73 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALBase.td
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALBase.td
@@ -45,7 +45,7 @@
 def HAL_MemoryType_HostVisible      : I32BitEnumAttrCase<"HostVisible",      0x0002>;  // h
 def HAL_MemoryType_HostCoherent     : I32BitEnumAttrCase<"HostCoherent",     0x0004>;  // c
 def HAL_MemoryType_HostCached       : I32BitEnumAttrCase<"HostCached",       0x0008>;  // C
-def HAL_MemoryType_HostLocal        : I32BitEnumAttrCase<"HostLocal",        0x0006>;  // H
+def HAL_MemoryType_HostLocal        : I32BitEnumAttrCase<"HostLocal",        0x0046>;  // H
 def HAL_MemoryType_DeviceVisible    : I32BitEnumAttrCase<"DeviceVisible",    0x0010>;  // d
 def HAL_MemoryType_DeviceLocal      : I32BitEnumAttrCase<"DeviceLocal",      0x0030>;  // D
 def HAL_MemoryTypeBitfieldAttr :
diff --git a/runtime/src/iree/hal/allocator.c b/runtime/src/iree/hal/allocator.c
index b58da51..120605c 100644
--- a/runtime/src/iree/hal/allocator.c
+++ b/runtime/src/iree/hal/allocator.c
@@ -13,6 +13,38 @@
 #include "iree/hal/detail.h"
 #include "iree/hal/resource.h"
 
+//===----------------------------------------------------------------------===//
+// String utils
+//===----------------------------------------------------------------------===//
+
+static const iree_bitfield_string_mapping_t
+    iree_hal_buffer_compatibility_mappings[] = {
+        {IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE, IREE_SVL("ALLOCATABLE")},
+        {IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE, IREE_SVL("IMPORTABLE")},
+        {IREE_HAL_BUFFER_COMPATIBILITY_EXPORTABLE, IREE_SVL("EXPORTABLE")},
+        {IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER,
+         IREE_SVL("QUEUE_TRANSFER")},
+        {IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH,
+         IREE_SVL("QUEUE_DISPATCH")},
+        {IREE_HAL_BUFFER_COMPATIBILITY_LOW_PERFORMANCE,
+         IREE_SVL("LOW_PERFORMANCE")},
+};
+
+IREE_API_EXPORT iree_status_t iree_hal_buffer_compatibility_parse(
+    iree_string_view_t value, iree_hal_buffer_compatibility_t* out_value) {
+  return iree_bitfield_parse(
+      value, IREE_ARRAYSIZE(iree_hal_buffer_compatibility_mappings),
+      iree_hal_buffer_compatibility_mappings, out_value);
+}
+
+IREE_API_EXPORT iree_string_view_t
+iree_hal_buffer_compatibility_format(iree_hal_buffer_compatibility_t value,
+                                     iree_bitfield_string_temp_t* out_temp) {
+  return iree_bitfield_format_inline(
+      value, IREE_ARRAYSIZE(iree_hal_buffer_compatibility_mappings),
+      iree_hal_buffer_compatibility_mappings, out_temp);
+}
+
 IREE_API_EXPORT iree_status_t iree_hal_allocator_statistics_format(
     const iree_hal_allocator_statistics_t* statistics,
     iree_string_builder_t* builder) {
@@ -42,6 +74,10 @@
   return iree_ok_status();
 }
 
+//===----------------------------------------------------------------------===//
+// iree_hal_allocator_t
+//===----------------------------------------------------------------------===//
+
 #define _VTABLE_DISPATCH(allocator, method_name) \
   IREE_HAL_VTABLE_DISPATCH(allocator, iree_hal_allocator, method_name)
 
diff --git a/runtime/src/iree/hal/allocator.h b/runtime/src/iree/hal/allocator.h
index 18b4f7b..95366b3 100644
--- a/runtime/src/iree/hal/allocator.h
+++ b/runtime/src/iree/hal/allocator.h
@@ -204,6 +204,17 @@
 };
 typedef uint32_t iree_hal_buffer_compatibility_t;
 
+// Parses a buffer compatibility bitfield from a string.
+// See iree_bitfield_parse for usage.
+IREE_API_EXPORT iree_status_t iree_hal_buffer_compatibility_parse(
+    iree_string_view_t value, iree_hal_buffer_compatibility_t* out_value);
+
+// Formats a buffer compatibility bitfield as a string.
+// See iree_bitfield_format for usage.
+IREE_API_EXPORT iree_string_view_t
+iree_hal_buffer_compatibility_format(iree_hal_buffer_compatibility_t value,
+                                     iree_bitfield_string_temp_t* out_temp);
+
 // Defines the type of an external buffer handle.
 // Each type may only be usable in a subset of implementations and platforms and
 // may even vary based on the runtime device properties or buffer instance.
diff --git a/runtime/src/iree/hal/buffer.h b/runtime/src/iree/hal/buffer.h
index abfd565..8009920 100644
--- a/runtime/src/iree/hal/buffer.h
+++ b/runtime/src/iree/hal/buffer.h
@@ -63,7 +63,7 @@
   // Memory is accessible as normal host allocated memory.
   IREE_HAL_MEMORY_TYPE_HOST_LOCAL = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE |
                                     IREE_HAL_MEMORY_TYPE_HOST_COHERENT |
-                                    (1u << 5),
+                                    (1u << 6),
 
   // The allocator will choose the optimal memory type based on buffer usage,
   // preferring to place the allocation in host-local memory.
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_allocator.c b/runtime/src/iree/hal/drivers/cuda/cuda_allocator.c
index 41c40e8..4d22a01 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_allocator.c
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_allocator.c
@@ -210,12 +210,18 @@
   iree_hal_buffer_compatibility_t compatibility =
       IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE;
 
-  if (iree_any_bit_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) {
-    compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER;
+  // Buffers are importable in CUDA under most cases, though performance may
+  // vary wildly. We don't fully verify that the buffer parameters are
+  // self-consistent and just look at whether we can get a device pointer.
+  if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
+    compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE;
   }
 
   // Buffers can only be used on the queue if they are device visible.
   if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
+    if (iree_any_bit_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) {
+      compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER;
+    }
     if (iree_any_bit_set(params->usage,
                          IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE)) {
       compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH;
@@ -282,12 +288,30 @@
 
   // Coerce options into those required by the current device.
   iree_hal_buffer_params_t compat_params = *params;
-  if (!iree_all_bits_set(iree_hal_cuda_allocator_query_buffer_compatibility(
-                             base_allocator, &compat_params, &allocation_size),
+  iree_hal_buffer_compatibility_t compatibility =
+      iree_hal_cuda_allocator_query_buffer_compatibility(
+          base_allocator, &compat_params, &allocation_size);
+  if (!iree_all_bits_set(compatibility,
                          IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE)) {
+#if IREE_STATUS_MODE
+    iree_bitfield_string_temp_t temp0, temp1, temp2;
+    iree_string_view_t memory_type_str =
+        iree_hal_memory_type_format(params->type, &temp0);
+    iree_string_view_t usage_str =
+        iree_hal_buffer_usage_format(params->usage, &temp1);
+    iree_string_view_t compatibility_str =
+        iree_hal_buffer_compatibility_format(compatibility, &temp2);
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "allocator cannot allocate a buffer with the given parameters; "
+        "memory_type=%.*s, usage=%.*s, compatibility=%.*s",
+        (int)memory_type_str.size, memory_type_str.data, (int)usage_str.size,
+        usage_str.data, (int)compatibility_str.size, compatibility_str.data);
+#else
     return iree_make_status(
         IREE_STATUS_INVALID_ARGUMENT,
         "allocator cannot allocate a buffer with the given parameters");
+#endif  // IREE_STATUS_MODE
   }
 
   iree_status_t status = iree_ok_status();
@@ -437,12 +461,30 @@
   // Coerce options into those required by the current device.
   iree_hal_buffer_params_t compat_params = *params;
   iree_device_size_t allocation_size = external_buffer->size;
-  if (!iree_all_bits_set(iree_hal_cuda_allocator_query_buffer_compatibility(
-                             base_allocator, &compat_params, &allocation_size),
+  iree_hal_buffer_compatibility_t compatibility =
+      iree_hal_cuda_allocator_query_buffer_compatibility(
+          base_allocator, &compat_params, &allocation_size);
+  if (!iree_all_bits_set(compatibility,
                          IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE)) {
+#if IREE_STATUS_MODE
+    iree_bitfield_string_temp_t temp0, temp1, temp2;
+    iree_string_view_t memory_type_str =
+        iree_hal_memory_type_format(params->type, &temp0);
+    iree_string_view_t usage_str =
+        iree_hal_buffer_usage_format(params->usage, &temp1);
+    iree_string_view_t compatibility_str =
+        iree_hal_buffer_compatibility_format(compatibility, &temp2);
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "allocator cannot import a buffer with the given parameters; "
+        "memory_type=%.*s, usage=%.*s, compatibility=%.*s",
+        (int)memory_type_str.size, memory_type_str.data, (int)usage_str.size,
+        usage_str.data, (int)compatibility_str.size, compatibility_str.data);
+#else
     return iree_make_status(
         IREE_STATUS_INVALID_ARGUMENT,
         "allocator cannot import a buffer with the given parameters");
+#endif  // IREE_STATUS_MODE
   }
 
   iree_status_t status = iree_ok_status();