Adding iree_hal_queue_affinity_* utilities.
diff --git a/runtime/src/iree/hal/command_buffer.h b/runtime/src/iree/hal/command_buffer.h
index e6523f2..82fdd61 100644
--- a/runtime/src/iree/hal/command_buffer.h
+++ b/runtime/src/iree/hal/command_buffer.h
@@ -443,10 +443,15 @@
 
 // An RGBA color.
 typedef struct iree_hal_label_color_t {
-  uint8_t r;
-  uint8_t g;
-  uint8_t b;
-  uint8_t a;
+  union {
+    struct {
+      uint8_t r;
+      uint8_t g;
+      uint8_t b;
+      uint8_t a;
+    };
+    uint32_t value;  // Overlays r/g/b/a; byte order within it is host-endian.
+  };
 } iree_hal_label_color_t;
 
 // A source location attached to debug labels.
@@ -457,7 +462,7 @@
 
 // An unspecified color; debugging tools are to choose their own.
 static inline iree_hal_label_color_t iree_hal_label_color_unspecified() {
-  iree_hal_label_color_t color = {0, 0, 0, 0};
+  iree_hal_label_color_t color = {{{0, 0, 0, 0}}};  // zero r/g/b/a (and value)
   return color;
 }
 
diff --git a/runtime/src/iree/hal/queue.h b/runtime/src/iree/hal/queue.h
index 4e54e0b..b52627e 100644
--- a/runtime/src/iree/hal/queue.h
+++ b/runtime/src/iree/hal/queue.h
@@ -36,6 +36,60 @@
 #define IREE_HAL_QUEUE_AFFINITY_ANY ((iree_hal_queue_affinity_t)(-1))
 #define IREE_HAL_MAX_QUEUES (sizeof(iree_hal_queue_affinity_t) / 8)
 
+// Returns true if the |queue_affinity| is empty (no queue bits are set).
+#define iree_hal_queue_affinity_is_empty(queue_affinity) ((queue_affinity) == 0)
+
+// Returns true if |queue_affinity| is IREE_HAL_QUEUE_AFFINITY_ANY (any/all).
+#define iree_hal_queue_affinity_is_any(queue_affinity) \
+  ((queue_affinity) == IREE_HAL_QUEUE_AFFINITY_ANY)
+
+// Returns the number of queues (set bits) in the |queue_affinity| mask.
+#define iree_hal_queue_affinity_count(queue_affinity) \
+  iree_math_count_ones_u64(queue_affinity)
+
+// Returns the index of the lowest set bit in |queue_affinity|.
+// Requires that at least one bit be set.
+#define iree_hal_queue_affinity_find_first_set(queue_affinity) \
+  iree_math_count_trailing_zeros_u64(queue_affinity)
+
+// Logically shifts |queue_affinity| to the right by |amount| bits.
+#define iree_hal_queue_affinity_shr(queue_affinity, amount) \
+  iree_shr((queue_affinity), (amount))
+
+// Updates |inout_affinity| to only include those bits set in |mask_affinity|.
+#define iree_hal_queue_affinity_and_into(inout_affinity, mask_affinity) \
+  ((inout_affinity) &= (mask_affinity))
+
+// Updates |inout_affinity| to include bits set in |mask_affinity|.
+#define iree_hal_queue_affinity_or_into(inout_affinity, mask_affinity) \
+  ((inout_affinity) |= (mask_affinity))
+
+// Loops over each queue in the given |queue_affinity| bitmap.
+// Declares a local before the `for`: use once per scope, never unbraced.
+//
+// The following variables are available within the loop:
+//     queue_count: total number of queues used
+//     queue_index: loop index (0 to queue_count, exclusive)
+//   queue_ordinal: queue ordinal (bit position within |queue_affinity|)
+//
+// Example:
+//  IREE_HAL_FOR_QUEUE_AFFINITY(my_queue_affinity) {
+//    compact_queue_list[queue_index];     // 0 to my_queue_affinity count
+//    full_queue_list[queue_ordinal];      // 0 to available queues
+//  }
+#define IREE_HAL_FOR_QUEUE_AFFINITY(queue_affinity)                           \
+  iree_hal_queue_affinity_t _queue_bits = (queue_affinity);                   \
+  for (int queue_index = 0, _queue_ordinal_base = 0,                          \
+           queue_count = iree_hal_queue_affinity_count(_queue_bits),          \
+           _bit_offset = iree_hal_queue_affinity_find_first_set(_queue_bits), \
+           queue_ordinal = _bit_offset;                                       \
+       queue_index < queue_count;                                             \
+       ++queue_index, _queue_ordinal_base += _bit_offset + 1,                 \
+           _queue_bits =                                                      \
+               iree_hal_queue_affinity_shr(_queue_bits, _bit_offset + 1),     \
+           _bit_offset = iree_hal_queue_affinity_find_first_set(_queue_bits), \
+           queue_ordinal = _queue_ordinal_base + _bit_offset)
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/runtime/src/iree/hal/semaphore.h b/runtime/src/iree/hal/semaphore.h
index 52571ed..5320666 100644
--- a/runtime/src/iree/hal/semaphore.h
+++ b/runtime/src/iree/hal/semaphore.h
@@ -102,7 +102,8 @@
 }
 
 // Frees an iree_status_t encoded in a semaphore |value|, if any.
-static inline void iree_hal_semaphore_failure_free(uint64_t value) {
+IREE_ATTRIBUTE_ALWAYS_INLINE static inline void iree_hal_semaphore_failure_free(
+    uint64_t value) {
   if (value & IREE_HAL_SEMAPHORE_FAILURE_VALUE_STATUS_BIT) {
     iree_status_free((iree_status_t)(((int64_t)value << 1) >> 1));
   }