Modernizing iree_atomic_*. (#18910)

C11's _Generic lets us avoid encoding the type in the operation name and
more closely matches the C11 atomic syntax. This assumes that any C
compiler we have that goes down the disabled-atomics path supports
_Generic (modern GCC, Clang, and MSVC have all supported it for a while).

This allows us to drop in C11-style atomics as replacements (useful in the
new AMDGPU backend), and on MSVC it will let us use Microsoft's C11
atomics implementation when it's ready (it's far better than the
Interlocked solution we have now).
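
For illustration, here is a minimal standalone sketch of the _Generic
dispatch technique this change relies on. The `my_fetch_add` macro and the
`fetch_add_i32`/`fetch_add_i64` helpers are hypothetical names for this
sketch, not the IREE API; the real macros live in atomics_disabled.h and
atomics_msvc.h below:

    // Minimal sketch (hypothetical names): _Generic selects an
    // implementation from the static type of the pointer argument, so
    // call sites no longer encode the type in the operation name.
    #include <stdint.h>
    #include <stdio.h>

    static inline int32_t fetch_add_i32(int32_t* object, int32_t operand) {
      int32_t original = *object;  // non-atomic, for illustration only
      *object += operand;
      return original;
    }

    static inline int64_t fetch_add_i64(int64_t* object, int64_t operand) {
      int64_t original = *object;  // non-atomic, for illustration only
      *object += operand;
      return original;
    }

    // One macro covers all supported widths; an unsupported pointer type
    // fails at compile time because no _Generic association matches.
    #define my_fetch_add(object, operand) \
      _Generic((object),                  \
          int32_t*: fetch_add_i32,        \
          int64_t*: fetch_add_i64)((object), (operand))

    int main(void) {
      int32_t a = 1;
      int64_t b = 2;
      printf("%d\n", (int)my_fetch_add(&a, 1));            // prints 1
      printf("%lld\n", (long long)my_fetch_add(&b, 100));  // prints 2
      return 0;
    }

The diff below uses the same shape: the disabled-atomics and MSVC paths
dispatch on iree_atomic_int32_t*/iree_atomic_int64_t* (and the new
unsigned variants) instead of baking _int32/_int64 into each macro name.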
diff --git a/experimental/webgpu/nop_semaphore.c b/experimental/webgpu/nop_semaphore.c
index d4151ee..65d2648 100644
--- a/experimental/webgpu/nop_semaphore.c
+++ b/experimental/webgpu/nop_semaphore.c
@@ -38,8 +38,8 @@
     iree_hal_resource_initialize(&iree_hal_webgpu_nop_semaphore_vtable,
                                  &semaphore->resource);
     semaphore->host_allocator = host_allocator;
-    iree_atomic_store_int64(&semaphore->value, initial_value,
-                            iree_memory_order_seq_cst);
+    iree_atomic_store(&semaphore->value, initial_value,
+                      iree_memory_order_seq_cst);
     *out_semaphore = (iree_hal_semaphore_t*)semaphore;
   }
 
@@ -63,8 +63,7 @@
     iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) {
   iree_hal_webgpu_nop_semaphore_t* semaphore =
       iree_hal_webgpu_nop_semaphore_cast(base_semaphore);
-  *out_value =
-      iree_atomic_load_int64(&semaphore->value, iree_memory_order_seq_cst);
+  *out_value = iree_atomic_load(&semaphore->value, iree_memory_order_seq_cst);
   return iree_ok_status();
 }
 
@@ -72,8 +71,7 @@
     iree_hal_semaphore_t* base_semaphore, uint64_t new_value) {
   iree_hal_webgpu_nop_semaphore_t* semaphore =
       iree_hal_webgpu_nop_semaphore_cast(base_semaphore);
-  iree_atomic_store_int64(&semaphore->value, new_value,
-                          iree_memory_order_seq_cst);
+  iree_atomic_store(&semaphore->value, new_value, iree_memory_order_seq_cst);
   return iree_ok_status();
 }
 
@@ -88,7 +86,7 @@
   iree_hal_webgpu_nop_semaphore_t* semaphore =
       iree_hal_webgpu_nop_semaphore_cast(base_semaphore);
   uint64_t current_value =
-      iree_atomic_load_int64(&semaphore->value, iree_memory_order_seq_cst);
+      iree_atomic_load(&semaphore->value, iree_memory_order_seq_cst);
   if (current_value < value) {
     return iree_make_status(
         IREE_STATUS_FAILED_PRECONDITION,
diff --git a/runtime/src/iree/base/internal/atomics.h b/runtime/src/iree/base/internal/atomics.h
index 731d9ee..f428731 100644
--- a/runtime/src/iree/base/internal/atomics.h
+++ b/runtime/src/iree/base/internal/atomics.h
@@ -86,47 +86,6 @@
 
 #endif  // IREE_COMPILER_*
 
-// If the compiler can automatically determine the types:
-#ifdef iree_atomic_load_auto
-
-#define iree_atomic_load_int32 iree_atomic_load_auto
-#define iree_atomic_store_int32 iree_atomic_store_auto
-#define iree_atomic_fetch_add_int32 iree_atomic_fetch_add_auto
-#define iree_atomic_fetch_sub_int32 iree_atomic_fetch_sub_auto
-#define iree_atomic_fetch_and_int32 iree_atomic_fetch_and_auto
-#define iree_atomic_fetch_or_int32 iree_atomic_fetch_or_auto
-#define iree_atomic_fetch_xor_int32 iree_atomic_fetch_xor_auto
-#define iree_atomic_exchange_int32 iree_atomic_exchange_auto
-#define iree_atomic_compare_exchange_strong_int32 \
-  iree_atomic_compare_exchange_strong_auto
-#define iree_atomic_compare_exchange_weak_int32 \
-  iree_atomic_compare_exchange_weak_auto
-
-#define iree_atomic_load_int64 iree_atomic_load_auto
-#define iree_atomic_store_int64 iree_atomic_store_auto
-#define iree_atomic_fetch_add_int64 iree_atomic_fetch_add_auto
-#define iree_atomic_fetch_sub_int64 iree_atomic_fetch_sub_auto
-#define iree_atomic_fetch_and_int64 iree_atomic_fetch_and_auto
-#define iree_atomic_fetch_or_int64 iree_atomic_fetch_or_auto
-#define iree_atomic_fetch_xor_int64 iree_atomic_fetch_xor_auto
-#define iree_atomic_exchange_int64 iree_atomic_exchange_auto
-#define iree_atomic_compare_exchange_strong_int64 \
-  iree_atomic_compare_exchange_strong_auto
-#define iree_atomic_compare_exchange_weak_int64 \
-  iree_atomic_compare_exchange_weak_auto
-
-#define iree_atomic_load_intptr iree_atomic_load_auto
-#define iree_atomic_store_intptr iree_atomic_store_auto
-#define iree_atomic_fetch_add_intptr iree_atomic_fetch_add_auto
-#define iree_atomic_fetch_sub_intptr iree_atomic_fetch_sub_auto
-#define iree_atomic_exchange_intptr iree_atomic_exchange_auto
-#define iree_atomic_compare_exchange_strong_intptr \
-  iree_atomic_compare_exchange_strong_auto
-#define iree_atomic_compare_exchange_weak_intptr \
-  iree_atomic_compare_exchange_weak_auto
-
-#endif  // iree_atomic_load_auto
-
 //==============================================================================
 // Reference count atomics
 //==============================================================================
@@ -140,10 +99,10 @@
 // should use IREE_ATOMIC_VAR_INIT, but apparently this has to be fixed
 // at call sites (where the variables are initialized in the first place).
 #define iree_atomic_ref_count_init_value(count_ptr, value) \
-  iree_atomic_store_int32(count_ptr, value, iree_memory_order_relaxed)
+  iree_atomic_store((count_ptr), (value), iree_memory_order_relaxed)
 
 #define iree_atomic_ref_count_init(count_ptr) \
-  iree_atomic_ref_count_init_value(count_ptr, 1)
+  iree_atomic_ref_count_init_value((count_ptr), 1)
 
 // Why relaxed order:
 // https://www.boost.org/doc/libs/1_57_0/doc/html/atomic/usage_examples.html#boost_atomic.usage_examples.example_reference_counters.discussion
@@ -155,9 +114,9 @@
 // value (unlike iree_atomic_ref_count_dec), so we make sure that it does not,
 // which allows the implementation to use faster atomic instructions where
 // available, e.g. STADD on ARMv8.1-a.
-#define iree_atomic_ref_count_inc(count_ptr)                              \
-  do {                                                                    \
-    iree_atomic_fetch_add_int32(count_ptr, 1, iree_memory_order_relaxed); \
+#define iree_atomic_ref_count_inc(count_ptr)                          \
+  do {                                                                \
+    iree_atomic_fetch_add((count_ptr), 1, iree_memory_order_relaxed); \
   } while (false)
 
 // For now we stick to acq_rel order. TODO: should we follow Boost's advice?
@@ -169,13 +128,13 @@
 // may be a pessimization... I would like to hear a second opinion on this,
 // particularly regarding how x86-centric this might be.
 #define iree_atomic_ref_count_dec(count_ptr) \
-  iree_atomic_fetch_sub_int32(count_ptr, 1, iree_memory_order_acq_rel)
+  iree_atomic_fetch_sub((count_ptr), 1, iree_memory_order_acq_rel)
 
 // memory_order_acquire order ensures that this sees decrements from
 // iree_atomic_ref_count_dec. On the other hand, there is no ordering with
 // iree_atomic_ref_count_inc.
 #define iree_atomic_ref_count_load(count_ptr) \
-  iree_atomic_load_int32(count_ptr, iree_memory_order_acquire)
+  iree_atomic_load((count_ptr), iree_memory_order_acquire)
 
 // Aborts the program if the given reference count value is not 1.
 // This should be avoided in all situations but those where continuing execution
diff --git a/runtime/src/iree/base/internal/atomics_clang.h b/runtime/src/iree/base/internal/atomics_clang.h
index 44514e0..afa7a33 100644
--- a/runtime/src/iree/base/internal/atomics_clang.h
+++ b/runtime/src/iree/base/internal/atomics_clang.h
@@ -33,37 +33,38 @@
 
 typedef _Atomic int32_t iree_atomic_int32_t;
 typedef _Atomic int64_t iree_atomic_int64_t;
+typedef _Atomic uint32_t iree_atomic_uint32_t;
+typedef _Atomic uint64_t iree_atomic_uint64_t;
 // TODO(#3453): check for __int128 support before using
 // typedef _Atomic __int128 iree_atomic_int128_t;
 typedef _Atomic intptr_t iree_atomic_intptr_t;
 
-#define iree_atomic_load_auto(object, order) \
-  __c11_atomic_load((object), (order))
-#define iree_atomic_store_auto(object, desired, order) \
-  __c11_atomic_store((object), (desired), (order))
-#define iree_atomic_fetch_add_auto(object, operand, order) \
-  __c11_atomic_fetch_add((object), (operand), (order))
-#define iree_atomic_fetch_sub_auto(object, operand, order) \
-  __c11_atomic_fetch_sub((object), (operand), (order))
-#define iree_atomic_fetch_and_auto(object, operand, order) \
-  __c11_atomic_fetch_and((object), (operand), (order))
-#define iree_atomic_fetch_or_auto(object, operand, order) \
-  __c11_atomic_fetch_or((object), (operand), (order))
-#define iree_atomic_fetch_xor_auto(object, operand, order) \
-  __c11_atomic_fetch_xor((object), (operand), (order))
-#define iree_atomic_exchange_auto(object, operand, order) \
-  __c11_atomic_exchange((object), (operand), (order))
-#define iree_atomic_compare_exchange_strong_auto(object, expected, desired, \
-                                                 order_succ, order_fail)    \
-  __c11_atomic_compare_exchange_strong((object), (expected), (desired),     \
-                                       (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_auto(object, expected, desired, \
-                                               order_succ, order_fail)    \
-  __c11_atomic_compare_exchange_weak((object), (expected), (desired),     \
-                                     (order_succ), (order_fail))
-
 #define iree_atomic_thread_fence(order) __c11_atomic_thread_fence(order)
 
+#define iree_atomic_load(object, order) __c11_atomic_load((object), (order))
+#define iree_atomic_store(object, desired, order) \
+  __c11_atomic_store((object), (desired), (order))
+#define iree_atomic_fetch_add(object, operand, order) \
+  __c11_atomic_fetch_add((object), (operand), (order))
+#define iree_atomic_fetch_sub(object, operand, order) \
+  __c11_atomic_fetch_sub((object), (operand), (order))
+#define iree_atomic_fetch_and(object, operand, order) \
+  __c11_atomic_fetch_and((object), (operand), (order))
+#define iree_atomic_fetch_or(object, operand, order) \
+  __c11_atomic_fetch_or((object), (operand), (order))
+#define iree_atomic_fetch_xor(object, operand, order) \
+  __c11_atomic_fetch_xor((object), (operand), (order))
+#define iree_atomic_exchange(object, operand, order) \
+  __c11_atomic_exchange((object), (operand), (order))
+#define iree_atomic_compare_exchange_strong(object, expected, desired,  \
+                                            order_succ, order_fail)     \
+  __c11_atomic_compare_exchange_strong((object), (expected), (desired), \
+                                       (order_succ), (order_fail))
+#define iree_atomic_compare_exchange_weak(object, expected, desired,  \
+                                          order_succ, order_fail)     \
+  __c11_atomic_compare_exchange_weak((object), (expected), (desired), \
+                                     (order_succ), (order_fail))
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/runtime/src/iree/base/internal/atomics_disabled.h b/runtime/src/iree/base/internal/atomics_disabled.h
index 5c0a7ca..5dbb272 100644
--- a/runtime/src/iree/base/internal/atomics_disabled.h
+++ b/runtime/src/iree/base/internal/atomics_disabled.h
@@ -16,12 +16,8 @@
 
 #if IREE_SYNCHRONIZATION_DISABLE_UNSAFE
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 typedef enum iree_memory_order_e {
-  iree_memory_order_relaxed,
+  iree_memory_order_relaxed = 0u,
   iree_memory_order_consume,
   iree_memory_order_acquire,
   iree_memory_order_release,
@@ -33,65 +29,197 @@
 
 typedef int32_t iree_atomic_int32_t;
 typedef int64_t iree_atomic_int64_t;
+typedef uint32_t iree_atomic_uint32_t;
+typedef uint64_t iree_atomic_uint64_t;
 // TODO(#3453): check for __int128 support before using
 // typedef __int128 iree_atomic_int128_t;
 typedef intptr_t iree_atomic_intptr_t;
 
-#define iree_atomic_load_int32(object, order) (*(object))
-#define iree_atomic_store_int32(object, desired, order) (*(object) = (desired))
-#define iree_atomic_fetch_add_int32(object, operand, order)                 \
-  iree_atomic_fetch_add_int32_impl((volatile iree_atomic_int32_t*)(object), \
-                                   (int32_t)(operand))
-#define iree_atomic_fetch_sub_int32(object, operand, order)                 \
-  iree_atomic_fetch_add_int32_impl((volatile iree_atomic_int32_t*)(object), \
-                                   -(int32_t)(operand))
-#define iree_atomic_fetch_and_int32(object, operand, order)                 \
-  iree_atomic_fetch_and_int32_impl((volatile iree_atomic_int32_t*)(object), \
-                                   (int32_t)(operand))
-#define iree_atomic_fetch_or_int32(object, operand, order)                 \
-  iree_atomic_fetch_or_int32_impl((volatile iree_atomic_int32_t*)(object), \
-                                  (int32_t)(operand))
-#define iree_atomic_fetch_xor_int32(object, operand, order)                 \
-  iree_atomic_fetch_xor_int32_impl((volatile iree_atomic_int32_t*)(object), \
-                                   (int32_t)(operand))
-#define iree_atomic_exchange_int32(object, desired, order) \
-  iree_atomic_fetch_exchange_int32_impl(                   \
-      (volatile iree_atomic_int32_t*)(object), (int32_t)(desired))
-#define iree_atomic_compare_exchange_strong_int32(object, expected, desired, \
-                                                  order_succ, order_fail)    \
-  iree_atomic_compare_exchange_int32_impl(                                   \
-      (volatile iree_atomic_int32_t*)(object), (int32_t*)(expected),         \
-      (int32_t)(desired))
-#define iree_atomic_compare_exchange_weak_int32 \
-  iree_atomic_compare_exchange_strong_int32
+#define iree_atomic_thread_fence(order)
 
-#define iree_atomic_load_int64(object, order) (*(object))
-#define iree_atomic_store_int64(object, desired, order) (*(object) = (desired))
-#define iree_atomic_fetch_add_int64(object, operand, order)                 \
-  iree_atomic_fetch_add_int64_impl((volatile iree_atomic_int64_t*)(object), \
-                                   (int64_t)(operand))
-#define iree_atomic_fetch_sub_int64(object, operand, order)                 \
-  iree_atomic_fetch_add_int64_impl((volatile iree_atomic_int64_t*)(object), \
-                                   -(int64_t)(operand))
-#define iree_atomic_fetch_and_int64(object, operand, order)                 \
-  iree_atomic_fetch_and_int64_impl((volatile iree_atomic_int64_t*)(object), \
-                                   (int64_t)(operand))
-#define iree_atomic_fetch_or_int64(object, operand, order)                 \
-  iree_atomic_fetch_or_int64_impl((volatile iree_atomic_int64_t*)(object), \
-                                  (int64_t)(operand))
-#define iree_atomic_fetch_xor_int64(object, operand, order)                 \
-  iree_atomic_fetch_xor_int64_impl((volatile iree_atomic_int64_t*)(object), \
-                                   (int64_t)(operand))
-#define iree_atomic_exchange_int64(object, desired, order) \
-  iree_atomic_fetch_exchange_int64_impl(                   \
-      (volatile iree_atomic_int64_t*)(object), (int64_t)(desired))
-#define iree_atomic_compare_exchange_strong_int64(object, expected, desired, \
-                                                  order_succ, order_fail)    \
-  iree_atomic_compare_exchange_int64_impl(                                   \
-      (volatile iree_atomic_int64_t*)(object), (int64_t*)(expected),         \
-      (int64_t)(desired))
-#define iree_atomic_compare_exchange_weak_int64 \
-  iree_atomic_compare_exchange_strong_int64
+#ifdef __cplusplus
+
+extern "C++" {
+
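+// C++ path: _Generic is unavailable in C++, so the overloaded function
+// templates below provide the type dispatch instead. All ops are plain
+// non-atomic loads/stores, which is only valid because this header is
+// guarded by IREE_SYNCHRONIZATION_DISABLE_UNSAFE.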
+#define iree_atomic_load(object, order) (*(object))
+#define iree_atomic_store(object, desired, order) (*(object) = (desired))
+#define iree_atomic_fetch_add(object, operand, order) \
+  iree_atomic_fetch_add_impl((object), (operand))
+#define iree_atomic_fetch_sub(object, operand, order) \
+  iree_atomic_fetch_sub_impl((object), (operand))
+#define iree_atomic_fetch_and(object, operand, order) \
+  iree_atomic_fetch_and_impl((object), (operand))
+#define iree_atomic_fetch_or(object, operand, order) \
+  iree_atomic_fetch_or_impl((object), (operand))
+#define iree_atomic_fetch_xor(object, operand, order) \
+  iree_atomic_fetch_xor_impl((object), (operand))
+#define iree_atomic_exchange(object, desired, order) \
+  iree_atomic_fetch_exchange_impl((object), (desired))
+#define iree_atomic_compare_exchange_strong(object, expected, desired, \
+                                            order_succ, order_fail)    \
+  iree_atomic_compare_exchange_impl((object), (expected), (desired))
+#define iree_atomic_compare_exchange_weak iree_atomic_compare_exchange_strong
+
+template <typename T, typename V>
+static inline T iree_atomic_fetch_add_impl(volatile T* object, V operand) {
+  T original = *object;
+  *object += operand;
+  return original;
+}
+
+template <typename T, typename V>
+static inline T iree_atomic_fetch_sub_impl(volatile T* object, V operand) {
+  T original = *object;
+  *object -= operand;
+  return original;
+}
+
+template <typename T, typename V>
+static inline T iree_atomic_fetch_and_impl(volatile T* object, V operand) {
+  T original = *object;
+  *object &= operand;
+  return original;
+}
+
+template <typename T, typename V>
+static inline T iree_atomic_fetch_or_impl(volatile T* object, V operand) {
+  T original = *object;
+  *object |= operand;
+  return original;
+}
+
+template <typename T, typename V>
+static inline T iree_atomic_fetch_xor_impl(volatile T* object, V operand) {
+  T original = *object;
+  *object ^= operand;
+  return original;
+}
+
+template <typename T, typename V>
+static inline T iree_atomic_fetch_exchange_impl(volatile T* object, V desired) {
+  T original = *object;
+  *object = desired;
+  return original;
+}
+
+template <typename T, typename V>
+static inline bool iree_atomic_compare_exchange_impl(volatile T* object,
+                                                     V* expected, V desired) {
+  if (*object == *expected) {
+    *object = desired;
+    return true;
+  } else {
+    *expected = *object;
+    return false;
+  }
+}
+
+}  // extern "C++"
+
+#else
+
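+// C path: C11 _Generic dispatches on the atomic pointer type. The bitwise
+// and exchange ops route the unsigned types through the signed impls since
+// only the bit pattern matters; fetch_add/fetch_sub get dedicated unsigned
+// impls.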
+#define iree_atomic_load(object, order) (*(object))
+#define iree_atomic_store(object, desired, order) (*(object) = (desired))
+#define iree_atomic_fetch_add(object, operand, order)                     \
+  _Generic((object),                                                      \
+      iree_atomic_int32_t *: iree_atomic_fetch_add_int32_impl(            \
+                               (volatile iree_atomic_int32_t*)(object),   \
+                               (int32_t)(operand)),                       \
+      iree_atomic_int64_t *: iree_atomic_fetch_add_int64_impl(            \
+                               (volatile iree_atomic_int64_t*)(object),   \
+                               (int64_t)(operand)),                       \
+      iree_atomic_uint32_t *: iree_atomic_fetch_add_uint32_impl(          \
+                                (volatile iree_atomic_uint32_t*)(object), \
+                                (uint32_t)(operand)),                     \
+      iree_atomic_uint64_t *: iree_atomic_fetch_add_uint64_impl(          \
+                                (volatile iree_atomic_uint64_t*)(object), \
+                                (uint64_t)(operand)))
+#define iree_atomic_fetch_sub(object, operand, order)                     \
+  _Generic((object),                                                      \
+      iree_atomic_int32_t *: iree_atomic_fetch_sub_int32_impl(            \
+                               (volatile iree_atomic_int32_t*)(object),   \
+                               (int32_t)(operand)),                       \
+      iree_atomic_int64_t *: iree_atomic_fetch_sub_int64_impl(            \
+                               (volatile iree_atomic_int64_t*)(object),   \
+                               (int64_t)(operand)),                       \
+      iree_atomic_uint32_t *: iree_atomic_fetch_sub_uint32_impl(          \
+                                (volatile iree_atomic_uint32_t*)(object), \
+                                (uint32_t)(operand)),                     \
+      iree_atomic_uint64_t *: iree_atomic_fetch_sub_uint64_impl(          \
+                                (volatile iree_atomic_uint64_t*)(object), \
+                                (uint64_t)(operand)))
+#define iree_atomic_fetch_and(object, operand, order)                    \
+  _Generic((object),                                                     \
+      iree_atomic_int32_t *: iree_atomic_fetch_and_int32_impl(           \
+                               (volatile iree_atomic_int32_t*)(object),  \
+                               (int32_t)(operand)),                      \
+      iree_atomic_int64_t *: iree_atomic_fetch_and_int64_impl(           \
+                               (volatile iree_atomic_int64_t*)(object),  \
+                               (int64_t)(operand)),                      \
+      iree_atomic_uint32_t *: iree_atomic_fetch_and_int32_impl(          \
+                                (volatile iree_atomic_int32_t*)(object), \
+                                (int32_t)(operand)),                     \
+      iree_atomic_uint64_t *: iree_atomic_fetch_and_int64_impl(          \
+                                (volatile iree_atomic_int64_t*)(object), \
+                                (int64_t)(operand)))
+#define iree_atomic_fetch_or(object, operand, order)                     \
+  _Generic((object),                                                     \
+      iree_atomic_int32_t *: iree_atomic_fetch_or_int32_impl(            \
+                               (volatile iree_atomic_int32_t*)(object),  \
+                               (int32_t)(operand)),                      \
+      iree_atomic_int64_t *: iree_atomic_fetch_or_int64_impl(            \
+                               (volatile iree_atomic_int64_t*)(object),  \
+                               (int64_t)(operand)),                      \
+      iree_atomic_uint32_t *: iree_atomic_fetch_or_int32_impl(           \
+                                (volatile iree_atomic_int32_t*)(object), \
+                                (int32_t)(operand)),                     \
+      iree_atomic_uint64_t *: iree_atomic_fetch_or_int64_impl(           \
+                                (volatile iree_atomic_int64_t*)(object), \
+                                (int64_t)(operand)))
+#define iree_atomic_fetch_xor(object, operand, order)                    \
+  _Generic((object),                                                     \
+      iree_atomic_int32_t *: iree_atomic_fetch_xor_int32_impl(           \
+                               (volatile iree_atomic_int32_t*)(object),  \
+                               (int32_t)(operand)),                      \
+      iree_atomic_int64_t *: iree_atomic_fetch_xor_int64_impl(           \
+                               (volatile iree_atomic_int64_t*)(object),  \
+                               (int64_t)(operand)),                      \
+      iree_atomic_uint32_t *: iree_atomic_fetch_xor_int32_impl(          \
+                                (volatile iree_atomic_int32_t*)(object), \
+                                (int32_t)(operand)),                     \
+      iree_atomic_uint64_t *: iree_atomic_fetch_xor_int64_impl(          \
+                                (volatile iree_atomic_int64_t*)(object), \
+                                (int64_t)(operand)))
+#define iree_atomic_exchange(object, desired, order)                     \
+  _Generic((object),                                                     \
+      iree_atomic_int32_t *: iree_atomic_fetch_exchange_int32_impl(      \
+                               (volatile iree_atomic_int32_t*)(object),  \
+                               (int32_t)(desired)),                      \
+      iree_atomic_int64_t *: iree_atomic_fetch_exchange_int64_impl(      \
+                               (volatile iree_atomic_int64_t*)(object),  \
+                               (int64_t)(desired)),                      \
+      iree_atomic_uint32_t *: iree_atomic_fetch_exchange_int32_impl(     \
+                                (volatile iree_atomic_int32_t*)(object), \
+                                (int32_t)(desired)),                     \
+      iree_atomic_uint64_t *: iree_atomic_fetch_exchange_int64_impl(     \
+                                (volatile iree_atomic_int64_t*)(object), \
+                                (int64_t)(desired)))
+#define iree_atomic_compare_exchange_strong(object, expected, desired,     \
+                                            order_succ, order_fail)        \
+  _Generic((object),                                                       \
+      iree_atomic_int32_t *: iree_atomic_compare_exchange_int32_impl(      \
+                               (volatile iree_atomic_int32_t*)(object),    \
+                               (int32_t*)(expected), (int32_t)(desired)),  \
+      iree_atomic_int64_t *: iree_atomic_compare_exchange_int64_impl(      \
+                               (volatile iree_atomic_int64_t*)(object),    \
+                               (int64_t*)(expected), (int64_t)(desired)),  \
+      iree_atomic_uint32_t *: iree_atomic_compare_exchange_int32_impl(     \
+                                (volatile iree_atomic_int32_t*)(object),   \
+                                (int32_t*)(expected), (int32_t)(desired)), \
+      iree_atomic_uint64_t *: iree_atomic_compare_exchange_int64_impl(     \
+                                (volatile iree_atomic_int64_t*)(object),   \
+                                (int64_t*)(expected), (int64_t)(desired)))
+#define iree_atomic_compare_exchange_weak iree_atomic_compare_exchange_strong
 
 static inline int32_t iree_atomic_fetch_add_int32_impl(
     volatile iree_atomic_int32_t* object, int32_t operand) {
@@ -100,6 +228,27 @@
   return original;
 }
 
+static inline int32_t iree_atomic_fetch_sub_int32_impl(
+    volatile iree_atomic_int32_t* object, int32_t operand) {
+  int32_t original = *object;
+  *object -= operand;
+  return original;
+}
+
+static inline uint32_t iree_atomic_fetch_add_uint32_impl(
+    volatile iree_atomic_uint32_t* object, uint32_t operand) {
+  uint32_t original = *object;
+  *object += operand;
+  return original;
+}
+
+static inline uint32_t iree_atomic_fetch_sub_uint32_impl(
+    volatile iree_atomic_uint32_t* object, uint32_t operand) {
+  uint32_t original = *object;
+  *object -= operand;
+  return original;
+}
+
 static inline int32_t iree_atomic_fetch_and_int32_impl(
     volatile iree_atomic_int32_t* object, int32_t operand) {
   int32_t original = *object;
@@ -146,6 +295,27 @@
   return original;
 }
 
+static inline int64_t iree_atomic_fetch_sub_int64_impl(
+    volatile iree_atomic_int64_t* object, int64_t operand) {
+  int64_t original = *object;
+  *object -= operand;
+  return original;
+}
+
+static inline uint64_t iree_atomic_fetch_add_uint64_impl(
+    volatile iree_atomic_uint64_t* object, uint64_t operand) {
+  uint64_t original = *object;
+  *object += operand;
+  return original;
+}
+
+static inline uint64_t iree_atomic_fetch_sub_uint64_impl(
+    volatile iree_atomic_uint64_t* object, uint64_t operand) {
+  uint64_t original = *object;
+  *object -= operand;
+  return original;
+}
+
 static inline int64_t iree_atomic_fetch_and_int64_impl(
     volatile iree_atomic_int64_t* object, int64_t operand) {
   int64_t original = *object;
@@ -185,59 +355,7 @@
   }
 }
 
-// There are no pointer-width atomic ops in MSVC so we need to specialize based
-// on the pointer size.
-#if defined(IREE_PTR_SIZE_32)
-#define iree_atomic_load_intptr(object, order) \
-  (intptr_t) iree_atomic_load_int32((iree_atomic_int32_t*)(object), (order))
-#define iree_atomic_store_intptr(object, desired, order)             \
-  (intptr_t) iree_atomic_store_int32((iree_atomic_int32_t*)(object), \
-                                     (int32_t)(desired), (order))
-#define iree_atomic_fetch_add_intptr(object, operand, order)             \
-  (intptr_t) iree_atomic_fetch_add_int32((iree_atomic_int32_t*)(object), \
-                                         (int32_t)(operand), (order))
-#define iree_atomic_fetch_sub_intptr(object, operand, order)             \
-  (intptr_t) iree_atomic_fetch_sub_int32((iree_atomic_int32_t*)(object), \
-                                         (int32_t)(operand), (order))
-#define iree_atomic_exchange_intptr(object, desired, order)             \
-  (intptr_t) iree_atomic_exchange_int32((iree_atomic_int32_t*)(object), \
-                                        (int32_t)(desired), (order))
-#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \
-                                                   order_succ, order_fail)    \
-  iree_atomic_compare_exchange_strong_int32(                                  \
-      (iree_atomic_int32_t*)(object), (int32_t*)(expected),                   \
-      (int32_t)(desired), (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_intptr \
-  iree_atomic_compare_exchange_strong_intptr
-#else
-#define iree_atomic_load_intptr(object, order) \
-  (intptr_t) iree_atomic_load_int64((iree_atomic_int64_t*)(object), (order))
-#define iree_atomic_store_intptr(object, desired, order)             \
-  (intptr_t) iree_atomic_store_int64((iree_atomic_int64_t*)(object), \
-                                     (int64_t)(desired), (order))
-#define iree_atomic_fetch_add_intptr(object, operand, order)             \
-  (intptr_t) iree_atomic_fetch_add_int64((iree_atomic_int64_t*)(object), \
-                                         (int64_t)(operand), (order))
-#define iree_atomic_fetch_sub_intptr(object, operand, order)             \
-  (intptr_t) iree_atomic_fetch_sub_int64((iree_atomic_int64_t*)(object), \
-                                         (int64_t)(operand), (order))
-#define iree_atomic_exchange_intptr(object, desired, order)             \
-  (intptr_t) iree_atomic_exchange_int64((iree_atomic_int64_t*)(object), \
-                                        (int64_t)(desired), (order))
-#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \
-                                                   order_succ, order_fail)    \
-  iree_atomic_compare_exchange_strong_int64(                                  \
-      (iree_atomic_int64_t*)(object), (int64_t*)(expected),                   \
-      (int64_t)(desired), (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_intptr \
-  iree_atomic_compare_exchange_strong_intptr
-#endif  // IREE_PTR_SIZE_32
-
-#define iree_atomic_thread_fence(order)
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
+#endif  // __cplusplus
 
 #endif  // IREE_SYNCHRONIZATION_DISABLE_UNSAFE
 
diff --git a/runtime/src/iree/base/internal/atomics_gcc.h b/runtime/src/iree/base/internal/atomics_gcc.h
index d413b98..728add7 100644
--- a/runtime/src/iree/base/internal/atomics_gcc.h
+++ b/runtime/src/iree/base/internal/atomics_gcc.h
@@ -34,6 +34,8 @@
 
 typedef int32_t iree_atomic_int32_t;
 typedef int64_t iree_atomic_int64_t;
+typedef uint32_t iree_atomic_uint32_t;
+typedef uint64_t iree_atomic_uint64_t;
 // typedef __int128 iree_atomic_int128_t;
 typedef intptr_t iree_atomic_intptr_t;
 
@@ -45,40 +47,6 @@
 #define __iree_auto_type __auto_type
 #endif
 
-#define iree_atomic_load_auto(object, order)                       \
-  __extension__({                                                  \
-    __iree_auto_type __atomic_load_ptr = (object);                 \
-    __typeof__(*__atomic_load_ptr) __atomic_load_tmp;              \
-    __atomic_load(__atomic_load_ptr, &__atomic_load_tmp, (order)); \
-    __atomic_load_tmp;                                             \
-  })
-#define iree_atomic_store_auto(object, desired, order)                \
-  __extension__({                                                     \
-    __iree_auto_type __atomic_store_ptr = (object);                   \
-    __typeof__(*__atomic_store_ptr) __atomic_store_tmp = (desired);   \
-    __atomic_store(__atomic_store_ptr, &__atomic_store_tmp, (order)); \
-  })
-#define iree_atomic_fetch_add_auto(object, operand, order) \
-  __atomic_fetch_add((object), (operand), (order))
-#define iree_atomic_fetch_sub_auto(object, operand, order) \
-  __atomic_fetch_sub((object), (operand), (order))
-#define iree_atomic_fetch_and_auto(object, operand, order) \
-  __atomic_fetch_and((object), (operand), (order))
-#define iree_atomic_fetch_or_auto(object, operand, order) \
-  __atomic_fetch_or((object), (operand), (order))
-#define iree_atomic_fetch_xor_auto(object, operand, order) \
-  __atomic_fetch_xor((object), (operand), (order))
-#define iree_atomic_exchange_auto(object, operand, order) \
-  __atomic_exchange_n((object), (operand), (order))
-#define iree_atomic_compare_exchange_strong_auto(object, expected, desired, \
-                                                 order_succ, order_fail)    \
-  __atomic_compare_exchange_n(object, expected, desired, /*weak=*/false,    \
-                              (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_auto(object, expected, desired, \
-                                               order_succ, order_fail)    \
-  __atomic_compare_exchange_n(object, expected, desired, /*weak=*/true,   \
-                              (order_succ), (order_fail))
-
 static inline void iree_atomic_thread_fence(int order) {
   // Ignore error where TSan does not support atomic thread fence.
   IREE_DISABLE_COMPILER_TSAN_ERRORS()
@@ -86,6 +54,40 @@
   IREE_RESTORE_COMPILER_TSAN_ERRORS()
 }
 
+#define iree_atomic_load(object, order)                            \
+  __extension__({                                                  \
+    __iree_auto_type __atomic_load_ptr = (object);                 \
+    __typeof__(*__atomic_load_ptr) __atomic_load_tmp;              \
+    __atomic_load(__atomic_load_ptr, &__atomic_load_tmp, (order)); \
+    __atomic_load_tmp;                                             \
+  })
+#define iree_atomic_store(object, desired, order)                     \
+  __extension__({                                                     \
+    __iree_auto_type __atomic_store_ptr = (object);                   \
+    __typeof__(*__atomic_store_ptr) __atomic_store_tmp = (desired);   \
+    __atomic_store(__atomic_store_ptr, &__atomic_store_tmp, (order)); \
+  })
+#define iree_atomic_fetch_add(object, operand, order) \
+  __atomic_fetch_add((object), (operand), (order))
+#define iree_atomic_fetch_sub(object, operand, order) \
+  __atomic_fetch_sub((object), (operand), (order))
+#define iree_atomic_fetch_and(object, operand, order) \
+  __atomic_fetch_and((object), (operand), (order))
+#define iree_atomic_fetch_or(object, operand, order) \
+  __atomic_fetch_or((object), (operand), (order))
+#define iree_atomic_fetch_xor(object, operand, order) \
+  __atomic_fetch_xor((object), (operand), (order))
+#define iree_atomic_exchange(object, operand, order) \
+  __atomic_exchange_n((object), (operand), (order))
+#define iree_atomic_compare_exchange_strong(object, expected, desired,   \
+                                            order_succ, order_fail)      \
+  __atomic_compare_exchange_n(object, expected, desired, /*weak=*/false, \
+                              (order_succ), (order_fail))
+#define iree_atomic_compare_exchange_weak(object, expected, desired,    \
+                                          order_succ, order_fail)       \
+  __atomic_compare_exchange_n(object, expected, desired, /*weak=*/true, \
+                              (order_succ), (order_fail))
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/runtime/src/iree/base/internal/atomics_msvc.h b/runtime/src/iree/base/internal/atomics_msvc.h
index 5cfbf43..2af2798 100644
--- a/runtime/src/iree/base/internal/atomics_msvc.h
+++ b/runtime/src/iree/base/internal/atomics_msvc.h
@@ -16,12 +16,141 @@
 
 #if defined(IREE_COMPILER_MSVC)
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+// TODO(benvanik): make MSVC's C11 atomic support work.
+// It's difficult to detect and has some weird configuration assertions around
+// mixed C and C++ code. Support is only present when the
+// `/experimental:c11atomics` flag is passed, but that flag is ignored on /TP
+// (C++) compilation. __STDC_NO_ATOMICS__ remains defined even when the
+// feature is enabled so we can't use the standard check. Hopefully that will
+// be fixed if it ever leaves experimental.
+#define IREE_ATOMIC_USE_MSVC_C11 0
+#if IREE_ATOMIC_USE_MSVC_C11
+#include <stdatomic.h>
+#endif  // IREE_ATOMIC_USE_MSVC_C11
+
+#if IREE_ATOMIC_USE_MSVC_C11 && defined(atomic_init)
 
 typedef enum iree_memory_order_e {
-  iree_memory_order_relaxed,
+  iree_memory_order_relaxed = _Atomic_memory_order_relaxed,
+  iree_memory_order_consume = _Atomic_memory_order_consume,
+  iree_memory_order_acquire = _Atomic_memory_order_acquire,
+  iree_memory_order_release = _Atomic_memory_order_release,
+  iree_memory_order_acq_rel = _Atomic_memory_order_acq_rel,
+  iree_memory_order_seq_cst = _Atomic_memory_order_seq_cst,
+} iree_memory_order_t;
+
+#define IREE_ATOMIC_VAR_INIT(value) (value)
+
+typedef _Atomic int32_t iree_atomic_int32_t;
+typedef _Atomic int64_t iree_atomic_int64_t;
+typedef _Atomic uint32_t iree_atomic_uint32_t;
+typedef _Atomic uint64_t iree_atomic_uint64_t;
+// TODO(#3453): check for __int128 support before using
+// typedef _Atomic __int128 iree_atomic_int128_t;
+typedef _Atomic intptr_t iree_atomic_intptr_t;
+
+#define iree_atomic_thread_fence(order) atomic_thread_fence(order)
+
+#define iree_atomic_load(object, order) \
+  atomic_load_explicit((object), (order))
+#define iree_atomic_store(object, desired, order) \
+  atomic_store_explicit((object), (desired), (order))
+#define iree_atomic_fetch_add(object, operand, order) \
+  atomic_fetch_add_explicit((object), (operand), (order))
+#define iree_atomic_fetch_sub(object, operand, order) \
+  atomic_fetch_sub_explicit((object), (operand), (order))
+#define iree_atomic_fetch_and(object, operand, order) \
+  atomic_fetch_and_explicit((object), (operand), (order))
+#define iree_atomic_fetch_or(object, operand, order) \
+  atomic_fetch_or_explicit((object), (operand), (order))
+#define iree_atomic_fetch_xor(object, operand, order) \
+  atomic_fetch_xor_explicit((object), (operand), (order))
+#define iree_atomic_exchange(object, operand, order) \
+  atomic_exchange_explicit((object), (operand), (order))
+#define iree_atomic_compare_exchange_strong(object, expected, desired,     \
+                                            order_succ, order_fail)        \
+  atomic_compare_exchange_strong_explicit((object), (expected), (desired), \
+                                          (order_succ), (order_fail))
+#define iree_atomic_compare_exchange_weak(object, expected, desired,     \
+                                          order_succ, order_fail)        \
+  atomic_compare_exchange_weak_explicit((object), (expected), (desired), \
+                                        (order_succ), (order_fail))
+
+#elif defined(__cplusplus)
+
+// When compiling for C++ we reinterpret atomics as std::atomic<T>. This relies
+// on std::atomic on primitive types being lock-free such that the memory for
+// each atomic is just the atomic value. We need this special path because MSVC
+// doesn't support C features like _Generic in C++.
+
+extern "C++" {
+#include <atomic>
+}  // extern "C++"
+
+extern "C" {
+
+typedef enum iree_memory_order_e {
+  iree_memory_order_relaxed = _Atomic_memory_order_relaxed,
+  iree_memory_order_consume = _Atomic_memory_order_consume,
+  iree_memory_order_acquire = _Atomic_memory_order_acquire,
+  iree_memory_order_release = _Atomic_memory_order_release,
+  iree_memory_order_acq_rel = _Atomic_memory_order_acq_rel,
+  iree_memory_order_seq_cst = _Atomic_memory_order_seq_cst,
+} iree_memory_order_t;
+
+#define IREE_ATOMIC_VAR_INIT(value) (value)
+
+typedef std::atomic<int32_t> iree_atomic_int32_t;
+typedef std::atomic<int64_t> iree_atomic_int64_t;
+typedef std::atomic<uint32_t> iree_atomic_uint32_t;
+typedef std::atomic<uint64_t> iree_atomic_uint64_t;
+typedef std::atomic<intptr_t> iree_atomic_intptr_t;
+
+#define iree_atomic_thread_fence(order) std::atomic_thread_fence(order)
+
+#define iree_atomic_load(object, order) \
+  std::atomic_load_explicit((object), (std::memory_order)(order))
+#define iree_atomic_store(object, desired, order) \
+  std::atomic_store_explicit((object), (desired), (std::memory_order)(order))
+#define iree_atomic_fetch_add(object, operand, order) \
+  std::atomic_fetch_add_explicit((object), (operand), \
+                                 (std::memory_order)(order))
+#define iree_atomic_fetch_sub(object, operand, order) \
+  std::atomic_fetch_sub_explicit((object), (operand), \
+                                 (std::memory_order)(order))
+#define iree_atomic_fetch_and(object, operand, order) \
+  std::atomic_fetch_and_explicit((object), (operand), \
+                                 (std::memory_order)(order))
+#define iree_atomic_fetch_or(object, operand, order) \
+  std::atomic_fetch_or_explicit((object), (operand), (std::memory_order)(order))
+#define iree_atomic_fetch_xor(object, operand, order) \
+  std::atomic_fetch_xor_explicit((object), (operand), \
+                                 (std::memory_order)(order))
+#define iree_atomic_exchange(object, operand, order) \
+  std::atomic_exchange_explicit((object), (operand), (std::memory_order)(order))
+#define iree_atomic_compare_exchange_strong(object, expected, desired,  \
+                                            order_succ, order_fail)     \
+  std::atomic_compare_exchange_strong_explicit(                         \
+      (object), (expected), (desired), (std::memory_order)(order_succ), \
+      (std::memory_order)(order_fail))
+#define iree_atomic_compare_exchange_weak(object, expected, desired,          \
+                                          order_succ, order_fail)             \
+  std::atomic_compare_exchange_weak_explicit((object), (expected), (desired), \
+                                             (std::memory_order)(order_succ), \
+                                             (std::memory_order)(order_fail))
+
+}  // extern "C"
+
+#else
+
+// When compiling in C we can use _Generic to automatically route to the
+// builtins that change their name based on the atomic type. This implementation
+// is not good: it ignores memory order entirely and uses the full barrier
+// implied by any of the _Interlocked* builtins. There are some variants of the
+// builtins that we could use based on the order but their support across
+// targets differs. Hopefully ~soon we can use C11 atomics directly and drop
+// this code path.
+
+typedef enum iree_memory_order_e {
+  iree_memory_order_relaxed = 0u,
   iree_memory_order_consume,
   iree_memory_order_acquire,
   iree_memory_order_release,
@@ -29,72 +158,131 @@
   iree_memory_order_seq_cst,
 } iree_memory_order_t;
 
-#define IREE_ATOMIC_VAR_INIT(value) \
-  { (value) }
+#define IREE_ATOMIC_VAR_INIT(value) (value)
 
-typedef struct {
-  int32_t __val;
-} iree_atomic_int32_t;
-typedef struct {
-  int64_t __val;
-} iree_atomic_int64_t;
-// typedef __declspec(align(16)) struct {
-//   uint64_t __val[2];
-// } iree_atomic_int128_t;
-typedef struct {
-  intptr_t __val;
-} iree_atomic_intptr_t;
-
-#define iree_atomic_load_int32(object, order) \
-  InterlockedExchangeAdd((volatile LONG*)object, 0)
-#define iree_atomic_store_int32(object, desired, order) \
-  InterlockedExchange((volatile LONG*)object, desired)
-#define iree_atomic_fetch_add_int32(object, operand, order) \
-  InterlockedExchangeAdd((volatile LONG*)object, operand)
-#define iree_atomic_fetch_sub_int32(object, operand, order) \
-  InterlockedExchangeAdd((volatile LONG*)object, -((int32_t)(operand)))
-#define iree_atomic_fetch_and_int32(object, operand, order) \
-  InterlockedAnd((volatile LONG*)object, operand)
-#define iree_atomic_fetch_or_int32(object, operand, order) \
-  InterlockedOr((volatile LONG*)object, operand)
-#define iree_atomic_fetch_xor_int32(object, operand, order) \
-  InterlockedXor((volatile LONG*)object, operand)
-#define iree_atomic_exchange_int32(object, desired, order) \
-  InterlockedExchange((volatile LONG*)object, desired)
-#define iree_atomic_compare_exchange_strong_int32(object, expected, desired, \
-                                                  order_succ, order_fail)    \
-  iree_atomic_compare_exchange_strong_int32_impl(                            \
-      (volatile iree_atomic_int32_t*)(object), (int32_t*)(expected),         \
-      (int32_t)(desired), (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_int32 \
-  iree_atomic_compare_exchange_strong_int32
-
-#define iree_atomic_load_int64(object, order) \
-  InterlockedExchangeAdd64((volatile LONG64*)object, 0)
-#define iree_atomic_store_int64(object, desired, order) \
-  InterlockedExchange64((volatile LONG64*)object, (LONG64)desired)
-#define iree_atomic_fetch_add_int64(object, operand, order) \
-  InterlockedExchangeAdd64((volatile LONG64*)object, (LONG64)operand)
-#define iree_atomic_fetch_sub_int64(object, operand, order) \
-  InterlockedExchangeAdd64((volatile LONG64*)object, -(operand))
-#define iree_atomic_fetch_and_int64(object, operand, order) \
-  InterlockedAnd64((volatile LONG64*)object, operand)
-#define iree_atomic_fetch_or_int64(object, operand, order) \
-  InterlockedOr64((volatile LONG64*)object, operand)
-#define iree_atomic_fetch_xor_int64(object, operand, order) \
-  InterlockedXor64((volatile LONG64*)object, operand)
-#define iree_atomic_exchange_int64(object, desired, order) \
-  InterlockedExchange64((volatile LONG64*)object, desired)
-#define iree_atomic_compare_exchange_strong_int64(object, expected, desired, \
-                                                  order_succ, order_fail)    \
-  iree_atomic_compare_exchange_strong_int64_impl(                            \
-      (volatile iree_atomic_int64_t*)(object), (int64_t*)(expected),         \
-      (int64_t)(desired), (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_int64 \
-  iree_atomic_compare_exchange_strong_int64
+typedef int32_t iree_atomic_int32_t;
+typedef int64_t iree_atomic_int64_t;
+typedef uint32_t iree_atomic_uint32_t;
+typedef uint64_t iree_atomic_uint64_t;
+typedef intptr_t iree_atomic_intptr_t;
 
 #define iree_atomic_thread_fence(order) MemoryBarrier()
 
+#define iree_atomic_load(object, order)                          \
+  _Generic((object),                                             \
+      iree_atomic_int32_t *: _InterlockedExchangeAdd(            \
+                               (volatile int32_t*)(object), 0),  \
+      iree_atomic_int64_t *: _InterlockedExchangeAdd64(          \
+                               (volatile int64_t*)(object), 0),  \
+      iree_atomic_uint32_t *: _InterlockedExchangeAdd(           \
+                                (volatile int32_t*)(object), 0), \
+      iree_atomic_uint64_t *: _InterlockedExchangeAdd64(         \
+                                (volatile int64_t*)(object), 0))
+#define iree_atomic_store(object, desired, order)                              \
+  _Generic((object),                                                           \
+      iree_atomic_int32_t *: _InterlockedExchange((volatile int32_t*)(object), \
+                                                  (int32_t)(desired)),         \
+      iree_atomic_int64_t *: _InterlockedExchange64(                           \
+                               (volatile int64_t*)(object),                    \
+                               (int64_t)(desired)),                            \
+      iree_atomic_uint32_t *: _InterlockedExchange(                            \
+                                (volatile int32_t*)(object),                   \
+                                (int32_t)(desired)),                           \
+      iree_atomic_uint64_t *: _InterlockedExchange64(                          \
+                                (volatile int64_t*)(object),                   \
+                                (int64_t)(desired)))
+#define iree_atomic_fetch_add(object, operand, order)        \
+  _Generic((object),                                         \
+      iree_atomic_int32_t *: _InterlockedExchangeAdd(        \
+                               (volatile int32_t*)(object),  \
+                               (int32_t)(operand)),          \
+      iree_atomic_int64_t *: _InterlockedExchangeAdd64(      \
+                               (volatile int64_t*)(object),  \
+                               (int64_t)(operand)),          \
+      iree_atomic_uint32_t *: _InterlockedExchangeAdd(       \
+                                (volatile int32_t*)(object), \
+                                (int32_t)(operand)),         \
+      iree_atomic_uint64_t *: _InterlockedExchangeAdd64(     \
+                                (volatile int64_t*)(object), \
+                                (int64_t)(operand)))
+#define iree_atomic_fetch_sub(object, operand, order)        \
+  _Generic((object),                                         \
+      iree_atomic_int32_t *: _InterlockedExchangeAdd(        \
+                               (volatile int32_t*)(object),  \
+                               -((int32_t)(operand))),       \
+      iree_atomic_int64_t *: _InterlockedExchangeAdd64(      \
+                               (volatile int64_t*)(object),  \
+                               -((int64_t)(operand))),       \
+      iree_atomic_uint32_t *: _InterlockedExchangeAdd(       \
+                                (volatile int32_t*)(object), \
+                                -((int32_t)(operand))),      \
+      iree_atomic_uint64_t *: _InterlockedExchangeAdd64(     \
+                                (volatile int64_t*)(object), \
+                                -((int64_t)(operand))))
+#define iree_atomic_fetch_and(object, operand, order)                        \
+  _Generic((object),                                                         \
+      iree_atomic_int32_t *: _InterlockedAnd((volatile int32_t*)(object),    \
+                                             (int32_t)(operand)),            \
+      iree_atomic_int64_t *: _InterlockedAnd64((volatile int64_t*)(object),  \
+                                               (int64_t)(operand)),          \
+      iree_atomic_uint32_t *: _InterlockedAnd((volatile int32_t*)(object),   \
+                                              (int32_t)(operand)),           \
+      iree_atomic_uint64_t *: _InterlockedAnd64((volatile int64_t*)(object), \
+                                                (int64_t)(operand)))
+#define iree_atomic_fetch_or(object, operand, order)                        \
+  _Generic((object),                                                        \
+      iree_atomic_int32_t *: _InterlockedOr((volatile int32_t*)(object),    \
+                                            (int32_t)(operand)),            \
+      iree_atomic_int64_t *: _InterlockedOr64((volatile int64_t*)(object),  \
+                                              (int64_t)(operand)),          \
+      iree_atomic_uint32_t *: _InterlockedOr((volatile int32_t*)(object),   \
+                                             (int32_t)(operand)),           \
+      iree_atomic_uint64_t *: _InterlockedOr64((volatile int64_t*)(object), \
+                                               (int64_t)(operand)))
+#define iree_atomic_fetch_xor(object, operand, order)                        \
+  _Generic((object),                                                         \
+      iree_atomic_int32_t *: _InterlockedXor((volatile int32_t*)(object),    \
+                                             (int32_t)(operand)),            \
+      iree_atomic_int64_t *: _InterlockedXor64((volatile int64_t*)(object),  \
+                                               (int64_t)(operand)),          \
+      iree_atomic_uint32_t *: _InterlockedXor((volatile int32_t*)(object),   \
+                                              (int32_t)(operand)),           \
+      iree_atomic_uint64_t *: _InterlockedXor64((volatile int64_t*)(object), \
+                                                (int64_t)(operand)))
+#define iree_atomic_exchange(object, desired, order)                           \
+  _Generic((object),                                                           \
+      iree_atomic_int32_t *: _InterlockedExchange((volatile int32_t*)(object), \
+                                                  (int32_t)(desired)),         \
+      iree_atomic_int64_t *: _InterlockedExchange64(                           \
+                               (volatile int64_t*)(object),                    \
+                               (int64_t)(desired)),                            \
+      iree_atomic_uint32_t *: _InterlockedExchange(                            \
+                                (volatile int32_t*)(object),                   \
+                                (int32_t)(desired)),                           \
+      iree_atomic_uint64_t *: _InterlockedExchange64(                          \
+                                (volatile int64_t*)(object),                   \
+                                (int64_t)(desired)))
+#define iree_atomic_compare_exchange_strong(object, expected, desired,        \
+                                            order_succ, order_fail)           \
+  _Generic((object),                                                          \
+      iree_atomic_int32_t *: iree_atomic_compare_exchange_strong_int32_impl(  \
+                               (volatile iree_atomic_int32_t*)(object),       \
+                               (int32_t*)(expected), (int32_t)(desired),      \
+                               (order_succ), (order_fail)),                   \
+      iree_atomic_int64_t *: iree_atomic_compare_exchange_strong_int64_impl(  \
+                               (volatile iree_atomic_int64_t*)(object),       \
+                               (int64_t*)(expected), (int64_t)(desired),      \
+                               (order_succ), (order_fail)),                   \
+      iree_atomic_uint32_t *: iree_atomic_compare_exchange_strong_int32_impl( \
+                                (volatile iree_atomic_int32_t*)(object),      \
+                                (int32_t*)(expected), (int32_t)(desired),     \
+                                (order_succ), (order_fail)),                  \
+      iree_atomic_uint64_t *: iree_atomic_compare_exchange_strong_int64_impl( \
+                                (volatile iree_atomic_int64_t*)(object),      \
+                                (int64_t*)(expected), (int64_t)(desired),     \
+                                (order_succ), (order_fail)))
+#define iree_atomic_compare_exchange_weak iree_atomic_compare_exchange_strong
+
 static inline bool iree_atomic_compare_exchange_strong_int32_impl(
     volatile iree_atomic_int32_t* object, int32_t* expected, int32_t desired,
     iree_memory_order_t order_succ, iree_memory_order_t order_fail) {
@@ -123,59 +311,7 @@
   }
 }
 
-#define iree_atomic_thread_fence(order) MemoryBarrier()
-
-// There are no pointer-width atomic ops in MSVC so we need to specialize based
-// on the pointer size.
-#if defined(IREE_PTR_SIZE_32)
-#define iree_atomic_load_intptr(object, order) \
-  (intptr_t) iree_atomic_load_int32((iree_atomic_int32_t*)(object), (order))
-#define iree_atomic_store_intptr(object, desired, order)             \
-  (intptr_t) iree_atomic_store_int32((iree_atomic_int32_t*)(object), \
-                                     (int32_t)(desired), (order))
-#define iree_atomic_fetch_add_intptr(object, operand, order)             \
-  (intptr_t) iree_atomic_fetch_add_int32((iree_atomic_int32_t*)(object), \
-                                         (int32_t)(operand), (order))
-#define iree_atomic_fetch_sub_intptr(object, operand, order)             \
-  (intptr_t) iree_atomic_fetch_sub_int32((iree_atomic_int32_t*)(object), \
-                                         (int32_t)(operand), (order))
-#define iree_atomic_exchange_intptr(object, desired, order)             \
-  (intptr_t) iree_atomic_exchange_int32((iree_atomic_int32_t*)(object), \
-                                        (int32_t)(desired), (order))
-#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \
-                                                   order_succ, order_fail)    \
-  iree_atomic_compare_exchange_strong_int32(                                  \
-      (iree_atomic_int32_t*)(object), (int32_t*)(expected),                   \
-      (int32_t)(desired), (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_intptr \
-  iree_atomic_compare_exchange_strong_intptr
-#else
-#define iree_atomic_load_intptr(object, order) \
-  (intptr_t) iree_atomic_load_int64((iree_atomic_int64_t*)(object), (order))
-#define iree_atomic_store_intptr(object, desired, order)             \
-  (intptr_t) iree_atomic_store_int64((iree_atomic_int64_t*)(object), \
-                                     (int64_t)(desired), (order))
-#define iree_atomic_fetch_add_intptr(object, operand, order)             \
-  (intptr_t) iree_atomic_fetch_add_int64((iree_atomic_int64_t*)(object), \
-                                         (int64_t)(operand), (order))
-#define iree_atomic_fetch_sub_intptr(object, operand, order)             \
-  (intptr_t) iree_atomic_fetch_sub_int64((iree_atomic_int64_t*)(object), \
-                                         (int64_t)(operand), (order))
-#define iree_atomic_exchange_intptr(object, desired, order)             \
-  (intptr_t) iree_atomic_exchange_int64((iree_atomic_int64_t*)(object), \
-                                        (int64_t)(desired), (order))
-#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \
-                                                   order_succ, order_fail)    \
-  iree_atomic_compare_exchange_strong_int64(                                  \
-      (iree_atomic_int64_t*)(object), (int64_t*)(expected),                   \
-      (int64_t)(desired), (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_intptr \
-  iree_atomic_compare_exchange_strong_intptr
-#endif  // IREE_PTR_SIZE_32
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
+#endif  // IREE_ATOMIC_USE_MSVC_C11
 
 #endif  // IREE_COMPILER_MSVC
 
diff --git a/runtime/src/iree/base/internal/atomics_test.cc b/runtime/src/iree/base/internal/atomics_test.cc
index a9fce2f..d78890c 100644
--- a/runtime/src/iree/base/internal/atomics_test.cc
+++ b/runtime/src/iree/base/internal/atomics_test.cc
@@ -21,9 +21,9 @@
   intptr_t ptr_0 = 0x0;
   intptr_t ptr_1 = 0x1;
   iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(ptr_0);
-  EXPECT_EQ(ptr_0, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
-  iree_atomic_store_intptr(&value, ptr_1, iree_memory_order_seq_cst);
-  EXPECT_EQ(ptr_1, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_0, iree_atomic_load(&value, iree_memory_order_seq_cst));
+  iree_atomic_store(&value, ptr_1, iree_memory_order_seq_cst);
+  EXPECT_EQ(ptr_1, iree_atomic_load(&value, iree_memory_order_seq_cst));
 }
 
 TEST(AtomicPtr, AddSub) {
@@ -31,15 +31,15 @@
   intptr_t ptr_1 = 0x1;
   intptr_t ptr_2 = 0x2;
   iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(ptr_0);
-  EXPECT_EQ(ptr_0, iree_atomic_fetch_add_intptr(&value, ptr_1,
-                                                iree_memory_order_seq_cst));
-  EXPECT_EQ(ptr_1, iree_atomic_fetch_add_intptr(&value, ptr_1,
-                                                iree_memory_order_seq_cst));
-  EXPECT_EQ(ptr_2, iree_atomic_fetch_sub_intptr(&value, ptr_1,
-                                                iree_memory_order_seq_cst));
-  EXPECT_EQ(ptr_1, iree_atomic_fetch_sub_intptr(&value, ptr_1,
-                                                iree_memory_order_seq_cst));
-  EXPECT_EQ(ptr_0, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_0,
+            iree_atomic_fetch_add(&value, ptr_1, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_1,
+            iree_atomic_fetch_add(&value, ptr_1, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_2,
+            iree_atomic_fetch_sub(&value, ptr_1, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_1,
+            iree_atomic_fetch_sub(&value, ptr_1, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_0, iree_atomic_load(&value, iree_memory_order_seq_cst));
 }
 
 TEST(AtomicPtr, Exchange) {
@@ -47,11 +47,11 @@
   intptr_t ptr_1 = 0x1;
   intptr_t ptr_2 = 0x2;
   iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(ptr_0);
-  EXPECT_EQ(ptr_0, iree_atomic_exchange_intptr(&value, ptr_1,
-                                               iree_memory_order_seq_cst));
-  EXPECT_EQ(ptr_1, iree_atomic_exchange_intptr(&value, ptr_2,
-                                               iree_memory_order_seq_cst));
-  EXPECT_EQ(ptr_2, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_0,
+            iree_atomic_exchange(&value, ptr_1, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_1,
+            iree_atomic_exchange(&value, ptr_2, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_2, iree_atomic_load(&value, iree_memory_order_seq_cst));
 }
 
 TEST(AtomicPtr, CompareExchange) {
@@ -62,31 +62,31 @@
   intptr_t ptr_expected = 0;
 
   // OK: value == ptr_0, CAS(ptr_0 -> ptr_1)
-  iree_atomic_store_intptr(&value, ptr_0, iree_memory_order_seq_cst);
+  iree_atomic_store(&value, ptr_0, iree_memory_order_seq_cst);
   ptr_expected = ptr_0;
-  EXPECT_TRUE(iree_atomic_compare_exchange_strong_intptr(
-      &value, &ptr_expected, ptr_1, iree_memory_order_seq_cst,
-      iree_memory_order_seq_cst));
+  EXPECT_TRUE(iree_atomic_compare_exchange_strong(&value, &ptr_expected, ptr_1,
+                                                  iree_memory_order_seq_cst,
+                                                  iree_memory_order_seq_cst));
   EXPECT_EQ(ptr_0, ptr_expected);
-  EXPECT_EQ(ptr_1, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_1, iree_atomic_load(&value, iree_memory_order_seq_cst));
 
   // OK: value == ptr_1, CAS(ptr_1 -> ptr_2)
-  iree_atomic_store_intptr(&value, ptr_1, iree_memory_order_seq_cst);
+  iree_atomic_store(&value, ptr_1, iree_memory_order_seq_cst);
   ptr_expected = ptr_1;
-  EXPECT_TRUE(iree_atomic_compare_exchange_strong_intptr(
-      &value, &ptr_expected, ptr_2, iree_memory_order_seq_cst,
-      iree_memory_order_seq_cst));
+  EXPECT_TRUE(iree_atomic_compare_exchange_strong(&value, &ptr_expected, ptr_2,
+                                                  iree_memory_order_seq_cst,
+                                                  iree_memory_order_seq_cst));
   EXPECT_EQ(ptr_1, ptr_expected);
-  EXPECT_EQ(ptr_2, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_2, iree_atomic_load(&value, iree_memory_order_seq_cst));
 
   // FAIL: value == ptr_0, CAS(ptr_1 -> ptr_2)
-  iree_atomic_store_intptr(&value, ptr_0, iree_memory_order_seq_cst);
+  iree_atomic_store(&value, ptr_0, iree_memory_order_seq_cst);
   ptr_expected = ptr_1;
-  EXPECT_FALSE(iree_atomic_compare_exchange_strong_intptr(
-      &value, &ptr_expected, ptr_2, iree_memory_order_seq_cst,
-      iree_memory_order_seq_cst));
+  EXPECT_FALSE(iree_atomic_compare_exchange_strong(&value, &ptr_expected, ptr_2,
+                                                   iree_memory_order_seq_cst,
+                                                   iree_memory_order_seq_cst));
   EXPECT_EQ(ptr_0, ptr_expected);
-  EXPECT_EQ(ptr_0, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst));
+  EXPECT_EQ(ptr_0, iree_atomic_load(&value, iree_memory_order_seq_cst));
 }
 
 TEST(AtomicRefCount, IncDec) {
diff --git a/runtime/src/iree/base/internal/dynamic_library_win32.c b/runtime/src/iree/base/internal/dynamic_library_win32.c
index af6e4e8..2cbdd07 100644
--- a/runtime/src/iree/base/internal/dynamic_library_win32.c
+++ b/runtime/src/iree/base/internal/dynamic_library_win32.c
@@ -91,7 +91,7 @@
   static iree_atomic_int32_t next_unique_id = IREE_ATOMIC_VAR_INIT(0);
   // relaxed because we only care about uniqueness; we don't care about
   // ordering of accesses to unique_id w.r.t. other memory operations.
-  uint32_t unique_id = (uint32_t)iree_atomic_fetch_add_int32(
+  uint32_t unique_id = (uint32_t)iree_atomic_fetch_add(
       &next_unique_id, 1, iree_memory_order_relaxed);
 
   // Allocate storage for the full file path and format it in.
diff --git a/runtime/src/iree/base/internal/synchronization.c b/runtime/src/iree/base/internal/synchronization.c
index 65fb0d1..960a70c 100644
--- a/runtime/src/iree/base/internal/synchronization.c
+++ b/runtime/src/iree/base/internal/synchronization.c
@@ -447,8 +447,7 @@
 
 void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex) {
   // Assert unlocked (callers must ensure the mutex is no longer in use).
-  SYNC_ASSERT(
-      iree_atomic_load_int32(&mutex->value, iree_memory_order_acquire) == 0);
+  SYNC_ASSERT(iree_atomic_load(&mutex->value, iree_memory_order_acquire) == 0);
 }
 
 // Helper to perform a compare_exchange operation on mutex->value, internally
@@ -467,9 +466,9 @@
   // more about efficiency in the uncontended case than we care about avoiding
   // spurious failure. Also, some callers are calling this in a loop, where they
   // would want the weak form anyway.
-  return iree_atomic_compare_exchange_weak_int32(
-      &mutex->value, expected, desired, iree_memory_order_acquire,
-      iree_memory_order_relaxed);
+  return iree_atomic_compare_exchange_weak(&mutex->value, expected, desired,
+                                           iree_memory_order_acquire,
+                                           iree_memory_order_relaxed);
 }
 
 void iree_slim_mutex_lock(iree_slim_mutex_t* mutex)
@@ -490,8 +489,7 @@
   // This uses relaxed order because this is an internal intermediate step and
   // we only need atomicity here.
   value =
-      iree_atomic_fetch_add_int32(&mutex->value, 1, iree_memory_order_relaxed) +
-      1;
+      iree_atomic_fetch_add(&mutex->value, 1, iree_memory_order_relaxed) + 1;
 
   while (true) {
     // While the lock is available: try to acquire it for this thread.
@@ -513,8 +511,7 @@
       int spin_count = 100;
       for (int i = 0; i < spin_count && iree_slim_mutex_is_locked(value); ++i) {
         iree_processor_yield();
-        value =
-            iree_atomic_load_int32(&mutex->value, iree_memory_order_relaxed);
+        value = iree_atomic_load(&mutex->value, iree_memory_order_relaxed);
       }
     }
 
@@ -523,7 +520,7 @@
       // NOTE: we don't care about wait failure here as we are going to loop
       // and check again anyway.
       iree_futex_wait(&mutex->value, value, IREE_TIME_INFINITE_FUTURE);
-      value = iree_atomic_load_int32(&mutex->value, iree_memory_order_relaxed);
+      value = iree_atomic_load(&mutex->value, iree_memory_order_relaxed);
     }
   }
 }
@@ -541,8 +538,8 @@
     IREE_DISABLE_THREAD_SAFETY_ANALYSIS {
   // Refer to the iree_slim_mutex_t struct comment, "Notes on atomics".
   // Transition 1->0 (unlocking with no waiters) or 2->1 (with waiters).
-  if (iree_atomic_fetch_sub_int32(&mutex->value, iree_slim_mutex_value(1),
-                                  iree_memory_order_release) !=
+  if (iree_atomic_fetch_sub(&mutex->value, iree_slim_mutex_value(1),
+                            iree_memory_order_release) !=
       iree_slim_mutex_value(1)) {
     // One (or more) waiters; wake a single one to avoid a thundering herd of
     // multiple threads all waking and trying to grab the lock (as only one will
@@ -749,14 +746,14 @@
 void iree_notification_deinitialize(iree_notification_t* notification) {
   // Assert no more waiters (callers must tear down waiters first).
   SYNC_ASSERT(
-      (iree_atomic_load_int64(&notification->value, iree_memory_order_acquire) &
+      (iree_atomic_load(&notification->value, iree_memory_order_acquire) &
        IREE_NOTIFICATION_WAITER_MASK) == 0);
 }
 
 void iree_notification_post(iree_notification_t* notification, int32_t count) {
-  uint64_t previous_value = iree_atomic_fetch_add_int64(
-      &notification->value, IREE_NOTIFICATION_EPOCH_INC,
-      iree_memory_order_acq_rel);
+  uint64_t previous_value =
+      iree_atomic_fetch_add(&notification->value, IREE_NOTIFICATION_EPOCH_INC,
+                            iree_memory_order_acq_rel);
   // If there is at least one waiter: wake up to |count| of them.
   if (IREE_UNLIKELY(previous_value & IREE_NOTIFICATION_WAITER_MASK)) {
     iree_futex_wake(iree_notification_epoch_address(notification), count);
@@ -765,9 +762,9 @@
 
 iree_wait_token_t iree_notification_prepare_wait(
     iree_notification_t* notification) {
-  uint64_t previous_value = iree_atomic_fetch_add_int64(
-      &notification->value, IREE_NOTIFICATION_WAITER_INC,
-      iree_memory_order_acq_rel);
+  uint64_t previous_value =
+      iree_atomic_fetch_add(&notification->value, IREE_NOTIFICATION_WAITER_INC,
+                            iree_memory_order_acq_rel);
   return (iree_wait_token_t)(previous_value >> IREE_NOTIFICATION_EPOCH_SHIFT);
 }
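+
+// NOTE (summarizing the ops above): the 64-bit notification value packs the
+// live waiter count in its low bits (IREE_NOTIFICATION_WAITER_MASK) and a
+// monotonically increasing epoch in its high bits (from
+// IREE_NOTIFICATION_EPOCH_SHIFT); posting bumps the epoch while
+// prepare/commit/cancel adjust the waiter count.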
 
@@ -779,8 +776,7 @@
 
 static iree_notification_result_t iree_notification_test_wait_condition(
     iree_notification_t* notification, iree_wait_token_t wait_token) {
-  return (iree_atomic_load_int64(&notification->value,
-                                 iree_memory_order_acquire) >>
+  return (iree_atomic_load(&notification->value, iree_memory_order_acquire) >>
           IREE_NOTIFICATION_EPOCH_SHIFT) != wait_token
              ? IREE_NOTIFICATION_RESULT_RESOLVED
              : IREE_NOTIFICATION_RESULT_UNRESOLVED;
@@ -830,9 +826,9 @@
   // TODO(benvanik): benchmark under real workloads.
   // iree_memory_order_relaxed would suffice for correctness but the faster
   // the waiter count gets to 0 the less likely we'll wake on the futex.
-  uint64_t previous_value = iree_atomic_fetch_add_int64(
-      &notification->value, IREE_NOTIFICATION_WAITER_DEC,
-      iree_memory_order_acq_rel);
+  uint64_t previous_value =
+      iree_atomic_fetch_add(&notification->value, IREE_NOTIFICATION_WAITER_DEC,
+                            iree_memory_order_acq_rel);
   SYNC_ASSERT((previous_value & IREE_NOTIFICATION_WAITER_MASK) != 0);
 
   return result == IREE_NOTIFICATION_RESULT_RESOLVED;
@@ -842,9 +838,9 @@
   // TODO(benvanik): benchmark under real workloads.
   // iree_memory_order_relaxed would suffice for correctness but the faster
   // the waiter count gets to 0 the less likely we'll wake on the futex.
-  uint64_t previous_value = iree_atomic_fetch_add_int64(
-      &notification->value, IREE_NOTIFICATION_WAITER_DEC,
-      iree_memory_order_acq_rel);
+  uint64_t previous_value =
+      iree_atomic_fetch_add(&notification->value, IREE_NOTIFICATION_WAITER_DEC,
+                            iree_memory_order_acq_rel);
   SYNC_ASSERT((previous_value & IREE_NOTIFICATION_WAITER_MASK) != 0);
 }
 
diff --git a/runtime/src/iree/base/internal/threading_darwin.c b/runtime/src/iree/base/internal/threading_darwin.c
index 52932f8..dc4b5f8 100644
--- a/runtime/src/iree/base/internal/threading_darwin.c
+++ b/runtime/src/iree/base/internal/threading_darwin.c
@@ -104,9 +104,8 @@
   thread->entry_arg = entry_arg;
   iree_strncpy_s(thread->name, IREE_ARRAYSIZE(thread->name), params.name.data,
                  iree_min(params.name.size, IREE_ARRAYSIZE(thread->name) - 1));
-  iree_atomic_store_int32(&thread->is_suspended,
-                          params.create_suspended ? 1 : 0,
-                          iree_memory_order_relaxed);
+  iree_atomic_store(&thread->is_suspended, params.create_suspended ? 1 : 0,
+                    iree_memory_order_relaxed);
 
   pthread_attr_t thread_attr;
   pthread_attr_init(&thread_attr);
@@ -239,7 +238,7 @@
   // always balance suspend/resume or else we'll mess with any
   // debuggers/profilers that may be suspending threads for their own uses.
   int32_t expected = 1;
-  if (iree_atomic_compare_exchange_strong_int32(
+  if (iree_atomic_compare_exchange_strong(
           &thread->is_suspended, &expected, 0, iree_memory_order_acq_rel,
           iree_memory_order_relaxed /* expected is unused */)) {
     thread_resume(thread->mach_port);
diff --git a/runtime/src/iree/base/internal/threading_pthreads.c b/runtime/src/iree/base/internal/threading_pthreads.c
index 1686fd1..3f15987 100644
--- a/runtime/src/iree/base/internal/threading_pthreads.c
+++ b/runtime/src/iree/base/internal/threading_pthreads.c
@@ -51,8 +51,8 @@
 
 static bool iree_thread_resumed_predicate(void* arg) {
   iree_thread_t* thread = (iree_thread_t*)arg;
-  return iree_atomic_load_int32(&thread->suspend_count,
-                                iree_memory_order_acquire) == 0;
+  return iree_atomic_load(&thread->suspend_count, iree_memory_order_acquire) ==
+         0;
 }
 
 #if defined(IREE_PLATFORM_EMSCRIPTEN)
@@ -99,8 +99,8 @@
   IREE_TRACE_SET_THREAD_NAME(thread->name);
 
   // Wait until we resume if we were created suspended.
-  while (iree_atomic_load_int32(&thread->suspend_count,
-                                iree_memory_order_acquire) > 0) {
+  while (iree_atomic_load(&thread->suspend_count, iree_memory_order_acquire) >
+         0) {
     iree_notification_await(&thread->suspend_barrier,
                             iree_thread_resumed_predicate, thread,
                             iree_infinite_timeout());
@@ -335,8 +335,8 @@
 void iree_thread_resume(iree_thread_t* thread) {
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  if (iree_atomic_exchange_int32(&thread->suspend_count, 0,
-                                 iree_memory_order_acq_rel) == 1) {
+  if (iree_atomic_exchange(&thread->suspend_count, 0,
+                           iree_memory_order_acq_rel) == 1) {
     iree_notification_post(&thread->suspend_barrier, IREE_ALL_WAITERS);
   }
 
diff --git a/runtime/src/iree/base/internal/threading_test.cc b/runtime/src/iree/base/internal/threading_test.cc
index 8ee5a96..1fd9730 100644
--- a/runtime/src/iree/base/internal/threading_test.cc
+++ b/runtime/src/iree/base/internal/threading_test.cc
@@ -34,12 +34,11 @@
     iree_atomic_int32_t value;
     iree_notification_t barrier;
   } entry_data;
-  iree_atomic_store_int32(&entry_data.value, 123, iree_memory_order_relaxed);
+  iree_atomic_store(&entry_data.value, 123, iree_memory_order_relaxed);
   iree_notification_initialize(&entry_data.barrier);
   iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int {
     auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
-    iree_atomic_fetch_add_int32(&entry_data->value, 1,
-                                iree_memory_order_acq_rel);
+    iree_atomic_fetch_add(&entry_data->value, 1, iree_memory_order_acq_rel);
     iree_notification_post(&entry_data->barrier, IREE_ALL_WAITERS);
     return 0;
   };
@@ -55,8 +54,8 @@
       &entry_data.barrier,
       +[](void* entry_arg) -> bool {
         auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
-        return iree_atomic_load_int32(&entry_data->value,
-                                      iree_memory_order_relaxed) == (123 + 1);
+        return iree_atomic_load(&entry_data->value,
+                                iree_memory_order_relaxed) == (123 + 1);
       },
       &entry_data, iree_infinite_timeout());
 
@@ -76,12 +75,11 @@
     iree_atomic_int32_t value;
     iree_notification_t barrier;
   } entry_data;
-  iree_atomic_store_int32(&entry_data.value, 123, iree_memory_order_relaxed);
+  iree_atomic_store(&entry_data.value, 123, iree_memory_order_relaxed);
   iree_notification_initialize(&entry_data.barrier);
   iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int {
     auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
-    iree_atomic_fetch_add_int32(&entry_data->value, 1,
-                                iree_memory_order_acq_rel);
+    iree_atomic_fetch_add(&entry_data->value, 1, iree_memory_order_acq_rel);
     iree_notification_post(&entry_data->barrier, IREE_ALL_WAITERS);
     return 0;
   };
@@ -95,11 +93,11 @@
   // the value. I can't think of a good way to test this, though, so we'll just
   // wait a moment here and assume that if the thread was able to run it would
   // have during this wait.
-  ASSERT_EQ(123, iree_atomic_load_int32(&entry_data.value,
-                                        iree_memory_order_seq_cst));
+  ASSERT_EQ(123,
+            iree_atomic_load(&entry_data.value, iree_memory_order_seq_cst));
   std::this_thread::sleep_for(std::chrono::milliseconds(150));
-  ASSERT_EQ(123, iree_atomic_load_int32(&entry_data.value,
-                                        iree_memory_order_seq_cst));
+  ASSERT_EQ(123,
+            iree_atomic_load(&entry_data.value, iree_memory_order_seq_cst));
 
   // Resume the thread and wait for it to finish its work.
   iree_thread_resume(thread);
@@ -107,8 +105,8 @@
       &entry_data.barrier,
       +[](void* entry_arg) -> bool {
         auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
-        return iree_atomic_load_int32(&entry_data->value,
-                                      iree_memory_order_relaxed) == (123 + 1);
+        return iree_atomic_load(&entry_data->value,
+                                iree_memory_order_relaxed) == (123 + 1);
       },
       &entry_data, iree_infinite_timeout());
   iree_thread_release(thread);
@@ -126,11 +124,10 @@
   struct entry_data_t {
     iree_atomic_int32_t value;
   } entry_data;
-  iree_atomic_store_int32(&entry_data.value, 0, iree_memory_order_relaxed);
+  iree_atomic_store(&entry_data.value, 0, iree_memory_order_relaxed);
   iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int {
     auto* entry_data = reinterpret_cast<struct entry_data_t*>(entry_arg);
-    iree_atomic_fetch_add_int32(&entry_data->value, 1,
-                                iree_memory_order_release);
+    iree_atomic_fetch_add(&entry_data->value, 1, iree_memory_order_release);
     return 0;
   };
 
@@ -150,8 +147,7 @@
       thread, IREE_THREAD_PRIORITY_CLASS_LOWEST);
 
   // Wait for the thread to finish.
-  while (iree_atomic_load_int32(&entry_data.value, iree_memory_order_acquire) !=
-         1) {
+  while (iree_atomic_load(&entry_data.value, iree_memory_order_acquire) != 1) {
     iree_thread_yield();
   }
 
diff --git a/runtime/src/iree/base/internal/threading_win32.c b/runtime/src/iree/base/internal/threading_win32.c
index 6166ce2..64ddca6 100644
--- a/runtime/src/iree/base/internal/threading_win32.c
+++ b/runtime/src/iree/base/internal/threading_win32.c
@@ -143,9 +143,8 @@
   thread->entry_arg = entry_arg;
   strncpy_s(thread->name, IREE_ARRAYSIZE(thread->name), params.name.data,
             min(params.name.size, IREE_ARRAYSIZE(thread->name) - 1));
-  iree_atomic_store_int32(&thread->is_suspended,
-                          params.create_suspended ? 1 : 0,
-                          iree_memory_order_relaxed);
+  iree_atomic_store(&thread->is_suspended, params.create_suspended ? 1 : 0,
+                    iree_memory_order_relaxed);
   iree_thread_override_list_initialize(iree_thread_set_priority_class,
                                        params.priority_class, thread->allocator,
                                        &thread->qos_override_list);
@@ -304,7 +303,7 @@
   // always balance suspend/resume or else we'll mess with any
   // debuggers/profilers that may be suspending threads for their own uses.
   int32_t expected = 1;
-  if (iree_atomic_compare_exchange_strong_int32(
+  if (iree_atomic_compare_exchange_strong(
           &thread->is_suspended, &expected, 0, iree_memory_order_acq_rel,
           iree_memory_order_relaxed /* expected is unused */)) {
     ResumeThread(thread->handle);
diff --git a/runtime/src/iree/base/internal/wait_handle_inproc.c b/runtime/src/iree/base/internal/wait_handle_inproc.c
index e319259..7f92797 100644
--- a/runtime/src/iree/base/internal/wait_handle_inproc.c
+++ b/runtime/src/iree/base/internal/wait_handle_inproc.c
@@ -240,7 +240,7 @@
     iree_wait_handle_t* wait_handle = &params->set->handles[i];
     iree_futex_handle_t* futex =
         (iree_futex_handle_t*)wait_handle->value.local_futex;
-    if (iree_atomic_load_int64(&futex->value, iree_memory_order_acquire) != 0) {
+    if (iree_atomic_load(&futex->value, iree_memory_order_acquire) != 0) {
       ++ready_count;
       if (params->wake_handle) {
         *params->wake_handle = *wait_handle;
@@ -292,7 +292,7 @@
 }
 
 static bool iree_futex_handle_check(iree_futex_handle_t* futex) {
-  return iree_atomic_load_int64(&futex->value, iree_memory_order_acquire) != 0;
+  return iree_atomic_load(&futex->value, iree_memory_order_acquire) != 0;
 }
 
 iree_status_t iree_wait_one(iree_wait_handle_t* handle,
@@ -335,8 +335,8 @@
   if (iree_status_is_ok(status)) {
     out_event->type = IREE_WAIT_PRIMITIVE_TYPE_LOCAL_FUTEX;
     out_event->value.local_futex = (void*)futex;
-    iree_atomic_store_int64(&futex->value, initial_state ? 1 : 0,
-                            iree_memory_order_release);
+    iree_atomic_store(&futex->value, initial_state ? 1 : 0,
+                      iree_memory_order_release);
     iree_notification_initialize(&futex->notification);
   }
 
@@ -358,8 +358,7 @@
   // Try to transition from unset -> set.
   // No-op if already set; otherwise we successfully signaled the event and
   // need to notify all waiters.
-  if (iree_atomic_exchange_int64(&futex->value, 1, iree_memory_order_release) ==
-      0) {
+  if (iree_atomic_exchange(&futex->value, 1, iree_memory_order_release) == 0) {
     // Notify those waiting on just this event.
     iree_notification_post(&futex->notification, IREE_ALL_WAITERS);
     // Notify any multi-waits that may have this event as part of their set.
@@ -371,7 +370,7 @@
   if (!event) return;
   iree_futex_handle_t* futex = (iree_futex_handle_t*)event->value.local_futex;
   if (!futex) return;
-  iree_atomic_store_int64(&futex->value, 0, iree_memory_order_release);
+  iree_atomic_store(&futex->value, 0, iree_memory_order_release);
 }
 
 #endif  // IREE_WAIT_API == IREE_WAIT_API_INPROC
diff --git a/runtime/src/iree/hal/drivers/cuda/memory_pools.c b/runtime/src/iree/hal/drivers/cuda/memory_pools.c
index 236ffaa..1e34422 100644
--- a/runtime/src/iree/hal/drivers/cuda/memory_pools.c
+++ b/runtime/src/iree/hal/drivers/cuda/memory_pools.c
@@ -121,8 +121,8 @@
     iree_atomic_int64_t* bytes_allocated =
         is_device_local ? &pools->statistics.device_bytes_allocated
                         : &pools->statistics.host_bytes_allocated;
-    iree_atomic_fetch_add_int64(bytes_allocated, allocation_size,
-                                iree_memory_order_relaxed);
+    iree_atomic_fetch_add(bytes_allocated, allocation_size,
+                          iree_memory_order_relaxed);
   });
 }
 
@@ -141,8 +141,8 @@
                         : &pools->statistics.host_bytes_freed;
     iree_device_size_t allocation_size =
         iree_hal_buffer_allocation_size(buffer);
-    iree_atomic_fetch_add_int64(bytes_freed, allocation_size,
-                                iree_memory_order_relaxed);
+    iree_atomic_fetch_add(bytes_freed, allocation_size,
+                          iree_memory_order_relaxed);
   });
 }
 
@@ -150,13 +150,13 @@
     iree_hal_cuda_memory_pools_t* pools,
     iree_hal_allocator_statistics_t* statistics) {
   IREE_STATISTICS({
-    statistics->device_bytes_allocated = iree_atomic_load_int64(
+    statistics->device_bytes_allocated = iree_atomic_load(
         &pools->statistics.device_bytes_allocated, iree_memory_order_relaxed);
-    statistics->host_bytes_allocated = iree_atomic_load_int64(
+    statistics->host_bytes_allocated = iree_atomic_load(
         &pools->statistics.host_bytes_allocated, iree_memory_order_relaxed);
-    statistics->device_bytes_freed = iree_atomic_load_int64(
+    statistics->device_bytes_freed = iree_atomic_load(
         &pools->statistics.device_bytes_freed, iree_memory_order_relaxed);
-    statistics->host_bytes_freed = iree_atomic_load_int64(
+    statistics->host_bytes_freed = iree_atomic_load(
         &pools->statistics.host_bytes_freed, iree_memory_order_relaxed);
     if (pools->device_local) {
       cuuint64_t pool_peak = 0;
diff --git a/runtime/src/iree/hal/drivers/hip/memory_pools.c b/runtime/src/iree/hal/drivers/hip/memory_pools.c
index e599cf6..89e27fa 100644
--- a/runtime/src/iree/hal/drivers/hip/memory_pools.c
+++ b/runtime/src/iree/hal/drivers/hip/memory_pools.c
@@ -121,8 +121,8 @@
     iree_atomic_int64_t* bytes_allocated =
         is_device_local ? &pools->statistics.device_bytes_allocated
                         : &pools->statistics.host_bytes_allocated;
-    iree_atomic_fetch_add_int64(bytes_allocated, allocation_size,
-                                iree_memory_order_relaxed);
+    iree_atomic_fetch_add(bytes_allocated, allocation_size,
+                          iree_memory_order_relaxed);
   });
 }
 
@@ -141,8 +141,8 @@
                         : &pools->statistics.host_bytes_freed;
     iree_device_size_t allocation_size =
         iree_hal_buffer_allocation_size(buffer);
-    iree_atomic_fetch_add_int64(bytes_freed, allocation_size,
-                                iree_memory_order_relaxed);
+    iree_atomic_fetch_add(bytes_freed, allocation_size,
+                          iree_memory_order_relaxed);
   });
 }
 
@@ -150,13 +150,13 @@
     iree_hal_hip_memory_pools_t* pools,
     iree_hal_allocator_statistics_t* statistics) {
   IREE_STATISTICS({
-    statistics->device_bytes_allocated = iree_atomic_load_int64(
+    statistics->device_bytes_allocated = iree_atomic_load(
         &pools->statistics.device_bytes_allocated, iree_memory_order_relaxed);
-    statistics->host_bytes_allocated = iree_atomic_load_int64(
+    statistics->host_bytes_allocated = iree_atomic_load(
         &pools->statistics.host_bytes_allocated, iree_memory_order_relaxed);
-    statistics->device_bytes_freed = iree_atomic_load_int64(
+    statistics->device_bytes_freed = iree_atomic_load(
         &pools->statistics.device_bytes_freed, iree_memory_order_relaxed);
-    statistics->host_bytes_freed = iree_atomic_load_int64(
+    statistics->host_bytes_freed = iree_atomic_load(
         &pools->statistics.host_bytes_freed, iree_memory_order_relaxed);
 
     if (pools->device_local) {
diff --git a/runtime/src/iree/hal/drivers/metal/shared_event.m b/runtime/src/iree/hal/drivers/metal/shared_event.m
index f741f2e..716306c 100644
--- a/runtime/src/iree/hal/drivers/metal/shared_event.m
+++ b/runtime/src/iree/hal/drivers/metal/shared_event.m
@@ -231,7 +231,7 @@
   // Create an atomic to count how many semaphores have signaled. Mark it as `__block` so different
   // threads share the same data by reference.
   __block iree_atomic_int32_t wait_count;
-  iree_atomic_store_int32(&wait_count, 0, iree_memory_order_release);
+  iree_atomic_store(&wait_count, 0, iree_memory_order_release);
   // The total count we are expecting to see.
   iree_host_size_t total_count = (wait_mode == IREE_HAL_WAIT_MODE_ALL) ? semaphore_list->count : 1;
   // Theoretically we don't really need to mark the semaphore handle as __block given that the
@@ -253,7 +253,7 @@
                                         // Fail as a whole if any participating semaphore failed.
                                         if (v >= IREE_HAL_SEMAPHORE_FAILURE_VALUE) did_fail = true;
 
-                                        int32_t old_value = iree_atomic_fetch_add_int32(
+                                        int32_t old_value = iree_atomic_fetch_add(
                                             &wait_count, 1, iree_memory_order_release);
                                          // The last signaled semaphore sends out the notification.
                                          // Atomic fetch-add returns the old value, so we need to +1.
diff --git a/runtime/src/iree/hal/drivers/metal/staging_buffer.m b/runtime/src/iree/hal/drivers/metal/staging_buffer.m
index ca0128f..e83e622 100644
--- a/runtime/src/iree/hal/drivers/metal/staging_buffer.m
+++ b/runtime/src/iree/hal/drivers/metal/staging_buffer.m
@@ -37,8 +37,7 @@
   out_staging_buffer->host_buffer = metal_buffer.contents;
   iree_slim_mutex_initialize(&out_staging_buffer->offset_mutex);
   out_staging_buffer->offset = 0;
-  iree_atomic_store_int32(&out_staging_buffer->pending_command_buffers, 0,
-                          iree_memory_order_relaxed);
+  iree_atomic_store(&out_staging_buffer->pending_command_buffers, 0, iree_memory_order_relaxed);
 
   IREE_TRACE_ZONE_END(z0);
   return iree_ok_status();
@@ -97,14 +96,13 @@
 
 void iree_hal_metal_staging_buffer_increase_command_buffer_refcount(
     iree_hal_metal_staging_buffer_t* staging_buffer) {
-  iree_atomic_fetch_add_int32(&staging_buffer->pending_command_buffers, 1,
-                              iree_memory_order_relaxed);
+  iree_atomic_fetch_add(&staging_buffer->pending_command_buffers, 1, iree_memory_order_relaxed);
 }
 
 void iree_hal_metal_staging_buffer_decrease_command_buffer_refcount(
     iree_hal_metal_staging_buffer_t* staging_buffer) {
-  if (iree_atomic_fetch_sub_int32(&staging_buffer->pending_command_buffers, 1,
-                                  iree_memory_order_acq_rel) == 1) {
+  if (iree_atomic_fetch_sub(&staging_buffer->pending_command_buffers, 1,
+                            iree_memory_order_acq_rel) == 1) {
     iree_hal_metal_staging_buffer_reset(staging_buffer);
   }
 }
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_semaphore.cc b/runtime/src/iree/hal/drivers/vulkan/native_semaphore.cc
index f75b2c0..631f138 100644
--- a/runtime/src/iree/hal/drivers/vulkan/native_semaphore.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/native_semaphore.cc
@@ -68,8 +68,7 @@
                                   &semaphore->base);
     semaphore->logical_device = logical_device;
     semaphore->handle = handle;
-    iree_atomic_store_intptr(&semaphore->failure_status, 0,
-                             iree_memory_order_release);
+    iree_atomic_store(&semaphore->failure_status, 0, iree_memory_order_release);
     *out_semaphore = &semaphore->base;
   } else {
     logical_device->syms()->vkDestroySemaphore(*logical_device, handle,
@@ -87,7 +86,7 @@
   iree_allocator_t host_allocator = semaphore->logical_device->host_allocator();
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  iree_status_ignore((iree_status_t)iree_atomic_load_intptr(
+  iree_status_ignore((iree_status_t)iree_atomic_load(
       &semaphore->failure_status, iree_memory_order_acquire));
 
   semaphore->logical_device->syms()->vkDestroySemaphore(
@@ -127,7 +126,7 @@
 
   // If the semaphore failed then clone the status so we can report it.
   if (value >= IREE_HAL_SEMAPHORE_FAILURE_VALUE) {
-    iree_status_t failure_status = (iree_status_t)iree_atomic_load_intptr(
+    iree_status_t failure_status = (iree_status_t)iree_atomic_load(
         &semaphore->failure_status, iree_memory_order_acquire);
     if (iree_status_is_ok(failure_status)) {
       return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
@@ -178,7 +177,7 @@
   // Try to set our local status - we only preserve the first failure so only
   // do this if we are going from a valid semaphore to a failed one.
   iree_status_t old_status = iree_ok_status();
-  if (!iree_atomic_compare_exchange_strong_intptr(
+  if (!iree_atomic_compare_exchange_strong(
           &semaphore->failure_status, (intptr_t*)&old_status, (intptr_t)status,
           iree_memory_order_acq_rel,
           iree_memory_order_relaxed /* old_status is unused */)) {
diff --git a/runtime/src/iree/hal/local/executable_plugin_manager.c b/runtime/src/iree/hal/local/executable_plugin_manager.c
index 6d41c76..2739aa9 100644
--- a/runtime/src/iree/hal/local/executable_plugin_manager.c
+++ b/runtime/src/iree/hal/local/executable_plugin_manager.c
@@ -432,8 +432,8 @@
 
   // Get the next provider slot. Note that we don't yet increment it as we need
   // to put the provider in there first.
-  int32_t slot = iree_atomic_load_int32(&manager->provider_count,
-                                        iree_memory_order_acquire);
+  int32_t slot =
+      iree_atomic_load(&manager->provider_count, iree_memory_order_acquire);
   if (slot >= manager->capacity) {
     iree_slim_mutex_unlock(&manager->mutex);
     return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
@@ -449,8 +449,7 @@
   }
 
   // Mark the slot as valid now that the provider is in it.
-  iree_atomic_fetch_add_int32(&manager->provider_count, 1,
-                              iree_memory_order_release);
+  iree_atomic_fetch_add(&manager->provider_count, 1, iree_memory_order_release);
 
   iree_slim_mutex_unlock(&manager->mutex);
   return iree_ok_status();
@@ -506,8 +505,8 @@
   // but that's ok: multithreaded registration/resolution is non-deterministic
   // by nature. Not holding the lock here means we allow multiple threads to
   // resolve imports at the same time.
-  int32_t provider_count = iree_atomic_load_int32(&manager->provider_count,
-                                                  iree_memory_order_acquire);
+  int32_t provider_count =
+      iree_atomic_load(&manager->provider_count, iree_memory_order_acquire);
 
   // Scan in reverse registration order so that more recently registered
   // providers get queried first. try_resolve will populate any function
diff --git a/runtime/src/iree/hal/utils/deferred_work_queue.c b/runtime/src/iree/hal/utils/deferred_work_queue.c
index b4b2285..e41fe35 100644
--- a/runtime/src/iree/hal/utils/deferred_work_queue.c
+++ b/runtime/src/iree/hal/utils/deferred_work_queue.c
@@ -393,9 +393,9 @@
   iree_notification_initialize(&working_area->state_notification);
   iree_hal_deferred_work_queue_ready_action_list_deinitialize(
       &working_area->ready_worklist, host_allocator);
-  iree_atomic_store_int32(&working_area->worker_state,
-                          IREE_HAL_WORKER_STATE_IDLE_WAITING,
-                          iree_memory_order_release);
+  iree_atomic_store(&working_area->worker_state,
+                    IREE_HAL_WORKER_STATE_IDLE_WAITING,
+                    iree_memory_order_release);
 }
 
 static void iree_hal_deferred_work_queue_working_area_deinitialize(
@@ -413,9 +413,9 @@
   iree_notification_initialize(&completion_area->state_notification);
   iree_hal_deferred_work_queue_completion_list_initialize(
       &completion_area->completion_list);
-  iree_atomic_store_int32(&completion_area->worker_state,
-                          IREE_HAL_WORKER_STATE_IDLE_WAITING,
-                          iree_memory_order_release);
+  iree_atomic_store(&completion_area->worker_state,
+                    IREE_HAL_WORKER_STATE_IDLE_WAITING,
+                    iree_memory_order_release);
 }
 
 static void iree_hal_deferred_work_queue_completion_area_deinitialize(
@@ -557,17 +557,17 @@
 
 static void iree_hal_deferred_work_queue_notify_worker_thread(
     iree_hal_deferred_work_queue_working_area_t* working_area) {
-  iree_atomic_store_int32(&working_area->worker_state,
-                          IREE_HAL_WORKER_STATE_WORKLOAD_PENDING,
-                          iree_memory_order_release);
+  iree_atomic_store(&working_area->worker_state,
+                    IREE_HAL_WORKER_STATE_WORKLOAD_PENDING,
+                    iree_memory_order_release);
   iree_notification_post(&working_area->state_notification, IREE_ALL_WAITERS);
 }
 
 static void iree_hal_deferred_work_queue_notify_completion_thread(
     iree_hal_deferred_work_queue_completion_area_t* completion_area) {
-  iree_atomic_store_int32(&completion_area->worker_state,
-                          IREE_HAL_WORKER_STATE_WORKLOAD_PENDING,
-                          iree_memory_order_release);
+  iree_atomic_store(&completion_area->worker_state,
+                    IREE_HAL_WORKER_STATE_WORKLOAD_PENDING,
+                    iree_memory_order_release);
   iree_notification_post(&completion_area->state_notification,
                          IREE_ALL_WAITERS);
 }
@@ -1236,14 +1236,14 @@
 
 static bool iree_hal_deferred_work_queue_worker_has_incoming_request(
     iree_hal_deferred_work_queue_working_area_t* working_area) {
-  iree_hal_deferred_work_queue_worker_state_t value = iree_atomic_load_int32(
-      &working_area->worker_state, iree_memory_order_acquire);
+  iree_hal_deferred_work_queue_worker_state_t value =
+      iree_atomic_load(&working_area->worker_state, iree_memory_order_acquire);
   return value == IREE_HAL_WORKER_STATE_WORKLOAD_PENDING;
 }
 
 static bool iree_hal_deferred_work_queue_completion_has_incoming_request(
     iree_hal_deferred_work_queue_completion_area_t* completion_area) {
-  iree_hal_deferred_work_queue_worker_state_t value = iree_atomic_load_int32(
+  iree_hal_deferred_work_queue_worker_state_t value = iree_atomic_load(
       &completion_area->worker_state, iree_memory_order_acquire);
   return value == IREE_HAL_WORKER_STATE_WORKLOAD_PENDING;
 }
@@ -1369,9 +1369,9 @@
     // sure that we don't accidentally ignore new workload pushed after we
     // finish processing the ready list but before this worker thread
     // overwrites the state.
-    iree_atomic_store_int32(&completion_area->worker_state,
-                            IREE_HAL_WORKER_STATE_IDLE_WAITING,
-                            iree_memory_order_release);
+    iree_atomic_store(&completion_area->worker_state,
+                      IREE_HAL_WORKER_STATE_IDLE_WAITING,
+                      iree_memory_order_release);
     iree_hal_deferred_work_queue_worker_process_completion(actions);
 
     iree_slim_mutex_lock(&actions->action_mutex);
@@ -1424,9 +1424,9 @@
     // sure that we don't accidentally ignore new workload pushed after we
     // finish processing the ready list but before this worker thread
     // overwrites the state.
-    iree_atomic_store_int32(&working_area->worker_state,
-                            IREE_HAL_WORKER_STATE_IDLE_WAITING,
-                            iree_memory_order_release);
+    iree_atomic_store(&working_area->worker_state,
+                      IREE_HAL_WORKER_STATE_IDLE_WAITING,
+                      iree_memory_order_release);
 
     iree_hal_deferred_work_queue_worker_process_ready_list(actions);
 
diff --git a/runtime/src/iree/hal/utils/file_transfer.c b/runtime/src/iree/hal/utils/file_transfer.c
index cee1df6..2bc8dec 100644
--- a/runtime/src/iree/hal/utils/file_transfer.c
+++ b/runtime/src/iree/hal/utils/file_transfer.c
@@ -242,8 +242,8 @@
   // steps are part of this transfer.
   IREE_TRACE({
     static iree_atomic_int32_t next_trace_id = IREE_ATOMIC_VAR_INIT(0);
-    operation->trace_id = iree_atomic_fetch_add_int32(
-        &next_trace_id, 1, iree_memory_order_seq_cst);
+    operation->trace_id =
+        iree_atomic_fetch_add(&next_trace_id, 1, iree_memory_order_seq_cst);
     IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, operation->trace_id);
   });
 
diff --git a/runtime/src/iree/task/affinity_set.h b/runtime/src/iree/task/affinity_set.h
index 3dbf756..dfe6a7a 100644
--- a/runtime/src/iree/task/affinity_set.h
+++ b/runtime/src/iree/task/affinity_set.h
@@ -61,25 +61,25 @@
 
 static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_load(
     iree_atomic_task_affinity_set_t* set, iree_memory_order_t order) {
-  return iree_atomic_load_int64(set, order);
+  return iree_atomic_load(set, order);
 }
 
 static inline void iree_atomic_task_affinity_set_store(
     iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value,
     iree_memory_order_t order) {
-  iree_atomic_store_int64(set, value, order);
+  iree_atomic_store(set, value, order);
 }
 
 static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_fetch_and(
     iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value,
     iree_memory_order_t order) {
-  return iree_atomic_fetch_and_int64(set, value, order);
+  return iree_atomic_fetch_and(set, value, order);
 }
 
 static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_fetch_or(
     iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value,
     iree_memory_order_t order) {
-  return iree_atomic_fetch_or_int64(set, value, order);
+  return iree_atomic_fetch_or(set, value, order);
 }
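+
+// E.g. (hypothetical caller; |worker_mask| and |worker_index| are
+// illustrative names): a worker publishing itself in a 64-bit affinity mask:
+//   iree_atomic_task_affinity_set_fetch_or(&executor->worker_mask,
+//                                          1ull << worker_index,
+//                                          iree_memory_order_acq_rel);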
 
 #ifdef __cplusplus
diff --git a/runtime/src/iree/task/executor.c b/runtime/src/iree/task/executor.c
index ff3280a..6fc98e2 100644
--- a/runtime/src/iree/task/executor.c
+++ b/runtime/src/iree/task/executor.c
@@ -103,10 +103,9 @@
   IREE_TRACE({
     static iree_atomic_int32_t executor_id = IREE_ATOMIC_VAR_INIT(0);
     char trace_name[32];
-    int trace_name_length =
-        snprintf(trace_name, sizeof(trace_name), "iree-executor-%d",
-                 iree_atomic_fetch_add_int32(&executor_id, 1,
-                                             iree_memory_order_seq_cst));
+    int trace_name_length = snprintf(
+        trace_name, sizeof(trace_name), "iree-executor-%d",
+        iree_atomic_fetch_add(&executor_id, 1, iree_memory_order_seq_cst));
     IREE_LEAK_CHECK_DISABLE_PUSH();
     executor->trace_name = malloc(trace_name_length + 1);
     memcpy((void*)executor->trace_name, trace_name, trace_name_length + 1);
@@ -540,8 +539,7 @@
     worker_index += offset + 1;
     mask = iree_shr(mask, offset + 1);
     iree_task_worker_t* victim_worker = &executor->workers[victim_index];
-    if (iree_atomic_load_int32(&victim_worker->state,
-                               iree_memory_order_acquire) !=
+    if (iree_atomic_load(&victim_worker->state, iree_memory_order_acquire) !=
         IREE_TASK_WORKER_STATE_RUNNING) {
       return NULL;
     }
diff --git a/runtime/src/iree/task/executor_demo.cc b/runtime/src/iree/task/executor_demo.cc
index 63dba4c..972d16b 100644
--- a/runtime/src/iree/task/executor_demo.cc
+++ b/runtime/src/iree/task/executor_demo.cc
@@ -89,8 +89,8 @@
             IREE_TRACE_SCOPE_NAMED("tile0");
             IREE_ASSERT_EQ(0, user_context);
             simulate_work(tile_context);
-            iree_atomic_fetch_add_int32(&tile_context->statistics->reserved, 1,
-                                        iree_memory_order_relaxed);
+            iree_atomic_fetch_add(&tile_context->statistics->reserved, 1,
+                                  iree_memory_order_relaxed);
             return iree_ok_status();
           },
           0),
@@ -107,8 +107,8 @@
             IREE_TRACE_SCOPE_NAMED("tile1");
             IREE_ASSERT_EQ(0, user_context);
             simulate_work(tile_context);
-            iree_atomic_fetch_add_int32(&tile_context->statistics->reserved, 1,
-                                        iree_memory_order_relaxed);
+            iree_atomic_fetch_add(&tile_context->statistics->reserved, 1,
+                                  iree_memory_order_relaxed);
             return iree_ok_status();
           },
           0),
diff --git a/runtime/src/iree/task/poller.c b/runtime/src/iree/task/poller.c
index e314379..e04aa3b 100644
--- a/runtime/src/iree/task/poller.c
+++ b/runtime/src/iree/task/poller.c
@@ -32,8 +32,8 @@
   // thread as it performs the initial resume of the wait thread. We'll need to
   // check in enqueue to see if the wait thread needs to be resumed.
   // initial_state = IREE_TASK_POLLER_STATE_SUSPENDED;
-  iree_atomic_store_int32(&out_poller->state, initial_state,
-                          iree_memory_order_release);
+  iree_atomic_store(&out_poller->state, initial_state,
+                    iree_memory_order_release);
 
   // Acquire an event we can use to wake the wait thread from other threads.
   iree_status_t status = iree_event_pool_acquire(
@@ -83,7 +83,7 @@
   // If the thread is already in the exiting/zombie state we don't need to do
   // anything.
   iree_task_poller_state_t prev_state =
-      (iree_task_poller_state_t)iree_atomic_exchange_int32(
+      (iree_task_poller_state_t)iree_atomic_exchange(
           &poller->state, IREE_TASK_POLLER_STATE_EXITING,
           iree_memory_order_acq_rel);
   switch (prev_state) {
@@ -93,8 +93,8 @@
       break;
     case IREE_TASK_POLLER_STATE_ZOMBIE:
       // Poller already exited; reset state to ZOMBIE.
-      iree_atomic_store_int32(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE,
-                              iree_memory_order_release);
+      iree_atomic_store(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE,
+                        iree_memory_order_release);
       break;
     default:
       // Poller now set to EXITING and should exit soon.
@@ -111,7 +111,7 @@
 // Returns true if the wait thread is in the zombie state (exited and awaiting
 // teardown).
 static bool iree_task_poller_is_zombie(iree_task_poller_t* poller) {
-  return iree_atomic_load_int32(&poller->state, iree_memory_order_acquire) ==
+  return iree_atomic_load(&poller->state, iree_memory_order_acquire) ==
          IREE_TASK_POLLER_STATE_ZOMBIE;
 }
 
@@ -240,8 +240,8 @@
     // scan of tasks.
     wait_status_code = IREE_STATUS_OK;
   } else if (task->cancellation_flag != NULL &&
-             iree_atomic_load_int32(task->cancellation_flag,
-                                    iree_memory_order_acquire) != 0) {
+             iree_atomic_load(task->cancellation_flag,
+                              iree_memory_order_acquire) != 0) {
     // Task was cancelled by the user (or a wait-any). These retire without
     // failure and it's up to the user to handle what happens to them.
     wait_status_code = IREE_STATUS_CANCELLED;
@@ -313,8 +313,8 @@
   // If this was part of a wait-any operation then set the cancellation flag
   // such that other waits are cancelled.
   if (iree_any_bit_set(task->header.flags, IREE_TASK_FLAG_WAIT_ANY)) {
-    if (iree_atomic_fetch_add_int32(task->cancellation_flag, 1,
-                                    iree_memory_order_release) == 0) {
+    if (iree_atomic_fetch_add(task->cancellation_flag, 1,
+                              iree_memory_order_release) == 0) {
       // Ensure we scan again to clean up any potentially cancelled tasks.
       // If this was task 4 in a wait-any list then tasks 0-3 need to be
       // retired.
@@ -429,7 +429,7 @@
 // wait handles were resolved.
 static void iree_task_poller_commit_wait(iree_task_poller_t* poller,
                                          iree_time_t deadline_ns) {
-  if (iree_atomic_load_int32(&poller->state, iree_memory_order_acquire) ==
+  if (iree_atomic_load(&poller->state, iree_memory_order_acquire) ==
       IREE_TASK_POLLER_STATE_EXITING) {
     // Thread exit requested - don't block shutdown.
     return;
@@ -486,7 +486,7 @@
 static void iree_task_poller_pump_until_exit(iree_task_poller_t* poller) {
   while (true) {
     // Check state to see if we've been asked to exit.
-    if (iree_atomic_load_int32(&poller->state, iree_memory_order_acquire) ==
+    if (iree_atomic_load(&poller->state, iree_memory_order_acquire) ==
         IREE_TASK_POLLER_STATE_EXITING) {
       // Thread exit requested - cancel pumping.
       break;
@@ -536,8 +536,8 @@
   // to exit while suspended/still starting up, so check that here before we
   // mess with any data structures.
   const bool should_run =
-      iree_atomic_exchange_int32(&poller->state, IREE_TASK_POLLER_STATE_RUNNING,
-                                 iree_memory_order_acq_rel) !=
+      iree_atomic_exchange(&poller->state, IREE_TASK_POLLER_STATE_RUNNING,
+                           iree_memory_order_acq_rel) !=
       IREE_TASK_POLLER_STATE_EXITING;
   if (IREE_LIKELY(should_run)) {
     // << work happens here >>
@@ -545,8 +545,8 @@
   }
 
   IREE_TRACE_ZONE_END(thread_zone);
-  iree_atomic_store_int32(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE,
-                          iree_memory_order_release);
+  iree_atomic_store(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE,
+                    iree_memory_order_release);
   iree_notification_post(&poller->state_notification, IREE_ALL_WAITERS);
   return 0;
 }
diff --git a/runtime/src/iree/task/scope.c b/runtime/src/iree/task/scope.c
index 3ccf6ae..a777d3d 100644
--- a/runtime/src/iree/task/scope.c
+++ b/runtime/src/iree/task/scope.c
@@ -49,12 +49,12 @@
   memset(scope->name, 0xCD, sizeof(scope->name));
 
   // In most cases the status will have been consumed by the scope owner.
-  iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr(
+  iree_status_t status = (iree_status_t)iree_atomic_exchange(
       &scope->permanent_status, (intptr_t)NULL, iree_memory_order_acquire);
   IREE_IGNORE_ERROR(status);
 
-  while (iree_atomic_load_int32(&scope->pending_idle_notification_posts,
-                                iree_memory_order_acquire)) {
+  while (iree_atomic_load(&scope->pending_idle_notification_posts,
+                          iree_memory_order_acquire)) {
     iree_thread_yield();
   }
   iree_notification_deinitialize(&scope->idle_notification);
@@ -74,14 +74,14 @@
 }
 
 bool iree_task_scope_has_failed(iree_task_scope_t* scope) {
-  return iree_atomic_load_intptr(&scope->permanent_status,
-                                 iree_memory_order_acquire) != 0;
+  return iree_atomic_load(&scope->permanent_status,
+                          iree_memory_order_acquire) != 0;
 }
 
 iree_status_t iree_task_scope_consume_status(iree_task_scope_t* scope) {
   iree_status_t old_status = iree_ok_status();
   iree_status_t new_status = iree_ok_status();
-  while (!iree_atomic_compare_exchange_strong_intptr(
+  while (!iree_atomic_compare_exchange_strong(
       &scope->permanent_status, (intptr_t*)&old_status, (intptr_t)new_status,
       iree_memory_order_acq_rel,
       iree_memory_order_acquire /* old_status is actually used */)) {
@@ -114,7 +114,7 @@
   }
 
   iree_status_t old_status = iree_ok_status();
-  if (!iree_atomic_compare_exchange_strong_intptr(
+  if (!iree_atomic_compare_exchange_strong(
           &scope->permanent_status, (intptr_t*)&old_status,
           (intptr_t)new_status, iree_memory_order_acq_rel,
           iree_memory_order_relaxed /* old_status is unused */)) {
@@ -140,16 +140,16 @@
   // relaxed because this 'begin' call will be paired with an 'end' call that
   // will perform the release-store, and this value is only read by
   // 'deinitialize'.
-  iree_atomic_store_int32(&scope->pending_idle_notification_posts, 1,
-                          iree_memory_order_relaxed);
+  iree_atomic_store(&scope->pending_idle_notification_posts, 1,
+                    iree_memory_order_relaxed);
 }
 
 void iree_task_scope_end(iree_task_scope_t* scope) {
   if (iree_atomic_ref_count_dec(&scope->pending_submissions) == 1) {
     // All submissions have completed in this scope - notify any waiters.
     iree_notification_post(&scope->idle_notification, IREE_ALL_WAITERS);
-    iree_atomic_store_int32(&scope->pending_idle_notification_posts, 0,
-                            iree_memory_order_release);
+    iree_atomic_store(&scope->pending_idle_notification_posts, 0,
+                      iree_memory_order_release);
   }
 }
 
diff --git a/runtime/src/iree/task/task.c b/runtime/src/iree/task/task.c
index ae4fbf9..d0e4010 100644
--- a/runtime/src/iree/task/task.c
+++ b/runtime/src/iree/task/task.c
@@ -39,13 +39,13 @@
                                    iree_task_t* completion_task) {
   IREE_ASSERT(!task->completion_task);
   task->completion_task = completion_task;
-  iree_atomic_fetch_add_int32(&completion_task->pending_dependency_count, 1,
-                              iree_memory_order_acq_rel);
+  iree_atomic_fetch_add(&completion_task->pending_dependency_count, 1,
+                        iree_memory_order_acq_rel);
 }
 
 bool iree_task_is_ready(iree_task_t* task) {
-  if (iree_atomic_load_int32(&task->pending_dependency_count,
-                             iree_memory_order_acquire) > 0) {
+  if (iree_atomic_load(&task->pending_dependency_count,
+                       iree_memory_order_acquire) > 0) {
     // At least one dependency is still pending.
     return false;
   }
@@ -62,7 +62,7 @@
       z0, iree_status_code_string(iree_status_code(new_status)));
 
   iree_status_t old_status = iree_ok_status();
-  if (!iree_atomic_compare_exchange_strong_intptr(
+  if (!iree_atomic_compare_exchange_strong(
           permanent_status, (intptr_t*)&old_status, (intptr_t)new_status,
           iree_memory_order_acq_rel,
           iree_memory_order_relaxed /* old_status is unused */)) {
@@ -102,16 +102,15 @@
   // tasks in the appropriate order: if we had a DAG of A -> B, C -> D we must
   // discard respecting the same topological ordering.
 
-  IREE_ASSERT_EQ(0, iree_atomic_load_int32(&task->pending_dependency_count,
-                                           iree_memory_order_acquire));
+  IREE_ASSERT_EQ(0, iree_atomic_load(&task->pending_dependency_count,
+                                     iree_memory_order_acquire));
 
   // Almost all tasks will have a completion task; some may have additional
   // dependent tasks (like barriers) that will be handled below.
   const bool completion_task_ready =
       task->completion_task &&
-      iree_atomic_fetch_sub_int32(
-          &task->completion_task->pending_dependency_count, 1,
-          iree_memory_order_acq_rel) == 1;
+      iree_atomic_fetch_sub(&task->completion_task->pending_dependency_count, 1,
+                            iree_memory_order_acq_rel) == 1;
   if (completion_task_ready) {
     iree_task_list_push_back(discard_worklist, task->completion_task);
   }
@@ -147,8 +146,8 @@
 static void iree_task_retire(iree_task_t* task,
                              iree_task_submission_t* pending_submission,
                              iree_status_t status) {
-  IREE_ASSERT_EQ(0, iree_atomic_load_int32(&task->pending_dependency_count,
-                                           iree_memory_order_acquire));
+  IREE_ASSERT_EQ(0, iree_atomic_load(&task->pending_dependency_count,
+                                     iree_memory_order_acquire));
 
   // Decrement the pending count on the completion task, if any.
   iree_task_t* completion_task = task->completion_task;
@@ -159,8 +158,8 @@
     iree_task_cleanup(task, IREE_STATUS_OK);
     bool completion_task_ready =
         completion_task &&
-        iree_atomic_fetch_sub_int32(&completion_task->pending_dependency_count,
-                                    1, iree_memory_order_acq_rel) == 1;
+        iree_atomic_fetch_sub(&completion_task->pending_dependency_count, 1,
+                              iree_memory_order_acq_rel) == 1;
     if (completion_task_ready) {
       // This was the last pending dependency and the completion task is ready
       // to run.
@@ -180,8 +179,8 @@
 
     bool completion_task_ready =
         completion_task &&
-        iree_atomic_fetch_sub_int32(&completion_task->pending_dependency_count,
-                                    1, iree_memory_order_acq_rel) == 1;
+        iree_atomic_fetch_sub(&completion_task->pending_dependency_count, 1,
+                              iree_memory_order_acq_rel) == 1;
     if (completion_task_ready) {
       // This was the last pending dependency and we know that we can safely
       // abort the completion task by discarding.
@@ -239,7 +238,7 @@
                                iree_task_call_t* out_task) {
   iree_task_initialize(IREE_TASK_TYPE_CALL, scope, &out_task->header);
   out_task->closure = closure;
-  iree_atomic_store_intptr(&out_task->status, 0, iree_memory_order_release);
+  iree_atomic_store(&out_task->status, 0, iree_memory_order_release);
 }
 
 void iree_task_call_execute(iree_task_call_t* task,
@@ -272,9 +271,9 @@
 
   // Check to see if there are no pending dependencies before retiring; the
   // dependency count can go up if new nested tasks were enqueued.
-  if (iree_atomic_load_int32(&task->header.pending_dependency_count,
-                             iree_memory_order_acquire) == 0) {
-    iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr(
+  if (iree_atomic_load(&task->header.pending_dependency_count,
+                       iree_memory_order_acquire) == 0) {
+    iree_status_t status = (iree_status_t)iree_atomic_exchange(
         &task->status, 0, iree_memory_order_acq_rel);
     iree_task_retire(&task->header, pending_submission, status);
   }
@@ -295,8 +294,8 @@
   out_task->dependent_tasks = dependent_tasks;
   for (iree_host_size_t i = 0; i < out_task->dependent_task_count; ++i) {
     iree_task_t* dependent_task = out_task->dependent_tasks[i];
-    iree_atomic_fetch_add_int32(&dependent_task->pending_dependency_count, 1,
-                                iree_memory_order_acq_rel);
+    iree_atomic_fetch_add(&dependent_task->pending_dependency_count, 1,
+                          iree_memory_order_acq_rel);
   }
 }
 
@@ -314,8 +313,8 @@
   task->dependent_tasks = dependent_tasks;
   for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) {
     iree_task_t* dependent_task = task->dependent_tasks[i];
-    iree_atomic_fetch_add_int32(&dependent_task->pending_dependency_count, 1,
-                                iree_memory_order_acq_rel);
+    iree_atomic_fetch_add(&dependent_task->pending_dependency_count, 1,
+                          iree_memory_order_acq_rel);
   }
 }
 
@@ -329,8 +328,8 @@
   for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) {
     iree_task_t* dependent_task = task->dependent_tasks[i];
     const bool dependent_task_ready =
-        iree_atomic_fetch_sub_int32(&dependent_task->pending_dependency_count,
-                                    1, iree_memory_order_acq_rel) == 1;
+        iree_atomic_fetch_sub(&dependent_task->pending_dependency_count, 1,
+                              iree_memory_order_acq_rel) == 1;
     if (dependent_task_ready) {
       // The dependent task has retired and can now be discarded.
       iree_task_list_push_back(discard_worklist, dependent_task);
@@ -348,8 +347,8 @@
   for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) {
     iree_task_t* dependent_task =
         task->dependent_tasks[task->dependent_task_count - i - 1];
-    if (iree_atomic_fetch_sub_int32(&dependent_task->pending_dependency_count,
-                                    1, iree_memory_order_acq_rel) == 1) {
+    if (iree_atomic_fetch_sub(&dependent_task->pending_dependency_count, 1,
+                              iree_memory_order_acq_rel) == 1) {
       // The dependent task has retired and can now be made ready.
       iree_task_submission_enqueue(pending_submission, dependent_task);
     }
@@ -530,13 +529,13 @@
   memcpy(out_task->workgroup_size, workgroup_size,
          sizeof(out_task->workgroup_size));
   out_task->local_memory_size = 0;
-  iree_atomic_store_intptr(&out_task->status, 0, iree_memory_order_release);
+  iree_atomic_store(&out_task->status, 0, iree_memory_order_release);
   memset(&out_task->statistics, 0, sizeof(out_task->statistics));
 
   IREE_TRACE({
     static iree_atomic_int64_t next_dispatch_id = IREE_ATOMIC_VAR_INIT(0);
-    out_task->dispatch_id = iree_atomic_fetch_add_int64(
-        &next_dispatch_id, 1ll, iree_memory_order_acq_rel);
+    out_task->dispatch_id = iree_atomic_fetch_add(&next_dispatch_id, 1ll,
+                                                  iree_memory_order_acq_rel);
   });
 }
 
@@ -597,8 +596,7 @@
 #endif  // IREE_HAL_VERBOSE_TRACING_ENABLE
 
   // Setup the iteration space for shards to pull work from the complete grid.
-  iree_atomic_store_int32(&dispatch_task->tile_index, 0,
-                          iree_memory_order_relaxed);
+  iree_atomic_store(&dispatch_task->tile_index, 0, iree_memory_order_relaxed);
   dispatch_task->tile_count =
       workgroup_count[0] * workgroup_count[1] * workgroup_count[2];
 
@@ -672,7 +670,7 @@
   // any other has hit an error; failure in a dispatch should be so exceedingly
   // rare that allowing some shards to complete after one encounters an error is
   // not a problem.
-  iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr(
+  iree_status_t status = (iree_status_t)iree_atomic_exchange(
       &dispatch_task->status, 0, iree_memory_order_acq_rel);
 
   iree_task_retire(&dispatch_task->header, pending_submission, status);
@@ -763,9 +761,9 @@
   const uint32_t tiles_per_reservation = dispatch_task->tiles_per_reservation;
   // relaxed order because we only care about atomic increments, not about
   // ordering of tile_index accesses w.r.t. other memory accesses.
-  uint32_t tile_base = iree_atomic_fetch_add_int32(&dispatch_task->tile_index,
-                                                   tiles_per_reservation,
-                                                   iree_memory_order_relaxed);
+  uint32_t tile_base =
+      iree_atomic_fetch_add(&dispatch_task->tile_index, tiles_per_reservation,
+                            iree_memory_order_relaxed);
   while (tile_base < tile_count) {
     const uint32_t tile_range =
         iree_min(tile_base + tiles_per_reservation, tile_count);
@@ -813,9 +811,9 @@
     }
 
     // Try to grab the next slice of tiles.
-    tile_base = iree_atomic_fetch_add_int32(&dispatch_task->tile_index,
-                                            tiles_per_reservation,
-                                            iree_memory_order_relaxed);
+    tile_base =
+        iree_atomic_fetch_add(&dispatch_task->tile_index, tiles_per_reservation,
+                              iree_memory_order_relaxed);
   }
 abort_shard:
 
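The shard loop above amounts to a work-stealing counter: each shard reserves a batch of tiles with a relaxed fetch_add and stops once the returned base passes tile_count, with the in-code comment explaining why relaxed ordering suffices. A self-contained sketch of the same reservation loop (illustrative names):

#include <stdatomic.h>
#include <stdint.h>

// Workers repeatedly claim [tile_base, tile_base + tiles_per_reservation),
// clamped to tile_count, until the grid is exhausted.
static void process_tiles(_Atomic uint32_t* tile_index, uint32_t tile_count,
                          uint32_t tiles_per_reservation,
                          void (*process_tile)(uint32_t tile)) {
  uint32_t tile_base = atomic_fetch_add_explicit(
      tile_index, tiles_per_reservation, memory_order_relaxed);
  while (tile_base < tile_count) {
    uint32_t tile_range = tile_base + tiles_per_reservation;
    if (tile_range > tile_count) tile_range = tile_count;
    for (uint32_t tile = tile_base; tile < tile_range; ++tile) {
      process_tile(tile);
    }
    tile_base = atomic_fetch_add_explicit(tile_index, tiles_per_reservation,
                                          memory_order_relaxed);
  }
}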
diff --git a/runtime/src/iree/task/task_test_dispatch.cc b/runtime/src/iree/task/task_test_dispatch.cc
index 3324b6c..b18c26e 100644
--- a/runtime/src/iree/task/task_test_dispatch.cc
+++ b/runtime/src/iree/task/task_test_dispatch.cc
@@ -35,8 +35,7 @@
   bool Verify() {
     fflush(stdout);
     for (iree_host_size_t i = 0; i < workgroup_count_; ++i) {
-      if (iree_atomic_load_int32(&storage_[i], iree_memory_order_seq_cst) !=
-          1) {
+      if (iree_atomic_load(&storage_[i], iree_memory_order_seq_cst) != 1) {
         return false;
       }
     }
@@ -52,8 +51,8 @@
                                           tile_context->workgroup_count[0]) +
         tile_context->workgroup_xyz[1] * tile_context->workgroup_count[0] +
         tile_context->workgroup_xyz[0];
-    iree_atomic_fetch_add_int32(&coverage->storage_[slot], 1,
-                                iree_memory_order_seq_cst);
+    iree_atomic_fetch_add(&coverage->storage_[slot], 1,
+                          iree_memory_order_seq_cst);
 
     // Useful when testing large grids:
     // printf("%u, %u, %u\n", tile_context->workgroup_xyz[0],
diff --git a/runtime/src/iree/task/worker.c b/runtime/src/iree/task/worker.c
index 5bebaa5..e0e1efd 100644
--- a/runtime/src/iree/task/worker.c
+++ b/runtime/src/iree/task/worker.c
@@ -48,8 +48,8 @@
   iree_task_queue_initialize(&out_worker->local_task_queue);
 
   iree_task_worker_state_t initial_state = IREE_TASK_WORKER_STATE_RUNNING;
-  iree_atomic_store_int32(&out_worker->state, initial_state,
-                          iree_memory_order_release);
+  iree_atomic_store(&out_worker->state, initial_state,
+                    iree_memory_order_release);
 
   iree_thread_create_params_t thread_params;
   memset(&thread_params, 0, sizeof(thread_params));
@@ -78,14 +78,14 @@
   // If the thread is already in the exiting/zombie state we don't need to do
   // anything.
   iree_task_worker_state_t prev_state =
-      (iree_task_worker_state_t)iree_atomic_exchange_int32(
+      (iree_task_worker_state_t)iree_atomic_exchange(
           &worker->state, IREE_TASK_WORKER_STATE_EXITING,
           iree_memory_order_acq_rel);
   switch (prev_state) {
     case IREE_TASK_WORKER_STATE_ZOMBIE:
       // Worker already exited; reset state to ZOMBIE.
-      iree_atomic_store_int32(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE,
-                              iree_memory_order_release);
+      iree_atomic_store(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE,
+                        iree_memory_order_release);
       break;
     default:
       // Worker now set to EXITING and should exit soon.
@@ -101,7 +101,7 @@
 // Returns true if the worker is in the zombie state (exited and awaiting
 // teardown).
 static bool iree_task_worker_is_zombie(iree_task_worker_t* worker) {
-  return iree_atomic_load_int32(&worker->state, iree_memory_order_acquire) ==
+  return iree_atomic_load(&worker->state, iree_memory_order_acquire) ==
          IREE_TASK_WORKER_STATE_ZOMBIE;
 }
 
@@ -310,7 +310,7 @@
     iree_task_worker_mark_active(worker);
 
     // Check state to see if we've been asked to exit.
-    if (iree_atomic_load_int32(&worker->state, iree_memory_order_acquire) ==
+    if (iree_atomic_load(&worker->state, iree_memory_order_acquire) ==
         IREE_TASK_WORKER_STATE_EXITING) {
       // Thread exit requested - cancel pumping.
       iree_notification_cancel_wait(&worker->wake_notification);
@@ -395,8 +395,8 @@
   // to exit while suspended/still starting up, so check that here before we
   // mess with any data structures.
   const bool should_run =
-      iree_atomic_exchange_int32(&worker->state, IREE_TASK_WORKER_STATE_RUNNING,
-                                 iree_memory_order_acq_rel) !=
+      iree_atomic_exchange(&worker->state, IREE_TASK_WORKER_STATE_RUNNING,
+                           iree_memory_order_acq_rel) !=
       IREE_TASK_WORKER_STATE_EXITING;
   if (IREE_LIKELY(should_run)) {
     // << work happens here >>
@@ -407,8 +407,8 @@
   iree_task_worker_mark_idle(worker);
 
   IREE_TRACE_ZONE_END(thread_zone);
-  iree_atomic_store_int32(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE,
-                          iree_memory_order_release);
+  iree_atomic_store(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE,
+                    iree_memory_order_release);
   iree_notification_post(&worker->state_notification, IREE_ALL_WAITERS);
   return 0;
 }
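
This file's worker lifecycle is a small state machine (RUNNING → EXITING → ZOMBIE) driven by atomic exchanges rather than locks: the requester swaps in EXITING and reconciles if the worker already reached the terminal state. A hedged sketch of the request-exit handshake, with illustrative names:

#include <stdatomic.h>

enum {
  WORKER_STATE_RUNNING = 0,
  WORKER_STATE_EXITING = 1,
  WORKER_STATE_ZOMBIE = 2,
};

// Asks the worker to exit. If the worker already exited, the exchange
// clobbered ZOMBIE, so we restore it for teardown to observe.
static void worker_request_exit(_Atomic int* state) {
  int prev_state = atomic_exchange_explicit(state, WORKER_STATE_EXITING,
                                            memory_order_acq_rel);
  if (prev_state == WORKER_STATE_ZOMBIE) {
    atomic_store_explicit(state, WORKER_STATE_ZOMBIE, memory_order_release);
  }
}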
diff --git a/runtime/src/iree/vm/context.c b/runtime/src/iree/vm/context.c
index d55e67f..3a1fc23 100644
--- a/runtime/src/iree/vm/context.c
+++ b/runtime/src/iree/vm/context.c
@@ -51,8 +51,8 @@
   static iree_atomic_int32_t next_context_id = IREE_ATOMIC_VAR_INIT(1);
   // relaxed because we only care about atomic increments, not ordering w.r.t.
   // other memory accesses.
-  uint32_t context_id = iree_atomic_fetch_add_int32(&next_context_id, 1,
-                                                    iree_memory_order_relaxed);
+  uint32_t context_id =
+      iree_atomic_fetch_add(&next_context_id, 1, iree_memory_order_relaxed);
 #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FIBERS
   // This is what we pass to Tracy as the fiber name.
   // The string must remain live for the lifetime of the process.
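
This counter (and next_invocation_id below) is the standard cheap-unique-ID pattern: a function-local static atomic bumped with relaxed ordering, since the atomicity of the increment alone guarantees uniqueness and no cross-thread ordering is required. Sketch:

#include <stdatomic.h>
#include <stdint.h>

// Returns a process-wide unique id starting at 1; relaxed because only the
// atomicity of the increment matters, not its ordering against other memory.
static uint32_t next_unique_id(void) {
  static _Atomic uint32_t next_id = 1;
  return atomic_fetch_add_explicit(&next_id, 1, memory_order_relaxed);
}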
diff --git a/runtime/src/iree/vm/invocation.c b/runtime/src/iree/vm/invocation.c
index 2ba5bab..d3fe20a 100644
--- a/runtime/src/iree/vm/invocation.c
+++ b/runtime/src/iree/vm/invocation.c
@@ -226,8 +226,8 @@
     // The string must remain live for the lifetime of the process.
     // TODO(benvanik): name it based on the function?
     static iree_atomic_int32_t next_invocation_id = IREE_ATOMIC_VAR_INIT(1);
-    uint32_t invocation_id = iree_atomic_fetch_add_int32(
-        &next_invocation_id, 1, iree_memory_order_relaxed);
+    uint32_t invocation_id = iree_atomic_fetch_add(&next_invocation_id, 1,
+                                                   iree_memory_order_relaxed);
     IREE_LEAK_CHECK_DISABLE_PUSH();
     char* name = (char*)malloc(32);
     snprintf(name, 32, "invoke-%04d", invocation_id - 1);
diff --git a/runtime/src/iree/vm/ref.c b/runtime/src/iree/vm/ref.c
index 3d5f255..fe33136 100644
--- a/runtime/src/iree/vm/ref.c
+++ b/runtime/src/iree/vm/ref.c
@@ -12,15 +12,15 @@
 
 // Useful debugging tool:
 #if 0
-static inline volatile iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr(
+static inline iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr(
     void* ptr, iree_vm_ref_type_t type);
 
-static inline volatile iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr(
+static inline iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr(
     iree_vm_ref_t* ref);
 
 static void iree_vm_ref_trace(const char* msg, iree_vm_ref_t* ref) {
   if (!ref->ptr) return;
-  volatile iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref);
+  iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref);
   iree_string_view_t name = iree_vm_ref_type_name(ref->type);
   fprintf(stderr, "%s %.*s 0x%p %d\n", msg, (int)name.size, name.data, ref->ptr,
           iree_atomic_ref_count_load(counter));
@@ -28,7 +28,7 @@
 static void iree_vm_ref_ptr_trace(const char* msg, void* ptr,
                                   iree_vm_ref_type_t type) {
   if (!ptr) return;
-  volatile iree_atomic_ref_count_t* counter =
+  iree_atomic_ref_count_t* counter =
       iree_vm_get_raw_counter_ptr(ptr, type);
   iree_string_view_t name = iree_vm_ref_type_name(type);
   fprintf(stderr, "%s %.*s 0x%p %d\n", msg, (int)name.size, name.data, ptr,
@@ -45,19 +45,18 @@
   return iree_vm_ref_type_descriptor(type)->type_name;
 }
 
-static inline volatile iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr(
+static inline iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr(
     void* ptr, iree_vm_ref_type_t type) {
   IREE_VM_REF_ASSERT(ptr);
   IREE_VM_REF_ASSERT(type_descriptor);
-  return (volatile iree_atomic_ref_count_t*)ptr +
-         (type & IREE_VM_REF_TYPE_TAG_BIT_MASK);
+  return (iree_atomic_ref_count_t*)ptr + (type & IREE_VM_REF_TYPE_TAG_BIT_MASK);
 }
 
-static inline volatile iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr(
+static inline iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr(
     iree_vm_ref_t* ref) {
   IREE_VM_REF_ASSERT(ref);
   IREE_VM_REF_ASSERT(ref->ptr);
-  return (volatile iree_atomic_ref_count_t*)ref->ptr +
+  return (iree_atomic_ref_count_t*)ref->ptr +
          (ref->type & IREE_VM_REF_TYPE_TAG_BIT_MASK);
 }
 
@@ -65,8 +64,7 @@
                                                iree_vm_ref_type_t type) {
   if (!ptr) return;
   IREE_VM_REF_ASSERT(type);
-  volatile iree_atomic_ref_count_t* counter =
-      iree_vm_get_raw_counter_ptr(ptr, type);
+  iree_atomic_ref_count_t* counter = iree_vm_get_raw_counter_ptr(ptr, type);
   iree_atomic_ref_count_inc(counter);
   iree_vm_ref_ptr_trace("RETAIN", ptr, type);
 }
@@ -76,8 +74,7 @@
   if (!ptr) return;
   IREE_VM_REF_ASSERT(type);
   iree_vm_ref_ptr_trace("RELEASE", ptr, type);
-  volatile iree_atomic_ref_count_t* counter =
-      iree_vm_get_raw_counter_ptr(ptr, type);
+  iree_atomic_ref_count_t* counter = iree_vm_get_raw_counter_ptr(ptr, type);
   if (iree_atomic_ref_count_dec(counter) == 1) {
     const iree_vm_ref_type_descriptor_t* descriptor =
         iree_vm_ref_type_descriptor(type);
@@ -130,8 +127,7 @@
   out_ref->ptr = ptr;
   out_ref->type = type;
   if (out_ref->ptr) {
-    volatile iree_atomic_ref_count_t* counter =
-        iree_vm_get_ref_counter_ptr(out_ref);
+    iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(out_ref);
     iree_atomic_ref_count_inc(counter);
     iree_vm_ref_trace("WRAP RETAIN", out_ref);
   }
@@ -142,8 +138,7 @@
 IREE_API_EXPORT void iree_vm_ref_retain_inplace(iree_vm_ref_t* ref) {
   IREE_VM_REF_ASSERT(ref);
   if (ref->ptr) {
-    volatile iree_atomic_ref_count_t* counter =
-        iree_vm_get_ref_counter_ptr(ref);
+    iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref);
     iree_atomic_ref_count_inc(counter);
     iree_vm_ref_trace("RETAIN", ref);
   }
@@ -157,8 +152,7 @@
   IREE_VM_REF_ASSERT(out_ref);
   iree_vm_ref_t temp_ref = *ref;
   if (ref->ptr) {
-    volatile iree_atomic_ref_count_t* counter =
-        iree_vm_get_ref_counter_ptr(ref);
+    iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref);
     iree_atomic_ref_count_inc(counter);
     iree_vm_ref_trace("RETAIN", ref);
   }
@@ -217,7 +211,7 @@
   if (ref->type == IREE_VM_REF_TYPE_NULL || ref->ptr == NULL) return;
 
   iree_vm_ref_trace("RELEASE", ref);
-  volatile iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref);
+  iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref);
   if (iree_atomic_ref_count_dec(counter) == 1) {
     const iree_vm_ref_type_descriptor_t* descriptor =
         iree_vm_ref_type_descriptor(ref->type);
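
Dropping volatile from these counter pointers is behavior-preserving: every access goes through the iree_atomic_ref_count_* operations, and for C11 _Atomic objects volatile adds no inter-thread ordering, it only suppresses unrelated compiler optimizations. Assuming, as the usage here suggests, that the counter is an atomic 32-bit integer, the unqualified form is all that is needed:

#include <stdatomic.h>
#include <stdint.h>

// No volatile required: the _Atomic qualifier alone carries the concurrency
// semantics for loads, increments, and decrements on the counter.
typedef _Atomic int32_t ref_count_t;

static int32_t ref_count_load(ref_count_t* counter) {
  return atomic_load_explicit(counter, memory_order_acquire);
}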
diff --git a/runtime/src/iree/vm/ref_test.cc b/runtime/src/iree/vm/ref_test.cc
index 68eaa5e..5260749 100644
--- a/runtime/src/iree/vm/ref_test.cc
+++ b/runtime/src/iree/vm/ref_test.cc
@@ -73,9 +73,9 @@
 // WARNING: this is an implementation detail and must never be relied on - it's
 // only here to test the expected behavior.
 static int32_t ReadCounter(iree_vm_ref_t* ref) {
-  return iree_atomic_load_int32((iree_atomic_ref_count_t*)ref->ptr +
-                                    (ref->type & IREE_VM_REF_TYPE_TAG_BIT_MASK),
-                                iree_memory_order_seq_cst);
+  return iree_atomic_load((iree_atomic_ref_count_t*)ref->ptr +
+                              (ref->type & IREE_VM_REF_TYPE_TAG_BIT_MASK),
+                          iree_memory_order_seq_cst);
 }
 
 }  // namespace