Remove message queues and semaphores from the scheduler.

Message queues are now provided by a shared library (for communication
within a compartment) and a compartment (for communication between
mutually distrusting compartments).  The version in the library can also
be used between compartments with some care.

Along the way:

 - Allow LockGuard to work nicely with timeouts.
 - Make the C11/C++11 atomics sufficiently complete that we can claim to
   support them.
 - Allow check_pointer to skip the stack tests, for use in shared
   libraries.

The multiwaiter can still be used with message queues, but requires a
call to set up the wait object.
diff --git a/examples/06.producer-consumer/consumer.cc b/examples/06.producer-consumer/consumer.cc
index 6e8ba0f..203317e 100644
--- a/examples/06.producer-consumer/consumer.cc
+++ b/examples/06.producer-consumer/consumer.cc
@@ -7,20 +7,21 @@
 #include <futex.h>
 #include <queue.h>
 #include <timeout.hh>
+#include <token.h>
 
 using Debug = ConditionalDebug<true, "Consumer">;
 
 // The queue that we will wait on.
-void *queue;
+SObj queue;
 
 /**
  * Set the queue that the thread in this compartment will use.
  */
-void set_queue(void *newQueue)
+void set_queue(SObj newQueue)
 {
 	// Check that this is a valid queue
 	size_t items;
-	if (queue_items_remaining(newQueue, &items) != 0)
+	if (queue_items_remaining_sealed(newQueue, &items) != 0)
 	{
 		return;
 	}
@@ -43,7 +44,7 @@
 	// Get a message from the queue and print it.  This blocks indefinitely.
 	int     value = 0;
 	Timeout t{UnlimitedTimeout};
-	while ((value != 199) && (queue_recv(&t, queue, &value) == 0))
+	while ((value != 199) && (queue_receive_sealed(&t, queue, &value) == 0))
 	{
 		Debug::log("Read {} from queue", value);
 	}
diff --git a/examples/06.producer-consumer/consumer.h b/examples/06.producer-consumer/consumer.h
index 54adb8f..03e875e 100644
--- a/examples/06.producer-consumer/consumer.h
+++ b/examples/06.producer-consumer/consumer.h
@@ -2,5 +2,6 @@
 // SPDX-License-Identifier: MIT
 
 #include <compartment.h>
+#include <cstdlib>
 
-void __cheri_compartment("consumer") set_queue(void *queue);
+void __cheri_compartment("consumer") set_queue(SObjStruct *queueHandle);
diff --git a/examples/06.producer-consumer/producer.cc b/examples/06.producer-consumer/producer.cc
index 6b2a816..a4ea318 100644
--- a/examples/06.producer-consumer/producer.cc
+++ b/examples/06.producer-consumer/producer.cc
@@ -6,6 +6,7 @@
 #include <fail-simulator-on-error.h>
 #include <queue.h>
 #include <timeout.hh>
+#include <token.h>
 
 using Debug = ConditionalDebug<true, "Producer">;
 
@@ -15,15 +16,17 @@
 void __cheri_compartment("producer") run()
 {
 	// Allocate the queue
-	void *queue;
-	non_blocking<queue_create>(MALLOC_CAPABILITY, &queue, sizeof(int), 16);
+	SObj sendHandle;
+	SObj receiveHandle;
+	non_blocking<queue_create_sealed>(
+	  MALLOC_CAPABILITY, &sendHandle, &receiveHandle, sizeof(int), 16);
 	// Pass the queue handle to the consumer.
-	set_queue(queue);
+	set_queue(receiveHandle);
 	Debug::log("Starting producer loop");
 	// Loop, sending some numbers to the other thread.
 	for (int i = 1; i < 200; i++)
 	{
-		int ret = blocking_forever<queue_send>(queue, &i);
+		int ret = blocking_forever<queue_send_sealed>(sendHandle, &i);
 		// Abort if the queue send errors.
 		Debug::Invariant(ret == 0, "Queue send failed {}", ret);
 	}
diff --git a/examples/06.producer-consumer/xmake.lua b/examples/06.producer-consumer/xmake.lua
index c5d6642..fb9e7ca 100644
--- a/examples/06.producer-consumer/xmake.lua
+++ b/examples/06.producer-consumer/xmake.lua
@@ -9,6 +9,8 @@
 -- Support libraries
 includes(path.join(sdkdir, "lib/freestanding"),
          path.join(sdkdir, "lib/atomic"),
+         path.join(sdkdir, "lib/queue"),
+         path.join(sdkdir, "lib/compartment_helpers"),
          path.join(sdkdir, "lib/crt"))
 
 option("board")
@@ -22,7 +24,7 @@
 
 -- Firmware image for the example.
 firmware("producer-consumer")
-    add_deps("crt", "freestanding", "atomic_fixed")
+    add_deps("crt", "freestanding", "atomic_fixed", "message_queue", "message_queue_library", "compartment_helpers")
     add_deps("producer", "consumer")
     on_load(function(target)
         target:values_set("board", "$(board)")
@@ -31,8 +33,8 @@
                 compartment = "producer",
                 priority = 1,
                 entry_point = "run",
-                stack_size = 0x400,
-                trusted_stack_frames = 3
+                stack_size = 0x500,
+                trusted_stack_frames = 5
             },
             {
                 compartment = "consumer",
diff --git a/sdk/core/scheduler/main.cc b/sdk/core/scheduler/main.cc
index c674296..4f8007b 100644
--- a/sdk/core/scheduler/main.cc
+++ b/sdk/core/scheduler/main.cc
@@ -419,125 +419,6 @@
 
 using namespace sched;
 
-// queue APIs
-int __cheri_compartment("sched") queue_create(Timeout           *timeout,
-                                              struct SObjStruct *heapCapability,
-                                              void             **ret,
-                                              size_t             itemSize,
-                                              size_t             maxNItems)
-{
-	HeapBuffer storage{timeout, heapCapability, itemSize, maxNItems};
-
-	if (!storage)
-	{
-		return -ENOMEM;
-	}
-
-	HeapObject<Queue> queue{
-	  timeout, heapCapability, std::move(storage), itemSize, maxNItems};
-
-	if (!queue)
-	{
-		return -ENOMEM;
-	}
-	return write_result(ret, queue);
-}
-
-[[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched")
-  queue_delete(struct SObjStruct *heapCapability, void *queue)
-{
-	return deallocate<Queue>(heapCapability, queue);
-}
-
-[[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched")
-  queue_items_remaining(void *que, size_t *ret)
-{
-	if (!check_pointer<PermissionSet{Permission::Store}>(ret))
-	{
-		return -EINVAL;
-	}
-	return typed_op<Queue>(que, [&](Queue &queue) {
-		*ret = queue.items_remaining();
-		return 0;
-	});
-}
-
-[[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched")
-  queue_send(Timeout *timeout, void *que, const void *src)
-{
-	return typed_op<Queue>(que, [&](Queue &queue) {
-		if (!check_pointer<PermissionSet{Permission::Load}>(
-		      src, queue.item_size()) ||
-		    !check_timeout_pointer(timeout))
-		{
-			return std::pair{-EINVAL, false};
-		}
-		return queue.send(src, timeout);
-	});
-}
-
-[[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched")
-  queue_recv(Timeout *timeout, void *que, void *dst)
-{
-	return typed_op<Queue>(que, [&](Queue &queue) {
-		// TODO: We may need to sink this check down further because we may
-		// also require store-capability if the message contains
-		// capabilities.
-		if (!check_pointer<PermissionSet{Permission::Store}>(
-		      dst, queue.item_size()) ||
-		    !check_timeout_pointer(timeout))
-		{
-			return std::pair{-EINVAL, false};
-		}
-		return queue.recv(dst, timeout);
-	});
-}
-
-int __cheri_compartment("sched")
-  semaphore_create(Timeout           *timeout,
-                   struct SObjStruct *heapCapability,
-                   void             **ret,
-                   size_t             maxNItems)
-{
-	HeapObject<Queue> queue{timeout, heapCapability, nullptr, 0, maxNItems};
-	if (!queue)
-	{
-		return -ENOMEM;
-	}
-
-	return write_result(ret, queue);
-}
-
-[[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched")
-  semaphore_delete(struct SObjStruct *heapCapability, void *sema)
-{
-	return deallocate<Queue>(heapCapability, sema);
-}
-
-[[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched")
-  semaphore_take(Timeout *timeout, void *sema)
-{
-	return typed_op<Queue>(sema, [&](Queue &queue) {
-		if (!queue.is_semaphore())
-		{
-			return std::pair{-EINVAL, false};
-		}
-		return queue.send(nullptr, timeout);
-	});
-}
-
-[[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched")
-  semaphore_give(Timeout *timeout, void *sema)
-{
-	return typed_op<Queue>(sema, [&](Queue &queue) {
-		if (!queue.is_semaphore())
-		{
-			return std::pair{-EINVAL, false};
-		}
-		return queue.recv(nullptr, timeout);
-	});
-}
-
 // thread APIs
 SystickReturn __cheri_compartment("sched") thread_systemtick_get()
 {
diff --git a/sdk/core/scheduler/multiwait.h b/sdk/core/scheduler/multiwait.h
index 49462e2..26cbf54 100644
--- a/sdk/core/scheduler/multiwait.h
+++ b/sdk/core/scheduler/multiwait.h
@@ -12,7 +12,6 @@
 namespace sched
 {
 	using namespace CHERI;
-	class Queue;
 	class Event;
 
 	/**
@@ -70,7 +69,6 @@
 		 * and the user-provided word describing when it should fire.
 		 */
 		///@{
-		bool reset(Queue *queue, uint32_t conditions);
 		/*
 		 * Event channel reset depends on event->bits_get(), so the definition
 		 * of reset() is in event.h.
@@ -102,7 +100,6 @@
 		 * registered event type.
 		 */
 		///@{
-		bool trigger(Queue *queue);
 		bool trigger(Event *event, uint32_t info);
 
 		bool trigger(ptraddr_t address)
@@ -136,8 +133,6 @@
 		template<typename T>
 		static constexpr std::nullptr_t KindFor = nullptr;
 		template<>
-		static constexpr EventWaiterKind KindFor<Queue *> = EventWaiterQueue;
-		template<>
 		static constexpr EventWaiterKind KindFor<Event *> =
 		  EventWaiterEventChannel;
 		template<>
@@ -281,17 +276,6 @@
 				{
 					default:
 						return EventOperationResult::Error;
-					case EventWaiterQueue:
-					{
-						auto *queue = Handle::unseal<Queue>(ptr);
-						if (queue == nullptr)
-						{
-							return EventOperationResult::Error;
-						}
-						eventTriggered |=
-						  events[i].reset(queue, newEvents[i].value);
-						break;
-					}
 					case EventWaiterEventChannel:
 					{
 						auto *event = Handle::unseal<Event>(ptr);
diff --git a/sdk/core/scheduler/queue.h b/sdk/core/scheduler/queue.h
deleted file mode 100644
index 56468a6..0000000
--- a/sdk/core/scheduler/queue.h
+++ /dev/null
@@ -1,313 +0,0 @@
-// Copyright Microsoft and CHERIoT Contributors.
-// SPDX-License-Identifier: MIT
-
-#pragma once
-
-#include "multiwait.h"
-#include "thread.h"
-#include "timer.h"
-#include <cdefs.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <utility>
-#include <utils.hh>
-
-namespace sched
-{
-	class Queue final : private utils::NoCopyNoMove, public Handle
-	{
-		/// The address is used as the send offset.
-		HeapBuffer storage;
-		/**
-		 * The address to the recv location. We don't have a SendAddr because
-		 * it's already captured in the address field of Storage.
-		 */
-		size_t recvAddr;
-		/// size of each queue message
-		const size_t ItemSize;
-		/// max number of messages of this queue
-		size_t maxNItems;
-		/// current number of items in the queue
-		size_t nItems;
-		/// linked list containing all senders blocked on this queue
-		Thread *sendWaitList;
-		/// linked list containing all receivers blocked on this queue
-		Thread *recvWaitList;
-
-		public:
-		/**
-		 * Type marker used for `Handle::unseal_as`.
-		 */
-		static constexpr auto TypeMarker = Handle::Type::Queue;
-
-		/**
-		 * Unseal this as a queue, returning nullptr if it is not a sealed
-		 * queue.
-		 */
-		Queue *unseal()
-		{
-			return unseal_as<Queue>();
-		}
-
-		/**
-		 * Initialise a queue object. When `storage` is nullptr and `itemSize`
-		 * is 0, this queue is used as a semaphore.
-		 * @param storage memory allocated to store messages in queue
-		 * @param itemSize size of each queue item
-		 * @param maxNItems maximum number of items allowed in queue
-		 */
-		Queue(HeapBuffer &&messageStorage, size_t itemSize, size_t maxNItems)
-		  : Handle(TypeMarker),
-		    storage(std::move(messageStorage)),
-		    recvAddr(CHERI::Capability(storage.get()).base()),
-		    ItemSize(itemSize),
-		    maxNItems(maxNItems),
-		    nItems(0),
-		    sendWaitList(nullptr),
-		    recvWaitList(nullptr)
-		{
-			Debug::Assert(
-			  (!storage && itemSize == 0) ||
-			    (storage && (itemSize > 0) && (maxNItems > 0)),
-			  "Invalid queue constructor arguments.  Storage: {}, itemSize: {}",
-			  storage.get(),
-			  itemSize);
-		}
-
-		Queue(std::nullptr_t, size_t itemSize, size_t maxNItems)
-		  : Handle(TypeMarker),
-		    storage(),
-		    recvAddr(0),
-		    ItemSize(itemSize),
-		    maxNItems(maxNItems),
-		    nItems(0),
-		    sendWaitList(nullptr),
-		    recvWaitList(nullptr)
-		{
-			Debug::Assert(itemSize == 0,
-			              "Invalid queue constructor arguments. Storage: "
-			              "nullptr, itemSize: {}",
-			              itemSize);
-		}
-
-		/**
-		 * Returns the size of an item.  Send and receive calls expect to be
-		 * able to read or write this much data.
-		 */
-		size_t item_size()
-		{
-			return ItemSize;
-		}
-
-		/// Returns true if the queue is full, false otherwise.
-		bool is_full()
-		{
-			return nItems == maxNItems;
-		}
-
-		/// Returns true if the queue is empty, false otherwise.
-		bool is_empty()
-		{
-			return nItems == 0;
-		}
-
-		[[nodiscard]] __always_inline bool is_semaphore() const
-		{
-			return ItemSize == 0;
-		}
-
-		std::pair<int, bool> send(const void *src, Timeout *timeout)
-		{
-			Thread *curr = Thread::current_get();
-
-			// No room for a new message
-			if (is_full())
-			{
-				if (!timeout->may_block())
-				{
-					return {-EWOULDBLOCK, false};
-				}
-
-				ExceptionGuard::assert_safe_to_block();
-				do
-				{
-					if (curr->suspend(timeout, &sendWaitList))
-					{
-						return {-ETIMEDOUT, false};
-					}
-					/*
-					 * Waking up here doesn't necessarily mean there's space
-					 * for our message. A higher-priority thread could have
-					 * already filled the vacancy, so we have to double
-					 * check there's really room for us.
-					 *
-					 * It is also possible that the queue object itself is freed
-					 * while we were sleeping. In this case the while() check
-					 * fails on a tag exception, and the default unwind
-					 * behaviour is actually what we want.
-					 */
-				} while (nItems == maxNItems);
-			}
-
-			Debug::Assert(
-			  nItems < maxNItems,
-			  "Adding an item to a full queue (contains {} items, max: {})",
-			  nItems,
-			  maxNItems);
-			// If we arrived here, then NItems must < MaxNItems and we must be
-			// on the ready list.
-			if (!is_semaphore())
-			{
-				// The clang analyser raises a false positive here for the
-				// semaphore calls because it does not propagate the fact that
-				// the caller has already checked `is_semaphore` and had it
-				// return `true`.
-				// Also, we want this memcpy() to finish before any internal
-				// state of this queue is touched, because it may fault (storing
-				// a local capability from the user to the storage) and we want
-				// a clean force-unwind.
-				memcpy(storage.get(), src, ItemSize); // NOLINT
-				auto      storageCap     = CHERI::Capability{storage.get()};
-				ptraddr_t storageAddress = storageCap.address() + ItemSize;
-				ptraddr_t storageBase    = storageCap.base();
-				/*
-				 * Ring buffer wrap around.
-				 * N.B. Never check against the top of `Storage` here. `Storage`
-				 * may be a capability with larger length than the requested
-				 * storage space because of imprecision or malloc alignment.
-				 * Always check against the real intended storage space.
-				 */
-				if (storageAddress >= storageBase + maxNItems * ItemSize)
-				{
-					storageAddress = storageBase;
-				}
-				storage.set_address(storageAddress);
-			}
-			nItems++;
-
-			bool shouldYield =
-			  waitlist_unblock_one(recvWaitList, Thread::WakeReason::Queue);
-
-			return {0, shouldYield};
-		}
-
-		std::pair<int, bool> recv(void *dst, Timeout *timeout)
-		{
-			Thread *curr = Thread::current_get();
-
-			if (is_empty())
-			{
-				if (!timeout->may_block())
-				{
-					// Semaphore (ItemSize == 0) give cannot enter here. Nobody
-					// has taken it.
-					Debug::Assert(
-					  ItemSize > 0, "Item size {} should be > 0", ItemSize);
-					return {-EWOULDBLOCK, false};
-				}
-
-				ExceptionGuard::assert_safe_to_block();
-				do
-				{
-					if (curr->suspend(timeout, &recvWaitList))
-					{
-						return {-ETIMEDOUT, false};
-					}
-					// Same as send(), we could force unwind here.
-				} while (nItems == 0);
-			}
-
-			Debug::Assert(nItems > 0 && nItems <= maxNItems,
-			              "Number of items {} in queue is out of range 0-{}",
-			              nItems,
-			              maxNItems);
-			// If we arrived here, then NItems must > 0 and we must be on the
-			// ready list.
-			if (!is_semaphore())
-			{
-				CHERI::Capability newStorage{storage.get()};
-				newStorage.address() = recvAddr;
-				// The clang analyser raises a false positive here for the
-				// semaphore calls because it does not propagate the fact that
-				// the caller has already checked `is_semaphore` and had it
-				// return `true`.
-				// Similar to send(), make sure no internal state is changed
-				// before changing internal state, for a clean force-unwind.
-				memcpy(dst, newStorage.get(), ItemSize); // NOLINT
-				recvAddr += ItemSize;
-				ptraddr_t storageBase = CHERI::Capability{storage.get()}.base();
-				if (recvAddr >= storageBase + ItemSize * maxNItems)
-				{
-					recvAddr = storageBase;
-				}
-			}
-			nItems--;
-
-			bool shouldYield =
-			  waitlist_unblock_one(sendWaitList, Thread::WakeReason::Queue);
-
-			return {0, shouldYield};
-		}
-
-		size_t items_remaining()
-		{
-			return nItems;
-		}
-
-		~Queue()
-		{
-			while (recvWaitList)
-			{
-				waitlist_unblock_one(recvWaitList, Thread::WakeReason::Delete);
-			}
-			while (sendWaitList)
-			{
-				waitlist_unblock_one(sendWaitList, Thread::WakeReason::Delete);
-			}
-		}
-
-		private:
-		/**
-		 * Unblock one thread from a send or recv waiting list.
-		 * @return true if rescheduling is needed
-		 */
-		bool waitlist_unblock_one(Thread *head, Thread::WakeReason reason)
-		{
-			Debug::Assert((head == nullptr) || (*head->sleepQueue == head),
-			              "Item is on the wrong wait list ({}, expected {}",
-			              (head != nullptr) ? *head->sleepQueue : nullptr,
-			              head);
-			auto wokeMultiwaiters = sched::MultiWaiter::wake_waiters(this);
-			return ((head != nullptr) && head->ready(reason)) ||
-			       wokeMultiwaiters;
-		}
-	};
-
-	inline bool EventWaiter::trigger(Queue *queue)
-	{
-		if ((kind != EventWaiterQueue) || (Capability{queue} != eventSource))
-		{
-			return false;
-		}
-		if ((flags & EventWaiterQueueSendReady) && !queue->is_full())
-		{
-			set_ready(EventWaiterQueueSendReady);
-		}
-		if ((flags & EventWaiterQueueReceiveReady) && !queue->is_empty())
-		{
-			set_ready(EventWaiterQueueReceiveReady);
-		}
-		return readyEvents != 0;
-	}
-
-	inline bool EventWaiter::reset(Queue *queue, uint32_t conditions)
-	{
-		eventSource = queue;
-		eventValue  = 0;
-		flags       = conditions;
-		kind        = EventWaiterQueue;
-		readyEvents = 0;
-		return trigger(queue);
-	}
-} // namespace sched
diff --git a/sdk/core/scheduler/thread.h b/sdk/core/scheduler/thread.h
index e432c17..c85d298 100644
--- a/sdk/core/scheduler/thread.h
+++ b/sdk/core/scheduler/thread.h
@@ -55,7 +55,6 @@
 		enum class WakeReason : uint8_t
 		{
 			Timer,
-			Queue,
 			Event,
 			Futex,
 			/**
diff --git a/sdk/include/c++-config/atomic b/sdk/include/c++-config/atomic
new file mode 100644
index 0000000..412f09e
--- /dev/null
+++ b/sdk/include/c++-config/atomic
@@ -0,0 +1,1087 @@
+// Copyright Microsoft and CHERIoT Contributors.
+// SPDX-License-Identifier: MIT
+
+#pragma once
+#include <concepts>
+#include <futex.h>
+#include <type_traits>
+
+__clang_ignored_warning_push("-Watomic-alignment") namespace std
+{
+	enum class memory_order : int
+	{
+		relaxed = __ATOMIC_RELAXED,
+		consume = __ATOMIC_CONSUME,
+		acquire = __ATOMIC_ACQUIRE,
+		release = __ATOMIC_RELEASE,
+		acq_rel = __ATOMIC_ACQ_REL,
+		seq_cst = __ATOMIC_SEQ_CST,
+	};
+	inline constexpr memory_order memory_order_relaxed = memory_order::relaxed;
+	inline constexpr memory_order memory_order_consume = memory_order::consume;
+	inline constexpr memory_order memory_order_acquire = memory_order::acquire;
+	inline constexpr memory_order memory_order_release = memory_order::release;
+	inline constexpr memory_order memory_order_acq_rel = memory_order::acq_rel;
+	inline constexpr memory_order memory_order_seq_cst = memory_order::seq_cst;
+
+	namespace detail
+	{
+		/**
+		 * Version of atomic<T> for primitive types.  This calls the atomics
+		 * support library for everything on platforms with no A extension and
+		 * will use atomic instructions on ones that do.
+		 *
+		 * This differs from std::atomic in two intentional ways:
+		 *
+		 *  - The `wait` and `notify_*` methods are defined only on 4-byte
+		 *    values.  If there is a requirement for anything else, we should
+		 *    extend the futex APIs in the scheduler to deal with other types.
+		 *  - The `wait` call has a non-standard extension that takes a
+		 *    CHERIoT timeout parameter.
+		 *
+		 *  Any other divergence is a bug.
+		 *
+		 *  This is a base class that is extended for arithmetic and pointer
+		 *  types.
+		 */
+		template<typename T>
+		class primitive_atomic
+		{
+			/**
+			 * SFINAE helper to give us the underlying type of enums and the
+			 * raw type of everything else.
+			 */
+			template<typename U, bool = std::is_enum_v<U>>
+			struct underlying_type
+			{
+				using type = U;
+			};
+
+			template<typename U>
+			struct underlying_type<U, true> : ::std::underlying_type<U>
+			{
+			};
+
+			protected:
+			typename underlying_type<T>::type value;
+			static_assert(std::is_arithmetic_v<T> || std::is_enum_v<T> ||
+			                std::is_pointer_v<T> || std::is_null_pointer_v<T>,
+			              "Invalid type for primitive atomic");
+
+			__always_inline auto *pointer_for_intrinsics(T *pointer)
+			{
+				if constexpr (std::is_enum_v<T>)
+				{
+					return static_cast<std::underlying_type_t<T> *>(pointer);
+				}
+				else
+				{
+					return pointer;
+				}
+			}
+
+			static decltype(value) *as_underlying(T *v)
+			{
+				return reinterpret_cast<decltype(value) *>(v);
+			}
+			static decltype(value) as_underlying(T v)
+			{
+				return static_cast<decltype(value)>(v);
+			}
+
+			public:
+			using value_type                          = T;
+			static constexpr bool is_always_lock_free = true;
+			__always_inline bool  is_lock_free() const noexcept
+			{
+				return true;
+			}
+			__always_inline bool is_lock_free() const volatile noexcept
+			{
+				return true;
+			}
+
+			constexpr primitive_atomic() noexcept = default;
+			__always_inline constexpr primitive_atomic(T desired) noexcept
+			{
+				value = desired;
+			}
+			primitive_atomic(const primitive_atomic &) = delete;
+
+			__always_inline T operator=(T desired) noexcept
+			{
+				store(desired);
+				return desired;
+			}
+			__always_inline T operator=(T desired) volatile noexcept
+			{
+				return *const_cast<primitive_atomic<T> *>(this) = desired;
+			}
+			primitive_atomic &operator=(const primitive_atomic &) = delete;
+			primitive_atomic &
+			operator=(const primitive_atomic &) volatile = delete;
+
+			__always_inline void
+			store(T desired, memory_order order = memory_order_seq_cst) noexcept
+			{
+				__atomic_store_n(&value, as_underlying(desired), int(order));
+			}
+			__always_inline void
+			store(T            desired,
+			      memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				const_cast<primitive_atomic<T> *>(this)->store(desired, order);
+			}
+
+			__always_inline T
+			load(memory_order order = memory_order_seq_cst) const noexcept
+			{
+				return __atomic_load_n(&value, int(order));
+			}
+
+			__always_inline T load(
+			  memory_order order = memory_order_seq_cst) const volatile noexcept
+			{
+				return const_cast<primitive_atomic<T> *>(this)->load(order);
+			}
+
+			__always_inline operator T() const noexcept
+			{
+				return load();
+			}
+			__always_inline operator T() const volatile noexcept
+			{
+				return load();
+			}
+
+			__always_inline T
+			exchange(T            desired,
+			         memory_order order = memory_order_seq_cst) noexcept
+			{
+				return T(__atomic_exchange_n(
+				  &value, as_underlying(desired), int(order)));
+			}
+			__always_inline T exchange(
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				return const_cast<primitive_atomic<T> *>(this)->exchange(
+				  desired, order);
+			}
+
+			__always_inline bool
+			compare_exchange_weak(T           &expected,
+			                      T            desired,
+			                      memory_order success,
+			                      memory_order failure) noexcept
+			{
+				// Must return the builtin's result: the function is declared
+				// bool and falling off the end is undefined behaviour (the
+				// strong variant below already does this correctly).
+				return __atomic_compare_exchange_n(&value,
+				                                   as_underlying(&expected),
+				                                   as_underlying(desired),
+				                                   true,
+				                                   int(success),
+				                                   int(failure));
+			}
+			__always_inline bool
+			compare_exchange_weak(T           &expected,
+			                      T            desired,
+			                      memory_order success,
+			                      memory_order failure) volatile noexcept
+			{
+				return const_cast<primitive_atomic<T> *>(this)
+				  ->compare_exchange_weak(expected, desired, success, failure);
+			}
+			__always_inline bool compare_exchange_weak(
+			  T           &expected,
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) noexcept
+			{
+				return compare_exchange_weak(expected, desired, order, order);
+			}
+			__always_inline bool compare_exchange_weak(
+			  T           &expected,
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				return const_cast<primitive_atomic<T> *>(this)
+				  ->compare_exchange_weak(expected, desired, order);
+			}
+
+			__always_inline bool
+			compare_exchange_strong(T           &expected,
+			                        T            desired,
+			                        memory_order success,
+			                        memory_order failure) noexcept
+			{
+				return __atomic_compare_exchange_n(&value,
+				                                   as_underlying(&expected),
+				                                   as_underlying(desired),
+				                                   false,
+				                                   int(success),
+				                                   int(failure));
+			}
+			__always_inline bool
+			compare_exchange_strong(T           &expected,
+			                        T            desired,
+			                        memory_order success,
+			                        memory_order failure) volatile noexcept
+			{
+				return const_cast<primitive_atomic<T> *>(this)
+				  ->compare_exchange_strong(
+				    expected, desired, success, failure);
+			}
+			__always_inline bool compare_exchange_strong(
+			  T           &expected,
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) noexcept
+			{
+				return compare_exchange_strong(expected, desired, order, order);
+			}
+
+			__always_inline bool compare_exchange_strong(
+			  T           &expected,
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				return const_cast<primitive_atomic<T> *>(this)
+				  ->compare_exchange_strong(expected, desired, order);
+			}
+
+			__always_inline void
+			wait(T            old,
+			     memory_order order = memory_order::seq_cst) const noexcept
+			  requires(sizeof(T) == sizeof(uint32_t))
+			{
+				// static_cast, not reinterpret_cast: the underlying value is
+				// an integral (or enum-underlying) type and reinterpret_cast
+				// cannot perform integral conversions.  This matches the
+				// timed overload below.
+				futex_wait(reinterpret_cast<const uint32_t *>(&value),
+				           static_cast<uint32_t>(as_underlying(old)));
+			}
+
+			__always_inline int
+			wait(Timeout       *timeout,
+			     T              old,
+			     memory_order   order = memory_order::seq_cst,
+			     FutexWaitFlags flags = FutexNone) const noexcept
+			  requires(sizeof(T) == sizeof(uint32_t))
+			{
+				return futex_timed_wait(
+				  timeout,
+				  reinterpret_cast<const uint32_t *>(&value),
+				  static_cast<uint32_t>(as_underlying(old)),
+				  flags);
+			}
+
+			__always_inline int
+			wait(Timeout *timeout, T old, FutexWaitFlags flags) const noexcept
+			  requires(sizeof(T) == sizeof(uint32_t))
+			{
+				return wait(timeout, old, memory_order::seq_cst, flags);
+			}
+
+			__always_inline void
+			wait(T old, memory_order order = memory_order::seq_cst) const
+			  volatile noexcept requires(sizeof(T) == sizeof(uint32_t))
+			{
+				const_cast<primitive_atomic<T> *>(this)->wait(old, order);
+			}
+
+			__always_inline void notify_one() noexcept
+			  requires(sizeof(T) == sizeof(uint32_t))
+			{
+				futex_wake(reinterpret_cast<uint32_t *>(&value), 1);
+			}
+			__always_inline void notify_one() volatile noexcept
+			  requires(sizeof(T) == sizeof(uint32_t))
+			{
+				const_cast<primitive_atomic<T> *>(this)->notify_one();
+			}
+
+			__always_inline void notify_all() noexcept
+			  requires(sizeof(T) == sizeof(uint32_t))
+			{
+				futex_wake(reinterpret_cast<uint32_t *>(&value),
+				           std::numeric_limits<uint32_t>::max());
+			}
+			__always_inline void notify_all() volatile noexcept
+			  requires(sizeof(T) == sizeof(uint32_t))
+			{
+				const_cast<primitive_atomic<T> *>(this)->notify_all();
+			}
+		};
+
+		/**
+		 * Version of atomic for arithmetic types.  This adds the arithmetic
+		 * methods.
+		 */
+		template<typename T>
+		class arithmetic_atomic : public primitive_atomic<T>
+		{
+			public:
+			using primitive_atomic<T>::primitive_atomic;
+			using primitive_atomic<T>::operator=;
+			using difference_type = typename primitive_atomic<T>::value_type;
+
+			__always_inline T
+			fetch_add(T arg, memory_order order = memory_order_seq_cst) noexcept
+			{
+				return __atomic_fetch_add(&this->value, arg, int(order));
+			}
+			__always_inline T fetch_add(
+			  T            arg,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				// Return the delegated result; omitting it is UB in a
+				// T-returning function.
+				return const_cast<arithmetic_atomic<T> *>(this)->fetch_add(
+				  arg, order);
+			}
+
+			__always_inline T
+			fetch_sub(T arg, memory_order order = memory_order_seq_cst) noexcept
+			{
+				return __atomic_fetch_sub(&this->value, arg, int(order));
+			}
+			__always_inline T fetch_sub(
+			  T            arg,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				// Return the delegated result; omitting it is UB in a
+				// T-returning function.
+				return const_cast<arithmetic_atomic<T> *>(this)->fetch_sub(
+				  arg, order);
+			}
+
+			__always_inline T
+			fetch_and(T arg, memory_order order = memory_order_seq_cst) noexcept
+			{
+				return __atomic_fetch_and(&this->value, arg, int(order));
+			}
+			__always_inline T fetch_and(
+			  T            arg,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				// Return the delegated result; omitting it is UB in a
+				// T-returning function.
+				return const_cast<arithmetic_atomic<T> *>(this)->fetch_and(
+				  arg, order);
+			}
+
+			__always_inline T
+			fetch_or(T arg, memory_order order = memory_order_seq_cst) noexcept
+			{
+				return __atomic_fetch_or(&this->value, arg, int(order));
+			}
+			__always_inline T fetch_or(
+			  T            arg,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				// Return the delegated result; omitting it is UB in a
+				// T-returning function.
+				return const_cast<arithmetic_atomic<T> *>(this)->fetch_or(
+				  arg, order);
+			}
+
+			__always_inline T
+			fetch_xor(T arg, memory_order order = memory_order_seq_cst) noexcept
+			{
+				return __atomic_fetch_xor(&this->value, arg, int(order));
+			}
+			__always_inline T fetch_xor(
+			  T            arg,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				// Return the delegated result; omitting it is UB in a
+				// T-returning function.
+				return const_cast<arithmetic_atomic<T> *>(this)->fetch_xor(
+				  arg, order);
+			}
+
+			__always_inline T operator++() noexcept
+			{
+				return fetch_add(1) + 1; // pre-increment: yields the new value
+			}
+			__always_inline T operator++() volatile noexcept
+			{
+				return fetch_add(1) + 1;
+			}
+			__always_inline T operator++(int) noexcept
+			{
+				return fetch_add(1); // post-increment: yields the old value
+			}
+			__always_inline T operator++(int) volatile noexcept
+			{
+				return fetch_add(1);
+			}
+			__always_inline T operator--() noexcept
+			{
+				return fetch_sub(1) - 1; // pre-decrement: yields the new value
+			}
+			__always_inline T operator--() volatile noexcept
+			{
+				return fetch_sub(1) - 1;
+			}
+			__always_inline T operator--(int) noexcept
+			{
+				return fetch_sub(1); // post-decrement: yields the old value
+			}
+			__always_inline T operator--(int) volatile noexcept
+			{
+				return fetch_sub(1);
+			}
+
+			__always_inline T operator+=(T arg) noexcept
+			{
+				return fetch_add(arg) + arg; // returns the updated value
+			}
+			__always_inline T operator+=(T arg) volatile noexcept
+			{
+				return fetch_add(arg) + arg;
+			}
+			__always_inline T operator-=(T arg) noexcept
+			{
+				return fetch_sub(arg) - arg; // returns the updated value
+			}
+			__always_inline T operator-=(T arg) volatile noexcept
+			{
+				return fetch_sub(arg) - arg;
+			}
+
+			__always_inline T operator&=(T arg) noexcept
+			{
+				return fetch_and(arg) & arg; // returns the updated value
+			}
+			__always_inline T operator&=(T arg) volatile noexcept
+			{
+				return fetch_and(arg) & arg;
+			}
+			__always_inline T operator|=(T arg) noexcept
+			{
+				return fetch_or(arg) | arg; // returns the updated value
+			}
+			__always_inline T operator|=(T arg) volatile noexcept
+			{
+				return fetch_or(arg) | arg;
+			}
+			__always_inline T operator^=(T arg) noexcept
+			{
+				return fetch_xor(arg) ^ arg; // returns the updated value
+			}
+			__always_inline T operator^=(T arg) volatile noexcept
+			{
+				return fetch_xor(arg) ^ arg;
+			}
+		};
+
+		/**
+		 * Version of atomic for pointer types.  This adds pointer-arithmetic
+		 * methods.  `fetch_add` and `fetch_sub` scale by `sizeof(T)`,
+		 * matching `std::atomic<T*>` semantics.  Inherits publicly so that
+		 * `load`, `store`, assignment, and friends remain accessible.
+		 */
+		template<typename T>
+		class pointer_atomic : public primitive_atomic<T>
+		{
+			public:
+			using primitive_atomic<T>::primitive_atomic;
+			using primitive_atomic<T>::operator=;
+			using difference_type = std::ptrdiff_t;
+
+			T *fetch_add(std::ptrdiff_t arg,
+			             memory_order   order = memory_order_seq_cst) noexcept
+			{
+				return __atomic_fetch_add(
+				  &this->value, arg * sizeof(T), int(order));
+			}
+			T *fetch_add(
+			  std::ptrdiff_t arg,
+			  memory_order   order = memory_order_seq_cst) volatile noexcept
+			{
+				// Forward to the non-volatile overload; return its result.
+				return const_cast<pointer_atomic<T> *>(this)->fetch_add(arg, order);
+			}
+
+			T *fetch_sub(std::ptrdiff_t arg,
+			             memory_order   order = memory_order_seq_cst) noexcept
+			{
+				return __atomic_fetch_sub(
+				  &this->value, arg * sizeof(T), int(order));
+			}
+			T *fetch_sub(
+			  std::ptrdiff_t arg,
+			  memory_order   order = memory_order_seq_cst) volatile noexcept
+			{
+				// Forward to the non-volatile overload; return its result.
+				return const_cast<pointer_atomic<T> *>(this)->fetch_sub(arg, order);
+			}
+
+			__always_inline T *operator++() noexcept
+			{
+				return fetch_add(1) + 1; // pre-increment: new pointer
+			}
+			__always_inline T *operator++() volatile noexcept
+			{
+				return fetch_add(1) + 1;
+			}
+			__always_inline T *operator++(int) noexcept
+			{
+				return fetch_add(1); // post-increment: old pointer
+			}
+			__always_inline T *operator++(int) volatile noexcept
+			{
+				return fetch_add(1);
+			}
+			__always_inline T *operator--() noexcept
+			{
+				return fetch_sub(1) - 1; // pre-decrement: new pointer
+			}
+			__always_inline T *operator--() volatile noexcept
+			{
+				return fetch_sub(1) - 1;
+			}
+			__always_inline T *operator--(int) noexcept
+			{
+				return fetch_sub(1); // post-decrement: old pointer
+			}
+			__always_inline T *operator--(int) volatile noexcept
+			{
+				return fetch_sub(1);
+			}
+
+			__always_inline T *operator+=(std::ptrdiff_t arg) noexcept
+			{
+				return fetch_add(arg) + arg;
+			}
+			__always_inline T *operator+=(std::ptrdiff_t arg) volatile noexcept
+			{
+				return fetch_add(arg) + arg;
+			}
+			__always_inline T *operator-=(std::ptrdiff_t arg) noexcept
+			{
+				return fetch_sub(arg) - arg;
+			}
+			__always_inline T *operator-=(std::ptrdiff_t arg) volatile noexcept
+			{
+				return fetch_sub(arg) - arg;
+			}
+		};
+
+		/**
+		 * Simple flag lock.  This uses `primitive_atomic` to build a trivial
+		 * futex-based lock.
+		 */
+		class flag_lock
+		{
+			/**
+			 * Possible states of the flag lock.
+			 */
+			enum class LockState : uint32_t
+			{
+				/// Lock is not held.
+				Unlocked,
+				/// Lock is held, no waiters.
+				Locked,
+				/// Lock is held and one or more waiters exist.
+				LockedWithWaiters
+			};
+			/// The lock state.
+			primitive_atomic<LockState> lockWord;
+
+			public:
+			/**
+			 * Acquire the lock.  Blocks indefinitely.
+			 */
+			__noinline void lock()
+			{
+				LockState old = LockState::Unlocked;
+				while (true)
+				{
+					switch (old)
+					{
+						// Uncontended: try to acquire directly.  On failure,
+						// the CAS updates `old` with the observed state.
+						case LockState::Unlocked:
+							if (lockWord.compare_exchange_strong(
+							      old, LockState::Locked))
+							{
+								return;
+							}
+							break;
+						// Held: flag that waiters exist.  If the holder
+						// released the lock meanwhile, the exchange acquired
+						// it for us (as LockedWithWaiters, which is safe:
+						// unlock merely does a spurious wake).
+						case LockState::Locked:
+							old = lockWord.exchange(LockState::LockedWithWaiters);
+							if (old == LockState::Unlocked) { return; }
+							[[fallthrough]];
+						// Contended: sleep, then re-read the state so the
+						// next iteration dispatches on fresh information.
+						case LockState::LockedWithWaiters:
+							lockWord.wait(LockState::LockedWithWaiters);
+							old = lockWord.load();
+					}
+				}
+			}
+
+			/**
+			 * Release the lock, waking any waiters if there are any.
+			 */
+			__noinline void unlock()
+			{
+				auto old = lockWord.exchange(LockState::Unlocked);
+				if (old == LockState::LockedWithWaiters)
+				{
+					lockWord.notify_all(); // woken threads re-contend
+				}
+			}
+		};
+
+		/**
+		 * Fallback `atomic` implementation that uses a lock to protect the
+		 * value.
+		 */
+		template<typename T>
+		class locked_atomic
+		{
+			private:
+			/// The atomic value.
+			T value;
+			/// Lock protecting this object, at the end so that it can go into
+			/// padding.
+			mutable flag_lock lock;
+
+			/// RAII helper that unlocks on scope exit.
+			struct guard
+			{
+				flag_lock &lock;
+				__always_inline ~guard() { lock.unlock(); }
+			};
+			/// Const so that const methods such as `load` can acquire the
+			/// lock; the `flag_lock` member is declared `mutable`.
+			__always_inline guard acquire_lock() const
+			{
+				lock.lock();
+				return {lock};
+			}
+
+			public:
+			using value_type                          = T;
+			static constexpr bool is_always_lock_free = false;
+			__always_inline bool  is_lock_free() const noexcept
+			{
+				return false;
+			}
+			__always_inline bool is_lock_free() const volatile noexcept
+			{
+				return false;
+			}
+
+			constexpr locked_atomic() noexcept = default;
+			__always_inline constexpr locked_atomic(T desired) noexcept
+			{
+				value = desired;
+			}
+			locked_atomic(const locked_atomic &) = delete;
+
+			__always_inline T operator=(T desired) noexcept
+			{
+				auto g = acquire_lock();
+				value  = desired;
+				return desired;
+			}
+			__always_inline T operator=(T desired) volatile noexcept
+			{
+				return *const_cast<locked_atomic<T> *>(this) = desired;
+			}
+			locked_atomic &operator=(const locked_atomic &) = delete;
+			locked_atomic &operator=(const locked_atomic &) volatile = delete;
+
+			__always_inline void
+			store(T desired, memory_order order = memory_order_seq_cst) noexcept
+			{
+				auto g = acquire_lock();
+				value  = desired;
+			}
+			__always_inline void
+			store(T            desired,
+			      memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				const_cast<locked_atomic<T> *>(this)->store(desired, order);
+			}
+
+			__always_inline T
+			load(memory_order order = memory_order_seq_cst) const noexcept
+			{
+				auto g = acquire_lock();
+				return value;
+			}
+
+			__always_inline T load(
+			  memory_order order = memory_order_seq_cst) const volatile noexcept
+			{
+				return const_cast<locked_atomic<T> *>(this)->load(order);
+			}
+
+			__always_inline operator T() const noexcept
+			{
+				return load();
+			}
+			__always_inline operator T() const volatile noexcept
+			{
+				return load();
+			}
+
+			__always_inline T
+			exchange(T            desired,
+			         memory_order order = memory_order_seq_cst) noexcept
+			{
+				auto g   = acquire_lock();
+				T    tmp = value;
+				value    = desired;
+				return tmp;
+			}
+			__always_inline T exchange(
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				return const_cast<locked_atomic<T> *>(this)->exchange(desired,
+				                                                      order);
+			}
+
+			__always_inline bool
+			compare_exchange_weak(T           &expected,
+			                      T            desired,
+			                      memory_order success,
+			                      memory_order failure) noexcept
+			{
+				auto g = acquire_lock();
+				if (value == expected)
+				{
+					value = desired; // success: install the new value
+					return true;
+				}
+				expected = value; return false; // failure: report observed value
+			}
+			__always_inline bool
+			compare_exchange_weak(T           &expected,
+			                      T            desired,
+			                      memory_order success,
+			                      memory_order failure) volatile noexcept
+			{
+				return const_cast<locked_atomic<T> *>(this)
+				  ->compare_exchange_weak(expected, desired, success, failure);
+			}
+			__always_inline bool compare_exchange_weak(
+			  T           &expected,
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) noexcept
+			{
+				return compare_exchange_weak(expected, desired, order, order);
+			}
+			__always_inline bool compare_exchange_weak(
+			  T           &expected,
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				return const_cast<locked_atomic<T> *>(this)
+				  ->compare_exchange_weak(expected, desired, order);
+			}
+
+			__always_inline bool
+			compare_exchange_strong(T           &expected,
+			                        T            desired,
+			                        memory_order success,
+			                        memory_order failure) noexcept
+			{
+				return compare_exchange_weak(
+				  expected, desired, success, failure);
+			}
+			__always_inline bool
+			compare_exchange_strong(T           &expected,
+			                        T            desired,
+			                        memory_order success,
+			                        memory_order failure) volatile noexcept
+			{
+				return const_cast<locked_atomic<T> *>(this)
+				  ->compare_exchange_strong(
+				    expected, desired, success, failure);
+			}
+			__always_inline bool compare_exchange_strong(
+			  T           &expected,
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) noexcept
+			{
+				return compare_exchange_strong(expected, desired, order, order);
+			}
+
+			__always_inline bool compare_exchange_strong(
+			  T           &expected,
+			  T            desired,
+			  memory_order order = memory_order_seq_cst) volatile noexcept
+			{
+				return const_cast<locked_atomic<T> *>(this)
+				  ->compare_exchange_strong(expected, desired, order);
+			}
+		};
+
+		template<typename T>
+		using atomic = std::conditional_t<
+		  std::is_pointer_v<T>,
+		  detail::pointer_atomic<T>,
+		  std::conditional_t<std::is_arithmetic_v<T>,
+		                     detail::arithmetic_atomic<T>,
+		                     std::conditional_t<std::is_enum_v<T>,
+		                                        detail::primitive_atomic<T>,
+		                                        detail::locked_atomic<T>>>>;
+	}; // namespace detail
+
+	/**
+	 * Select the correct `atomic` implementation based on the type.
+	 */
+	template<typename T>
+	class atomic : public detail::atomic<T>
+	{
+		public:
+		using detail::atomic<T>::atomic;
+		using detail::atomic<T>::operator=;
+	};
+
+	template<class T>
+	void atomic_store(std::atomic<T> * obj,
+	                  typename std::atomic<T>::value_type desired) noexcept
+	{
+		return obj->store(desired);
+	}
+	template<class T>
+	void atomic_store(volatile std::atomic<T> * obj,
+	                  typename std::atomic<T>::value_type desired) noexcept
+	{
+		return obj->store(desired);
+	}
+	template<class T>
+	void atomic_store_explicit(std::atomic<T> * obj,
+	                           typename std::atomic<T>::value_type desired,
+	                           std::memory_order order) noexcept
+	{
+		return obj->store(desired, order);
+	}
+	template<class T>
+	void atomic_store_explicit(volatile std::atomic<T> * obj,
+	                           typename std::atomic<T>::value_type desired,
+	                           std::memory_order order) noexcept
+	{
+		return obj->store(desired, order);
+	}
+
+	template<class T>
+	T atomic_load(const std::atomic<T> *obj) noexcept
+	{
+		return obj->load();
+	}
+	template<class T>
+	T atomic_load(const volatile std::atomic<T> *obj) noexcept
+	{
+		return obj->load();
+	}
+	template<class T>
+	T atomic_load_explicit(const std::atomic<T> *obj,
+	                       std::memory_order     order) noexcept
+	{
+		return obj->load(order);
+	}
+	template<class T>
+	T atomic_load_explicit(const volatile std::atomic<T> *obj,
+	                       std::memory_order              order) noexcept
+	{
+		return obj->load(order);
+	}
+
+	template<class T>
+	T atomic_exchange(std::atomic<T> * obj,
+	                  typename std::atomic<T>::value_type desired) noexcept
+	{
+		return obj->exchange(desired);
+	}
+	template<class T>
+	T atomic_exchange(volatile std::atomic<T> * obj,
+	                  typename std::atomic<T>::value_type desired) noexcept
+	{
+		return obj->exchange(desired);
+	}
+	template<class T>
+	T atomic_exchange_explicit(std::atomic<T> * obj,
+	                           typename std::atomic<T>::value_type desired,
+	                           std::memory_order order) noexcept
+	{
+		return obj->exchange(desired, order);
+	}
+	template<class T>
+	T atomic_exchange_explicit(volatile std::atomic<T> * obj,
+	                           typename std::atomic<T>::value_type desired,
+	                           std::memory_order order) noexcept
+	{
+		return obj->exchange(desired, order);
+	}
+
+	template<class T>
+	T atomic_fetch_add(std::atomic<T> * obj,
+	                   typename std::atomic<T>::difference_type arg) noexcept
+	{
+		return obj->fetch_add(arg);
+	}
+	template<class T>
+	T atomic_fetch_add(volatile std::atomic<T> * obj,
+	                   typename std::atomic<T>::difference_type arg) noexcept
+	{
+		return obj->fetch_add(arg);
+	}
+	template<class T>
+	T atomic_fetch_add_explicit(std::atomic<T> * obj,
+	                            typename std::atomic<T>::difference_type arg,
+	                            std::memory_order order) noexcept
+	{
+		return obj->fetch_add(arg, order);
+	}
+	template<class T>
+	T atomic_fetch_add_explicit(volatile std::atomic<T> * obj,
+	                            typename std::atomic<T>::difference_type arg,
+	                            std::memory_order order) noexcept
+	{
+		return obj->fetch_add(arg, order);
+	}
+
+	template<class T>
+	T atomic_fetch_sub(std::atomic<T> * obj,
+	                   typename std::atomic<T>::difference_type arg) noexcept
+	{
+		return obj->fetch_sub(arg);
+	}
+	template<class T>
+	T atomic_fetch_sub(volatile std::atomic<T> * obj,
+	                   typename std::atomic<T>::difference_type arg) noexcept
+	{
+		return obj->fetch_sub(arg);
+	}
+	template<class T>
+	T atomic_fetch_sub_explicit(std::atomic<T> * obj,
+	                            typename std::atomic<T>::difference_type arg,
+	                            std::memory_order order) noexcept
+	{
+		return obj->fetch_sub(arg, order);
+	}
+	template<class T>
+	T atomic_fetch_sub_explicit(volatile std::atomic<T> * obj,
+	                            typename std::atomic<T>::difference_type arg,
+	                            std::memory_order order) noexcept
+	{
+		return obj->fetch_sub(arg, order);
+	}
+
+	template<class T>
+	T atomic_fetch_and(std::atomic<T> * obj,
+	                   typename std::atomic<T>::value_type arg) noexcept
+	{
+		return obj->fetch_and(arg);
+	}
+	template<class T>
+	T atomic_fetch_and(volatile std::atomic<T> * obj,
+	                   typename std::atomic<T>::value_type arg) noexcept
+	{
+		return obj->fetch_and(arg);
+	}
+	template<class T>
+	T atomic_fetch_and_explicit(std::atomic<T> * obj,
+	                            typename std::atomic<T>::value_type arg,
+	                            std::memory_order order) noexcept
+	{
+		return obj->fetch_and(arg, order);
+	}
+	template<class T>
+	T atomic_fetch_and_explicit(volatile std::atomic<T> * obj,
+	                            typename std::atomic<T>::value_type arg,
+	                            std::memory_order order) noexcept
+	{
+		return obj->fetch_and(arg, order);
+	}
+
+	template<class T>
+	T atomic_fetch_or(std::atomic<T> * obj,
+	                  typename std::atomic<T>::value_type arg) noexcept
+	{
+		return obj->fetch_or(arg);
+	}
+	template<class T>
+	T atomic_fetch_or(volatile std::atomic<T> * obj,
+	                  typename std::atomic<T>::value_type arg) noexcept
+	{
+		return obj->fetch_or(arg);
+	}
+	template<class T>
+	T atomic_fetch_or_explicit(std::atomic<T> * obj,
+	                           typename std::atomic<T>::value_type arg,
+	                           std::memory_order order) noexcept
+	{
+		return obj->fetch_or(arg, order);
+	}
+	template<class T>
+	T atomic_fetch_or_explicit(volatile std::atomic<T> * obj,
+	                           typename std::atomic<T>::value_type arg,
+	                           std::memory_order order) noexcept
+	{
+		return obj->fetch_or(arg, order);
+	}
+
+	template<class T>
+	T atomic_fetch_xor(std::atomic<T> * obj,
+	                   typename std::atomic<T>::value_type arg) noexcept
+	{
+		return obj->fetch_xor(arg);
+	}
+	template<class T>
+	T atomic_fetch_xor(volatile std::atomic<T> * obj,
+	                   typename std::atomic<T>::value_type arg) noexcept
+	{
+		return obj->fetch_xor(arg);
+	}
+	template<class T>
+	T atomic_fetch_xor_explicit(std::atomic<T> * obj,
+	                            typename std::atomic<T>::value_type arg,
+	                            std::memory_order order) noexcept
+	{
+		return obj->fetch_xor(arg, order);
+	}
+	template<class T>
+	T atomic_fetch_xor_explicit(volatile std::atomic<T> * obj,
+	                            typename std::atomic<T>::value_type arg,
+	                            std::memory_order order) noexcept
+	{
+		return obj->fetch_xor(arg, order);
+	}
+
+	template<class T>
+	void atomic_wait(const std::atomic<T>               *object,
+	                 typename std::atomic<T>::value_type old)
+	{
+		object->wait(old);
+	}
+	template<class T>
+	void atomic_wait(const volatile std::atomic<T>      *object,
+	                 typename std::atomic<T>::value_type old)
+	{
+		object->wait(old);
+	}
+	template<class T>
+	void atomic_wait_explicit(const std::atomic<T>               *object,
+	                          typename std::atomic<T>::value_type old,
+	                          std::memory_order                   order)
+	{
+		object->wait(old, order);
+	}
+	template<class T>
+	void atomic_wait_explicit(const volatile std::atomic<T>      *object,
+	                          typename std::atomic<T>::value_type old,
+	                          std::memory_order                   order)
+	{
+		object->wait(old, order);
+	}
+
+	template<class T>
+	void atomic_notify_one(std::atomic<T> * object)
+	{
+		object->notify_one();
+	}
+	template<class T>
+	void atomic_notify_one(volatile std::atomic<T> * object)
+	{
+		object->notify_one();
+	}
+
+	template<class T>
+	void atomic_notify_all(std::atomic<T> * object)
+	{
+		object->notify_all();
+	}
+	template<class T>
+	void atomic_notify_all(volatile std::atomic<T> * object)
+	{
+		object->notify_all();
+	}
+} // namespace std
+
+__clang_ignored_warning_pop()
diff --git a/sdk/include/c++-config/cheriot-atomic.hh b/sdk/include/c++-config/cheriot-atomic.hh
index 54a3962..6453166 100644
--- a/sdk/include/c++-config/cheriot-atomic.hh
+++ b/sdk/include/c++-config/cheriot-atomic.hh
@@ -1,818 +1,9 @@
-// Copyright Microsoft and CHERIoT Contributors.
-// SPDX-License-Identifier: MIT
+#include <atomic>
 
-#pragma once
-#include <concepts>
-#include <futex.h>
-#include <type_traits>
-
-__clang_ignored_warning_push("-Watomic-alignment") namespace cheriot
+namespace cheriot
 {
-	enum class memory_order : int
-	{
-		relaxed = __ATOMIC_RELAXED,
-		consume = __ATOMIC_CONSUME,
-		acquire = __ATOMIC_ACQUIRE,
-		release = __ATOMIC_RELEASE,
-		acq_rel = __ATOMIC_ACQ_REL,
-		seq_cst = __ATOMIC_SEQ_CST,
-	};
-	inline constexpr memory_order memory_order_relaxed = memory_order::relaxed;
-	inline constexpr memory_order memory_order_consume = memory_order::consume;
-	inline constexpr memory_order memory_order_acquire = memory_order::acquire;
-	inline constexpr memory_order memory_order_release = memory_order::release;
-	inline constexpr memory_order memory_order_acq_rel = memory_order::acq_rel;
-	inline constexpr memory_order memory_order_seq_cst = memory_order::seq_cst;
-
-	namespace detail
-	{
-		/**
-		 * Version of atomic<T> for primitive types.  This calls the atomics
-		 * support library for everything on platforms with no A extension and
-		 * will use atomic instructions on ones that do.
-		 *
-		 * This differs from std::atomic in two intentional ways:
-		 *
-		 *  - The `wait` and `notify_*` methods are defined only on 4-byte
-		 *    values.  If there is a requirement for anything else, we should
-		 *    extend the futex APIs in the scheduler to deal with other types.
-		 *  - The `wait` call has a non-standard extension that handles takes a
-		 *    CHERIoT timeout parameter.
-		 *
-		 *  Any other divergence is a bug.
-		 *
-		 *  This is a base class that is extended for arithmetic and pointer
-		 *  types.
-		 */
-		template<typename T>
-		class primitive_atomic
-		{
-			/**
-			 * SFINAE helper to give us the underlying type of enums and the
-			 * raw type of everything else.
-			 */
-			template<typename U, bool = std::is_enum_v<U>>
-			struct underlying_type
-			{
-				using type = U;
-			};
-
-			template<typename U>
-			struct underlying_type<U, true> : ::std::underlying_type<U>
-			{
-			};
-
-			protected:
-			typename underlying_type<T>::type value;
-			static_assert(std::is_arithmetic_v<T> || std::is_enum_v<T> ||
-			                std::is_pointer_v<T> || std::is_null_pointer_v<T>,
-			              "Invalid type for primitive atomic");
-
-			__always_inline auto *pointer_for_intrinsics(T *pointer)
-			{
-				if constexpr (std::is_enum_v<T>)
-				{
-					return static_cast<std::underlying_type_t<T> *>(pointer);
-				}
-				else
-				{
-					return pointer;
-				}
-			}
-
-			static decltype(value) *as_underlying(T *v)
-			{
-				return reinterpret_cast<decltype(value) *>(v);
-			}
-			static decltype(value) as_underlying(T v)
-			{
-				return static_cast<decltype(value)>(v);
-			}
-
-			public:
-			using value_type                          = T;
-			static constexpr bool is_always_lock_free = true;
-			__always_inline bool  is_lock_free() const noexcept
-			{
-				return true;
-			}
-			__always_inline bool is_lock_free() const volatile noexcept
-			{
-				return true;
-			}
-
-			constexpr primitive_atomic() noexcept = default;
-			__always_inline constexpr primitive_atomic(T desired) noexcept
-			{
-				value = desired;
-			}
-			primitive_atomic(const primitive_atomic &) = delete;
-
-			__always_inline T operator=(T desired) noexcept
-			{
-				store(desired);
-				return desired;
-			}
-			__always_inline T operator=(T desired) volatile noexcept
-			{
-				return *const_cast<primitive_atomic<T> *>(this) = desired;
-			}
-			primitive_atomic &operator=(const primitive_atomic &) = delete;
-			primitive_atomic &
-			operator=(const primitive_atomic &) volatile = delete;
-
-			__always_inline void
-			store(T desired, memory_order order = memory_order_seq_cst) noexcept
-			{
-				__atomic_store_n(&value, as_underlying(desired), int(order));
-			}
-			__always_inline void
-			store(T            desired,
-			      memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<primitive_atomic<T> *>(this)->store(desired, order);
-			}
-
-			__always_inline T
-			load(memory_order order = memory_order_seq_cst) const noexcept
-			{
-				return __atomic_load_n(&value, int(order));
-			}
-
-			__always_inline T load(
-			  memory_order order = memory_order_seq_cst) const volatile noexcept
-			{
-				return const_cast<primitive_atomic<T> *>(this)->load(order);
-			}
-
-			__always_inline operator T() const noexcept
-			{
-				return load();
-			}
-			__always_inline operator T() const volatile noexcept
-			{
-				return load();
-			}
-
-			__always_inline T
-			exchange(T            desired,
-			         memory_order order = memory_order_seq_cst) noexcept
-			{
-				return T(__atomic_exchange_n(
-				  &value, as_underlying(desired), int(order)));
-			}
-			__always_inline T exchange(
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				return const_cast<primitive_atomic<T> *>(this)->exchange(
-				  desired, order);
-			}
-
-			__always_inline bool
-			compare_exchange_weak(T           &expected,
-			                      T            desired,
-			                      memory_order success,
-			                      memory_order failure) noexcept
-			{
-				__atomic_compare_exchange_n(&value,
-				                            as_underlying(&expected),
-				                            as_underlying(desired),
-				                            true,
-				                            int(success),
-				                            int(failure));
-			}
-			__always_inline bool
-			compare_exchange_weak(T           &expected,
-			                      T            desired,
-			                      memory_order success,
-			                      memory_order failure) volatile noexcept
-			{
-				return const_cast<primitive_atomic<T> *>(this)
-				  ->compare_exchange_weak(expected, desired, success, failure);
-			}
-			__always_inline bool compare_exchange_weak(
-			  T           &expected,
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) noexcept
-			{
-				return compare_exchange_weak(expected, desired, order, order);
-			}
-			__always_inline bool compare_exchange_weak(
-			  T           &expected,
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				return const_cast<primitive_atomic<T> *>(this)
-				  ->compare_exchange_weak(expected, desired, order);
-			}
-
-			__always_inline bool
-			compare_exchange_strong(T           &expected,
-			                        T            desired,
-			                        memory_order success,
-			                        memory_order failure) noexcept
-			{
-				return __atomic_compare_exchange_n(&value,
-				                                   as_underlying(&expected),
-				                                   as_underlying(desired),
-				                                   false,
-				                                   int(success),
-				                                   int(failure));
-			}
-			__always_inline bool
-			compare_exchange_strong(T           &expected,
-			                        T            desired,
-			                        memory_order success,
-			                        memory_order failure) volatile noexcept
-			{
-				return const_cast<primitive_atomic<T> *>(this)
-				  ->compare_exchange_strong(
-				    expected, desired, success, failure);
-			}
-			__always_inline bool compare_exchange_strong(
-			  T           &expected,
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) noexcept
-			{
-				return compare_exchange_strong(expected, desired, order, order);
-			}
-
-			__always_inline bool compare_exchange_strong(
-			  T           &expected,
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				return const_cast<primitive_atomic<T> *>(this)
-				  ->compare_exchange_strong(expected, desired, order);
-			}
-
-			__always_inline void
-			wait(T            old,
-			     memory_order order = memory_order::seq_cst) const noexcept
-			  requires(sizeof(T) == sizeof(uint32_t))
-			{
-				futex_wait(reinterpret_cast<const uint32_t *>(&value),
-				           reinterpret_cast<uint32_t>(as_underlying(old)));
-			}
-
-			__always_inline int
-			wait(Timeout       *timeout,
-			     T              old,
-			     memory_order   order = memory_order::seq_cst,
-			     FutexWaitFlags flags = FutexNone) const noexcept
-			  requires(sizeof(T) == sizeof(uint32_t))
-			{
-				return futex_timed_wait(
-				  timeout,
-				  reinterpret_cast<const uint32_t *>(&value),
-				  static_cast<uint32_t>(as_underlying(old)),
-				  flags);
-			}
-
-			__always_inline int
-			wait(Timeout *timeout, T old, FutexWaitFlags flags) const noexcept
-			  requires(sizeof(T) == sizeof(uint32_t))
-			{
-				return wait(timeout, old, memory_order::seq_cst, flags);
-			}
-
-			__always_inline void
-			wait(T old, memory_order order = memory_order::seq_cst) const
-			  volatile noexcept requires(sizeof(T) == sizeof(uint32_t))
-			{
-				const_cast<primitive_atomic<T> *>(this)->wait(old, order);
-			}
-
-			__always_inline void notify_one() noexcept
-			  requires(sizeof(T) == sizeof(uint32_t))
-			{
-				futex_wake(reinterpret_cast<uint32_t *>(&value), 1);
-			}
-			__always_inline void notify_one() volatile noexcept
-			  requires(sizeof(T) == sizeof(uint32_t))
-			{
-				const_cast<primitive_atomic<T> *>(this)->notify_one();
-			}
-
-			__always_inline void notify_all() noexcept
-			  requires(sizeof(T) == sizeof(uint32_t))
-			{
-				futex_wake(reinterpret_cast<uint32_t *>(&value),
-				           std::numeric_limits<uint32_t>::max());
-			}
-			__always_inline void notify_all() volatile noexcept
-			  requires(sizeof(T) == sizeof(uint32_t))
-			{
-				const_cast<primitive_atomic<T> *>(this)->notify_all();
-			}
-		};
-
-		/**
-		 * Version of atomic for arithmetic types.  This adds the arithmetic
-		 * methods.
-		 */
-		template<typename T>
-		class arithmetic_atomic : public primitive_atomic<T>
-		{
-			public:
-			using primitive_atomic<T>::primitive_atomic;
-			using primitive_atomic<T>::operator=;
-			using difference_type = typename primitive_atomic<T>::value_type;
-
-			__always_inline T
-			fetch_add(T arg, memory_order order = memory_order_seq_cst) noexcept
-			{
-				return __atomic_fetch_add(&this->value, arg, int(order));
-			}
-			__always_inline T fetch_add(
-			  T            arg,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<arithmetic_atomic<T> *>(this)->fetch_add(arg, order);
-			}
-
-			__always_inline T
-			fetch_sub(T arg, memory_order order = memory_order_seq_cst) noexcept
-			{
-				return __atomic_fetch_sub(&this->value, arg, int(order));
-			}
-			__always_inline T fetch_sub(
-			  T            arg,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<arithmetic_atomic<T> *>(this)->fetch_sub(arg, order);
-			}
-
-			__always_inline T
-			fetch_and(T arg, memory_order order = memory_order_seq_cst) noexcept
-			{
-				return __atomic_fetch_and(&this->value, arg, int(order));
-			}
-			__always_inline T fetch_and(
-			  T            arg,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<arithmetic_atomic<T> *>(this)->fetch_and(arg, order);
-			}
-
-			__always_inline T
-			fetch_or(T arg, memory_order order = memory_order_seq_cst) noexcept
-			{
-				return __atomic_fetch_or(&this->value, arg, int(order));
-			}
-			__always_inline T fetch_or(
-			  T            arg,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<arithmetic_atomic<T> *>(this)->fetch_or(arg, order);
-			}
-
-			__always_inline T
-			fetch_xor(T arg, memory_order order = memory_order_seq_cst) noexcept
-			{
-				return __atomic_fetch_xor(&this->value, arg, int(order));
-			}
-			__always_inline T fetch_xor(
-			  T            arg,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<arithmetic_atomic<T> *>(this)->fetch_xor(arg, order);
-			}
-
-			__always_inline T operator++() noexcept
-			{
-				return fetch_add(1) + 1;
-			}
-			__always_inline T operator++() volatile noexcept
-			{
-				return fetch_add(1) + 1;
-			}
-			__always_inline T operator++(int) noexcept
-			{
-				return fetch_add(1);
-			}
-			__always_inline T operator++(int) volatile noexcept
-			{
-				return fetch_add(1);
-			}
-			__always_inline T operator--() noexcept
-			{
-				return fetch_sub(1) - 1;
-			}
-			__always_inline T operator--() volatile noexcept
-			{
-				return fetch_sub(1) - 1;
-			}
-			__always_inline T operator--(int) noexcept
-			{
-				return fetch_sub(1);
-			}
-			__always_inline T operator--(int) volatile noexcept
-			{
-				return fetch_sub(1);
-			}
-
-			__always_inline T operator+=(T arg) noexcept
-			{
-				return fetch_add(arg) + arg;
-			}
-			__always_inline T operator+=(T arg) volatile noexcept
-			{
-				return fetch_add(arg) + arg;
-			}
-			__always_inline T operator-=(T arg) noexcept
-			{
-				return fetch_sub(arg) - arg;
-			}
-			__always_inline T operator-=(T arg) volatile noexcept
-			{
-				return fetch_sub(arg) - arg;
-			}
-
-			__always_inline T operator&=(T arg) noexcept
-			{
-				return fetch_and(arg) & arg;
-			}
-			__always_inline T operator&=(T arg) volatile noexcept
-			{
-				return fetch_and(arg) & arg;
-			}
-			__always_inline T operator|=(T arg) noexcept
-			{
-				return fetch_or(arg) | arg;
-			}
-			__always_inline T operator|=(T arg) volatile noexcept
-			{
-				return fetch_or(arg) | arg;
-			}
-			__always_inline T operator^=(T arg) noexcept
-			{
-				return fetch_xor(arg) ^ arg;
-			}
-			__always_inline T operator^=(T arg) volatile noexcept
-			{
-				return fetch_xor(arg) ^ arg;
-			}
-		};
-
-		/**
-		 * Version of atomic for pointer types.  This adds pointer arithmetic
-		 * methods.
-		 */
-		template<typename T>
-		class pointer_atomic : primitive_atomic<T>
-		{
-			public:
-			using primitive_atomic<T>::primitive_atomic;
-			using difference_type = std::ptrdiff_t;
-
-			T *fetch_add(std::ptrdiff_t arg,
-			             memory_order   order = memory_order_seq_cst) noexcept
-			{
-				return __atomic_fetch_add(
-				  &this->value, arg * sizeof(T), int(order));
-			}
-			T *fetch_add(
-			  std::ptrdiff_t arg,
-			  memory_order   order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<pointer_atomic<T> *>(this)->fetch_add(arg,
-				                                                 int(order));
-			}
-
-			T *fetch_sub(std::ptrdiff_t arg,
-			             memory_order   order = memory_order_seq_cst) noexcept
-			{
-				return __atomic_fetch_sub(
-				  &this->value, arg * sizeof(T), int(order));
-			}
-			T *fetch_sub(
-			  std::ptrdiff_t arg,
-			  memory_order   order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<pointer_atomic<T> *>(this)->fetch_sub(arg,
-				                                                 int(order));
-			}
-
-			__always_inline T *operator++() noexcept
-			{
-				return fetch_add(1) + 1;
-			}
-			__always_inline T *operator++() volatile noexcept
-			{
-				return fetch_add(1) + 1;
-			}
-			__always_inline T *operator++(int) noexcept
-			{
-				return fetch_add(1);
-			}
-			__always_inline T *operator++(int) volatile noexcept
-			{
-				return fetch_add(1);
-			}
-			__always_inline T *operator--() noexcept
-			{
-				return fetch_sub(1) - 1;
-			}
-			__always_inline T *operator--() volatile noexcept
-			{
-				return fetch_sub(1) - 1;
-			}
-			__always_inline T *operator--(int) noexcept
-			{
-				return fetch_sub(1);
-			}
-			__always_inline T *operator--(int) volatile noexcept
-			{
-				return fetch_sub(1);
-			}
-
-			__always_inline T *operator+=(std::ptrdiff_t arg) noexcept
-			{
-				return fetch_add(arg) + arg;
-			}
-			__always_inline T *operator+=(std::ptrdiff_t arg) volatile noexcept
-			{
-				return fetch_add(arg) + arg;
-			}
-			__always_inline T *operator-=(std::ptrdiff_t arg) noexcept
-			{
-				return fetch_sub(arg) - arg;
-			}
-			__always_inline T *operator-=(std::ptrdiff_t arg) volatile noexcept
-			{
-				return fetch_sub(arg) - arg;
-			}
-		};
-
-		/**
-		 * Simple flag lock.  This uses `primitive_atomic` to build a trivial
-		 * lock.
-		 */
-		class flag_lock
-		{
-			/**
-			 * Possible states of the flag lock.
-			 */
-			enum class LockState : uint32_t
-			{
-				/// Lock is not held.
-				Unlocked,
-				/// Lock is held, no waiters.
-				Locked,
-				/// Lock is held and one or more waiters exist.
-				LockedWithWaiters
-			};
-			/// The lock state.
-			primitive_atomic<LockState> lockWord;
-
-			public:
-			/**
-			 * Acquire the lock.  Blocks indefinitely.
-			 */
-			__noinline void lock()
-			{
-				LockState old = LockState::Unlocked;
-				while (true)
-				{
-					switch (old)
-					{
-						// If the lock is not held, try to acquire it and return
-						// if we can.
-						case LockState::Unlocked:
-							if (lockWord.compare_exchange_strong(
-							      old, LockState::Locked))
-							{
-								return;
-							}
-							break;
-							// If the lock is held, mark it as having waiters
-							// and then wait.
-						case LockState::Locked:
-							lockWord.exchange(LockState::LockedWithWaiters);
-							[[fallthrough]];
-							// If the lock is blocked with waiters, sleep
-						case LockState::LockedWithWaiters:
-							lockWord.wait(LockState::LockedWithWaiters);
-					}
-				}
-			}
-
-			/**
-			 * Release the lock, waking any waiters if there are any.
-			 */
-			__noinline void unlock()
-			{
-				auto old = lockWord.exchange(LockState::Unlocked);
-				if (old == LockState::LockedWithWaiters)
-				{
-					lockWord.notify_all();
-				}
-			}
-		};
-
-		/**
-		 * Fallback `atomic` implementation that uses a lock to protect the
-		 * value.
-		 */
-		template<typename T>
-		class locked_atomic
-		{
-			private:
-			/// The atomic value.
-			T value;
-			/// Lock protecting this object, at the end so that it can go into
-			/// padding.
-			mutable flag_lock lock;
-
-			struct guard
-			{
-				flag_lock &lock;
-				__always_inline ~guard()
-				{
-					lock.unlock();
-				}
-			};
-			__always_inline guard acquire_lock()
-			{
-				lock.lock();
-				return {lock};
-			}
-
-			public:
-			using value_type                          = T;
-			static constexpr bool is_always_lock_free = false;
-			__always_inline bool  is_lock_free() const noexcept
-			{
-				return false;
-			}
-			__always_inline bool is_lock_free() const volatile noexcept
-			{
-				return false;
-			}
-
-			constexpr locked_atomic() noexcept = default;
-			__always_inline constexpr locked_atomic(T desired) noexcept
-			{
-				value = desired;
-			}
-			locked_atomic(const locked_atomic &) = delete;
-
-			__always_inline T operator=(T desired) noexcept
-			{
-				auto g = acquire_lock();
-				value  = desired;
-				return desired;
-			}
-			__always_inline T operator=(T desired) volatile noexcept
-			{
-				return *const_cast<locked_atomic<T> *>(this) = desired;
-			}
-			locked_atomic &operator=(const locked_atomic &) = delete;
-			locked_atomic &operator=(const locked_atomic &) volatile = delete;
-
-			__always_inline void
-			store(T desired, memory_order order = memory_order_seq_cst) noexcept
-			{
-				auto g = acquire_lock();
-				value  = desired;
-			}
-			__always_inline void
-			store(T            desired,
-			      memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				const_cast<locked_atomic<T> *>(this)->store(desired, order);
-			}
-
-			__always_inline T
-			load(memory_order order = memory_order_seq_cst) const noexcept
-			{
-				auto g = acquire_lock();
-				return value;
-			}
-
-			__always_inline T load(
-			  memory_order order = memory_order_seq_cst) const volatile noexcept
-			{
-				return const_cast<locked_atomic<T> *>(this)->load(order);
-			}
-
-			__always_inline operator T() const noexcept
-			{
-				return load();
-			}
-			__always_inline operator T() const volatile noexcept
-			{
-				return load();
-			}
-
-			__always_inline T
-			exchange(T            desired,
-			         memory_order order = memory_order_seq_cst) noexcept
-			{
-				auto g   = acquire_lock();
-				T    tmp = value;
-				value    = desired;
-				return tmp;
-			}
-			__always_inline T exchange(
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				return const_cast<locked_atomic<T> *>(this)->exchange(desired,
-				                                                      order);
-			}
-
-			__always_inline bool
-			compare_exchange_weak(T           &expected,
-			                      T            desired,
-			                      memory_order success,
-			                      memory_order failure) noexcept
-			{
-				auto g = acquire_lock();
-				if (value == expected)
-				{
-					desired = value;
-				}
-				expected = value;
-				return true;
-			}
-			__always_inline bool
-			compare_exchange_weak(T           &expected,
-			                      T            desired,
-			                      memory_order success,
-			                      memory_order failure) volatile noexcept
-			{
-				return const_cast<locked_atomic<T> *>(this)
-				  ->compare_exchange_weak(expected, desired, success, failure);
-			}
-			__always_inline bool compare_exchange_weak(
-			  T           &expected,
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) noexcept
-			{
-				return compare_exchange_weak(expected, desired, order, order);
-			}
-			__always_inline bool compare_exchange_weak(
-			  T           &expected,
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				return const_cast<locked_atomic<T> *>(this)
-				  ->compare_exchange_weak(expected, desired, order);
-			}
-
-			__always_inline bool
-			compare_exchange_strong(T           &expected,
-			                        T            desired,
-			                        memory_order success,
-			                        memory_order failure) noexcept
-			{
-				return compare_exchange_weak(
-				  expected, desired, success, failure);
-			}
-			__always_inline bool
-			compare_exchange_strong(T           &expected,
-			                        T            desired,
-			                        memory_order success,
-			                        memory_order failure) volatile noexcept
-			{
-				return const_cast<locked_atomic<T> *>(this)
-				  ->compare_exchange_strong(
-				    expected, desired, success, failure);
-			}
-			__always_inline bool compare_exchange_strong(
-			  T           &expected,
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) noexcept
-			{
-				return compare_exchange_strong(expected, desired, order, order);
-			}
-
-			__always_inline bool compare_exchange_strong(
-			  T           &expected,
-			  T            desired,
-			  memory_order order = memory_order_seq_cst) volatile noexcept
-			{
-				return const_cast<locked_atomic<T> *>(this)
-				  ->compare_exchange_strong(expected, desired, order);
-			}
-		};
-	}; // namespace detail
-
-	/**
-	 * Select the correct `atomic` implementation based on the type.
-	 */
+	// Backwards-compatibility alias: now that a complete standard
+	// std::atomic is provided, cheriot::atomic simply forwards to it.
 	template<typename T>
-	using atomic = std::conditional_t<
-	  std::is_pointer_v<T>,
-	  detail::pointer_atomic<T>,
-	  std::conditional_t<std::is_arithmetic_v<T>,
-	                     detail::arithmetic_atomic<T>,
-	                     std::conditional_t<std::is_enum_v<T>,
-	                                        detail::primitive_atomic<T>,
-	                                        detail::locked_atomic<T>>>>;
-
+	using atomic = std::atomic<T>;
 } // namespace cheriot
-__clang_ignored_warning_pop()
diff --git a/sdk/include/cheri.hh b/sdk/include/cheri.hh
index 5a26bbc..573a903 100644
--- a/sdk/include/cheri.hh
+++ b/sdk/include/cheri.hh
@@ -1058,13 +1058,23 @@
 	 * Checks that `ptr` is valid, unsealed, does not overlap the caller's
 	 * stack, and has at least `Permissions` and has at least `Space` bytes
 	 * after the current offset.
+	 *
+	 * If the permissions do not include Global, then this will also check that
+	 * the capability does not point to the current thread's stack.  This
+	 * behaviour can be disabled (for example, for use in a shared library) by
+	 * passing `false` for `CheckStack`.
 	 */
 	template<PermissionSet Permissions = PermissionSet{Permission::Load},
-	         typename T                = void>
+	         typename T                = void,
+	         bool CheckStack           = true>
 	__always_inline inline bool check_pointer(T *ptr, size_t space = sizeof(T))
 	{
-		return detail::check_pointer_internal<!Permissions.contains(
-		  Permission::Global)>(ptr, space, Permissions.as_raw());
+		// We can skip the stack check when Global is requested, because
+		// stack capabilities never carry the Global permission.
+		constexpr bool StackCheckNeeded =
+		  CheckStack && !Permissions.contains(Permission::Global);
+		return detail::check_pointer_internal<StackCheckNeeded>(
+		  ptr, space, Permissions.as_raw());
 	}
 
 	/**
diff --git a/sdk/include/libc++/atomic b/sdk/include/libc++/atomic
deleted file mode 100644
index 0f6aee8..0000000
--- a/sdk/include/libc++/atomic
+++ /dev/null
@@ -1,2835 +0,0 @@
-// -*- C++ -*-
-//===--------------------------- atomic -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP_ATOMIC
-#define _LIBCPP_ATOMIC
-
-/*
-    atomic synopsis
-
-namespace std
-{
-
-// feature test macro [version.syn]
-
-#define __cpp_lib_atomic_is_always_lock_free
-#define __cpp_lib_atomic_flag_test
-#define __cpp_lib_atomic_lock_free_type_aliases
-#define __cpp_lib_atomic_wait
-
- // order and consistency
-
- enum memory_order: unspecified // enum class in C++20
- {
-    relaxed,
-    consume, // load-consume
-    acquire, // load-acquire
-    release, // store-release
-    acq_rel, // store-release load-acquire
-    seq_cst // store-release load-acquire
- };
-
- inline constexpr auto memory_order_relaxed = memory_order::relaxed;
- inline constexpr auto memory_order_consume = memory_order::consume;
- inline constexpr auto memory_order_acquire = memory_order::acquire;
- inline constexpr auto memory_order_release = memory_order::release;
- inline constexpr auto memory_order_acq_rel = memory_order::acq_rel;
- inline constexpr auto memory_order_seq_cst = memory_order::seq_cst;
-
-template <class T> T kill_dependency(T y) noexcept;
-
-// lock-free property
-
-#define ATOMIC_BOOL_LOCK_FREE unspecified
-#define ATOMIC_CHAR_LOCK_FREE unspecified
-#define ATOMIC_CHAR8_T_LOCK_FREE unspecified // C++20
-#define ATOMIC_CHAR16_T_LOCK_FREE unspecified
-#define ATOMIC_CHAR32_T_LOCK_FREE unspecified
-#define ATOMIC_WCHAR_T_LOCK_FREE unspecified
-#define ATOMIC_SHORT_LOCK_FREE unspecified
-#define ATOMIC_INT_LOCK_FREE unspecified
-#define ATOMIC_LONG_LOCK_FREE unspecified
-#define ATOMIC_LLONG_LOCK_FREE unspecified
-#define ATOMIC_POINTER_LOCK_FREE unspecified
-
-template <class T>
-struct atomic
-{
-    using value_type = T;
-
-    static constexpr bool is_always_lock_free;
-    bool is_lock_free() const volatile noexcept;
-    bool is_lock_free() const noexcept;
-
-    atomic() noexcept = default; // until C++20
-    constexpr atomic() noexcept(is_nothrow_default_constructible_v<T>); // since C++20
-    constexpr atomic(T desr) noexcept;
-    atomic(const atomic&) = delete;
-    atomic& operator=(const atomic&) = delete;
-    atomic& operator=(const atomic&) volatile = delete;
-
-    T load(memory_order m = memory_order_seq_cst) const volatile noexcept;
-    T load(memory_order m = memory_order_seq_cst) const noexcept;
-    operator T() const volatile noexcept;
-    operator T() const noexcept;
-    void store(T desr, memory_order m = memory_order_seq_cst) volatile noexcept;
-    void store(T desr, memory_order m = memory_order_seq_cst) noexcept;
-    T operator=(T) volatile noexcept;
-    T operator=(T) noexcept;
-
-    T exchange(T desr, memory_order m = memory_order_seq_cst) volatile noexcept;
-    T exchange(T desr, memory_order m = memory_order_seq_cst) noexcept;
-    bool compare_exchange_weak(T& expc, T desr,
-                               memory_order s, memory_order f) volatile noexcept;
-    bool compare_exchange_weak(T& expc, T desr, memory_order s, memory_order f) noexcept;
-    bool compare_exchange_strong(T& expc, T desr,
-                                 memory_order s, memory_order f) volatile noexcept;
-    bool compare_exchange_strong(T& expc, T desr,
-                                 memory_order s, memory_order f) noexcept;
-    bool compare_exchange_weak(T& expc, T desr,
-                               memory_order m = memory_order_seq_cst) volatile noexcept;
-    bool compare_exchange_weak(T& expc, T desr,
-                               memory_order m = memory_order_seq_cst) noexcept;
-    bool compare_exchange_strong(T& expc, T desr,
-                                memory_order m = memory_order_seq_cst) volatile noexcept;
-    bool compare_exchange_strong(T& expc, T desr,
-                                 memory_order m = memory_order_seq_cst) noexcept;
-
-    void wait(T, memory_order = memory_order::seq_cst) const volatile noexcept;
-    void wait(T, memory_order = memory_order::seq_cst) const noexcept;
-    void notify_one() volatile noexcept;
-    void notify_one() noexcept;
-    void notify_all() volatile noexcept;
-    void notify_all() noexcept;
-};
-
-template <>
-struct atomic<integral>
-{
-    using value_type = integral;
-    using difference_type = value_type;
-
-    static constexpr bool is_always_lock_free;
-    bool is_lock_free() const volatile noexcept;
-    bool is_lock_free() const noexcept;
-
-    atomic() noexcept = default;
-    constexpr atomic(integral desr) noexcept;
-    atomic(const atomic&) = delete;
-    atomic& operator=(const atomic&) = delete;
-    atomic& operator=(const atomic&) volatile = delete;
-
-    integral load(memory_order m = memory_order_seq_cst) const volatile noexcept;
-    integral load(memory_order m = memory_order_seq_cst) const noexcept;
-    operator integral() const volatile noexcept;
-    operator integral() const noexcept;
-    void store(integral desr, memory_order m = memory_order_seq_cst) volatile noexcept;
-    void store(integral desr, memory_order m = memory_order_seq_cst) noexcept;
-    integral operator=(integral desr) volatile noexcept;
-    integral operator=(integral desr) noexcept;
-
-    integral exchange(integral desr,
-                      memory_order m = memory_order_seq_cst) volatile noexcept;
-    integral exchange(integral desr, memory_order m = memory_order_seq_cst) noexcept;
-    bool compare_exchange_weak(integral& expc, integral desr,
-                               memory_order s, memory_order f) volatile noexcept;
-    bool compare_exchange_weak(integral& expc, integral desr,
-                               memory_order s, memory_order f) noexcept;
-    bool compare_exchange_strong(integral& expc, integral desr,
-                                 memory_order s, memory_order f) volatile noexcept;
-    bool compare_exchange_strong(integral& expc, integral desr,
-                                 memory_order s, memory_order f) noexcept;
-    bool compare_exchange_weak(integral& expc, integral desr,
-                               memory_order m = memory_order_seq_cst) volatile noexcept;
-    bool compare_exchange_weak(integral& expc, integral desr,
-                               memory_order m = memory_order_seq_cst) noexcept;
-    bool compare_exchange_strong(integral& expc, integral desr,
-                                memory_order m = memory_order_seq_cst) volatile noexcept;
-    bool compare_exchange_strong(integral& expc, integral desr,
-                                 memory_order m = memory_order_seq_cst) noexcept;
-
-    integral fetch_add(integral op, memory_order m = memory_order_seq_cst) volatile noexcept;
-    integral fetch_add(integral op, memory_order m = memory_order_seq_cst) noexcept;
-    integral fetch_sub(integral op, memory_order m = memory_order_seq_cst) volatile noexcept;
-    integral fetch_sub(integral op, memory_order m = memory_order_seq_cst) noexcept;
-    integral fetch_and(integral op, memory_order m = memory_order_seq_cst) volatile noexcept;
-    integral fetch_and(integral op, memory_order m = memory_order_seq_cst) noexcept;
-    integral fetch_or(integral op, memory_order m = memory_order_seq_cst) volatile noexcept;
-    integral fetch_or(integral op, memory_order m = memory_order_seq_cst) noexcept;
-    integral fetch_xor(integral op, memory_order m = memory_order_seq_cst) volatile noexcept;
-    integral fetch_xor(integral op, memory_order m = memory_order_seq_cst) noexcept;
-
-    integral operator++(int) volatile noexcept;
-    integral operator++(int) noexcept;
-    integral operator--(int) volatile noexcept;
-    integral operator--(int) noexcept;
-    integral operator++() volatile noexcept;
-    integral operator++() noexcept;
-    integral operator--() volatile noexcept;
-    integral operator--() noexcept;
-    integral operator+=(integral op) volatile noexcept;
-    integral operator+=(integral op) noexcept;
-    integral operator-=(integral op) volatile noexcept;
-    integral operator-=(integral op) noexcept;
-    integral operator&=(integral op) volatile noexcept;
-    integral operator&=(integral op) noexcept;
-    integral operator|=(integral op) volatile noexcept;
-    integral operator|=(integral op) noexcept;
-    integral operator^=(integral op) volatile noexcept;
-    integral operator^=(integral op) noexcept;
-
-    void wait(integral, memory_order = memory_order::seq_cst) const volatile noexcept;
-    void wait(integral, memory_order = memory_order::seq_cst) const noexcept;
-    void notify_one() volatile noexcept;
-    void notify_one() noexcept;
-    void notify_all() volatile noexcept;
-    void notify_all() noexcept;
-};
-
-template <class T>
-struct atomic<T*>
-{
-    using value_type = T*;
-    using difference_type = ptrdiff_t;
-
-    static constexpr bool is_always_lock_free;
-    bool is_lock_free() const volatile noexcept;
-    bool is_lock_free() const noexcept;
-
-    atomic() noexcept = default; // until C++20
-    constexpr atomic() noexcept; // since C++20
-    constexpr atomic(T* desr) noexcept;
-    atomic(const atomic&) = delete;
-    atomic& operator=(const atomic&) = delete;
-    atomic& operator=(const atomic&) volatile = delete;
-
-    T* load(memory_order m = memory_order_seq_cst) const volatile noexcept;
-    T* load(memory_order m = memory_order_seq_cst) const noexcept;
-    operator T*() const volatile noexcept;
-    operator T*() const noexcept;
-    void store(T* desr, memory_order m = memory_order_seq_cst) volatile noexcept;
-    void store(T* desr, memory_order m = memory_order_seq_cst) noexcept;
-    T* operator=(T*) volatile noexcept;
-    T* operator=(T*) noexcept;
-
-    T* exchange(T* desr, memory_order m = memory_order_seq_cst) volatile noexcept;
-    T* exchange(T* desr, memory_order m = memory_order_seq_cst) noexcept;
-    bool compare_exchange_weak(T*& expc, T* desr,
-                               memory_order s, memory_order f) volatile noexcept;
-    bool compare_exchange_weak(T*& expc, T* desr,
-                               memory_order s, memory_order f) noexcept;
-    bool compare_exchange_strong(T*& expc, T* desr,
-                                 memory_order s, memory_order f) volatile noexcept;
-    bool compare_exchange_strong(T*& expc, T* desr,
-                                 memory_order s, memory_order f) noexcept;
-    bool compare_exchange_weak(T*& expc, T* desr,
-                               memory_order m = memory_order_seq_cst) volatile noexcept;
-    bool compare_exchange_weak(T*& expc, T* desr,
-                               memory_order m = memory_order_seq_cst) noexcept;
-    bool compare_exchange_strong(T*& expc, T* desr,
-                                memory_order m = memory_order_seq_cst) volatile noexcept;
-    bool compare_exchange_strong(T*& expc, T* desr,
-                                 memory_order m = memory_order_seq_cst) noexcept;
-    T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile noexcept;
-    T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst) noexcept;
-    T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile noexcept;
-    T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst) noexcept;
-
-    T* operator++(int) volatile noexcept;
-    T* operator++(int) noexcept;
-    T* operator--(int) volatile noexcept;
-    T* operator--(int) noexcept;
-    T* operator++() volatile noexcept;
-    T* operator++() noexcept;
-    T* operator--() volatile noexcept;
-    T* operator--() noexcept;
-    T* operator+=(ptrdiff_t op) volatile noexcept;
-    T* operator+=(ptrdiff_t op) noexcept;
-    T* operator-=(ptrdiff_t op) volatile noexcept;
-    T* operator-=(ptrdiff_t op) noexcept;
-
-    void wait(T*, memory_order = memory_order::seq_cst) const volatile noexcept;
-    void wait(T*, memory_order = memory_order::seq_cst) const noexcept;
-    void notify_one() volatile noexcept;
-    void notify_one() noexcept;
-    void notify_all() volatile noexcept;
-    void notify_all() noexcept;
-};
-
-
-template <class T>
-  bool atomic_is_lock_free(const volatile atomic<T>* obj) noexcept;
-
-template <class T>
-  bool atomic_is_lock_free(const atomic<T>* obj) noexcept;
-
-template <class T>
-  void atomic_store(volatile atomic<T>* obj, T desr) noexcept;
-
-template <class T>
-  void atomic_store(atomic<T>* obj, T desr) noexcept;
-
-template <class T>
-  void atomic_store_explicit(volatile atomic<T>* obj, T desr, memory_order m) noexcept;
-
-template <class T>
-  void atomic_store_explicit(atomic<T>* obj, T desr, memory_order m) noexcept;
-
-template <class T>
-  T atomic_load(const volatile atomic<T>* obj) noexcept;
-
-template <class T>
-  T atomic_load(const atomic<T>* obj) noexcept;
-
-template <class T>
-  T atomic_load_explicit(const volatile atomic<T>* obj, memory_order m) noexcept;
-
-template <class T>
-  T atomic_load_explicit(const atomic<T>* obj, memory_order m) noexcept;
-
-template <class T>
-  T atomic_exchange(volatile atomic<T>* obj, T desr) noexcept;
-
-template <class T>
-  T atomic_exchange(atomic<T>* obj, T desr) noexcept;
-
-template <class T>
-  T atomic_exchange_explicit(volatile atomic<T>* obj, T desr, memory_order m) noexcept;
-
-template <class T>
-  T atomic_exchange_explicit(atomic<T>* obj, T desr, memory_order m) noexcept;
-
-template <class T>
-  bool atomic_compare_exchange_weak(volatile atomic<T>* obj, T* expc, T desr) noexcept;
-
-template <class T>
-  bool atomic_compare_exchange_weak(atomic<T>* obj, T* expc, T desr) noexcept;
-
-template <class T>
-  bool atomic_compare_exchange_strong(volatile atomic<T>* obj, T* expc, T desr) noexcept;
-
-template <class T>
-  bool atomic_compare_exchange_strong(atomic<T>* obj, T* expc, T desr) noexcept;
-
-template <class T>
-  bool atomic_compare_exchange_weak_explicit(volatile atomic<T>* obj, T* expc,
-                                             T desr,
-                                             memory_order s, memory_order f) noexcept;
-
-template <class T>
-  bool atomic_compare_exchange_weak_explicit(atomic<T>* obj, T* expc, T desr,
-                                             memory_order s, memory_order f) noexcept;
-
-template <class T>
-  bool atomic_compare_exchange_strong_explicit(volatile atomic<T>* obj,
-                                               T* expc, T desr,
-                                               memory_order s, memory_order f) noexcept;
-
-template <class T>
-  bool atomic_compare_exchange_strong_explicit(atomic<T>* obj, T* expc,
-                                               T desr,
-                                               memory_order s, memory_order f) noexcept;
-
-template <class T>
-  void atomic_wait(const volatile atomic<T>* obj, T old) noexcept;
-
-template <class T>
-  void atomic_wait(const atomic<T>* obj, T old) noexcept;
-
-template <class T>
-  void atomic_wait_explicit(const volatile atomic<T>* obj, T old, memory_order m) noexcept;
-
-template <class T>
-  void atomic_wait_explicit(const atomic<T>* obj, T old, memory_order m) noexcept;
-
-template <class T>
-  void atomic_one(volatile atomic<T>* obj) noexcept;
-
-template <class T>
-  void atomic_one(atomic<T>* obj) noexcept;
-
-template <class T>
-  void atomic_all(volatile atomic<T>* obj) noexcept;
-
-template <class T>
-  void atomic_all(atomic<T>* obj) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_add(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_add(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_add_explicit(volatile atomic<Integral>* obj, Integral op,
-                              memory_order m) noexcept;
-template <class Integral>
-  Integral atomic_fetch_add_explicit(atomic<Integral>* obj, Integral op,
-                              memory_order m) noexcept;
-template <class Integral>
-  Integral atomic_fetch_sub(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_sub(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_sub_explicit(volatile atomic<Integral>* obj, Integral op,
-                                     memory_order m) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_sub_explicit(atomic<Integral>* obj, Integral op,
-                                     memory_order m) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_and(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_and(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_and_explicit(volatile atomic<Integral>* obj, Integral op,
-                                     memory_order m) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_and_explicit(atomic<Integral>* obj, Integral op,
-                                     memory_order m) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_or(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_or(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_or_explicit(volatile atomic<Integral>* obj, Integral op,
-                             memory_order m) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_or_explicit(atomic<Integral>* obj, Integral op,
-                             memory_order m) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_xor(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_xor(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_xor_explicit(volatile atomic<Integral>* obj, Integral op,
-                                     memory_order m) noexcept;
-
-template <class Integral>
-  Integral atomic_fetch_xor_explicit(atomic<Integral>* obj, Integral op,
-                                     memory_order m) noexcept;
-
-template <class T>
-  T* atomic_fetch_add(volatile atomic<T*>* obj, ptrdiff_t op) noexcept;
-
-template <class T>
-  T* atomic_fetch_add(atomic<T*>* obj, ptrdiff_t op) noexcept;
-
-template <class T>
-  T* atomic_fetch_add_explicit(volatile atomic<T*>* obj, ptrdiff_t op,
-                               memory_order m) noexcept;
-
-template <class T>
-  T* atomic_fetch_add_explicit(atomic<T*>* obj, ptrdiff_t op, memory_order m) noexcept;
-
-template <class T>
-  T* atomic_fetch_sub(volatile atomic<T*>* obj, ptrdiff_t op) noexcept;
-
-template <class T>
-  T* atomic_fetch_sub(atomic<T*>* obj, ptrdiff_t op) noexcept;
-
-template <class T>
-  T* atomic_fetch_sub_explicit(volatile atomic<T*>* obj, ptrdiff_t op,
-                               memory_order m) noexcept;
-
-template <class T>
-  T* atomic_fetch_sub_explicit(atomic<T*>* obj, ptrdiff_t op, memory_order m) noexcept;
-
-// Atomics for standard typedef types
-
-typedef atomic<bool>               atomic_bool;
-typedef atomic<char>               atomic_char;
-typedef atomic<signed char>        atomic_schar;
-typedef atomic<unsigned char>      atomic_uchar;
-typedef atomic<short>              atomic_short;
-typedef atomic<unsigned short>     atomic_ushort;
-typedef atomic<int>                atomic_int;
-typedef atomic<unsigned int>       atomic_uint;
-typedef atomic<long>               atomic_long;
-typedef atomic<unsigned long>      atomic_ulong;
-typedef atomic<long long>          atomic_llong;
-typedef atomic<unsigned long long> atomic_ullong;
-typedef atomic<char8_t>            atomic_char8_t; // C++20
-typedef atomic<char16_t>           atomic_char16_t;
-typedef atomic<char32_t>           atomic_char32_t;
-typedef atomic<wchar_t>            atomic_wchar_t;
-
-typedef atomic<int_least8_t>   atomic_int_least8_t;
-typedef atomic<uint_least8_t>  atomic_uint_least8_t;
-typedef atomic<int_least16_t>  atomic_int_least16_t;
-typedef atomic<uint_least16_t> atomic_uint_least16_t;
-typedef atomic<int_least32_t>  atomic_int_least32_t;
-typedef atomic<uint_least32_t> atomic_uint_least32_t;
-typedef atomic<int_least64_t>  atomic_int_least64_t;
-typedef atomic<uint_least64_t> atomic_uint_least64_t;
-
-typedef atomic<int_fast8_t>   atomic_int_fast8_t;
-typedef atomic<uint_fast8_t>  atomic_uint_fast8_t;
-typedef atomic<int_fast16_t>  atomic_int_fast16_t;
-typedef atomic<uint_fast16_t> atomic_uint_fast16_t;
-typedef atomic<int_fast32_t>  atomic_int_fast32_t;
-typedef atomic<uint_fast32_t> atomic_uint_fast32_t;
-typedef atomic<int_fast64_t>  atomic_int_fast64_t;
-typedef atomic<uint_fast64_t> atomic_uint_fast64_t;
-
-typedef atomic<int8_t>   atomic_int8_t;
-typedef atomic<uint8_t>  atomic_uint8_t;
-typedef atomic<int16_t>  atomic_int16_t;
-typedef atomic<uint16_t> atomic_uint16_t;
-typedef atomic<int32_t>  atomic_int32_t;
-typedef atomic<uint32_t> atomic_uint32_t;
-typedef atomic<int64_t>  atomic_int64_t;
-typedef atomic<uint64_t> atomic_uint64_t;
-
-typedef atomic<intptr_t>  atomic_intptr_t;
-typedef atomic<uintptr_t> atomic_uintptr_t;
-typedef atomic<size_t>    atomic_size_t;
-typedef atomic<ptrdiff_t> atomic_ptrdiff_t;
-typedef atomic<intmax_t>  atomic_intmax_t;
-typedef atomic<uintmax_t> atomic_uintmax_t;
-
-// flag type and operations
-
-typedef struct atomic_flag
-{
-    atomic_flag() noexcept = default; // until C++20
-    constexpr atomic_flag() noexcept; // since C++20
-    atomic_flag(const atomic_flag&) = delete;
-    atomic_flag& operator=(const atomic_flag&) = delete;
-    atomic_flag& operator=(const atomic_flag&) volatile = delete;
-
-    bool test(memory_order m = memory_order_seq_cst) volatile noexcept;
-    bool test(memory_order m = memory_order_seq_cst) noexcept;
-    bool test_and_set(memory_order m = memory_order_seq_cst) volatile noexcept;
-    bool test_and_set(memory_order m = memory_order_seq_cst) noexcept;
-    void clear(memory_order m = memory_order_seq_cst) volatile noexcept;
-    void clear(memory_order m = memory_order_seq_cst) noexcept;
-
-    void wait(bool, memory_order = memory_order::seq_cst) const volatile noexcept;
-    void wait(bool, memory_order = memory_order::seq_cst) const noexcept;
-    void notify_one() volatile noexcept;
-    void notify_one() noexcept;
-    void notify_all() volatile noexcept;
-    void notify_all() noexcept;
-} atomic_flag;
-
-bool atomic_flag_test(volatile atomic_flag* obj) noexcept;
-bool atomic_flag_test(atomic_flag* obj) noexcept;
-bool atomic_flag_test_explicit(volatile atomic_flag* obj,
-                               memory_order m) noexcept;
-bool atomic_flag_test_explicit(atomic_flag* obj, memory_order m) noexcept;
-bool atomic_flag_test_and_set(volatile atomic_flag* obj) noexcept;
-bool atomic_flag_test_and_set(atomic_flag* obj) noexcept;
-bool atomic_flag_test_and_set_explicit(volatile atomic_flag* obj,
-                                       memory_order m) noexcept;
-bool atomic_flag_test_and_set_explicit(atomic_flag* obj, memory_order m) noexcept;
-void atomic_flag_clear(volatile atomic_flag* obj) noexcept;
-void atomic_flag_clear(atomic_flag* obj) noexcept;
-void atomic_flag_clear_explicit(volatile atomic_flag* obj, memory_order m) noexcept;
-void atomic_flag_clear_explicit(atomic_flag* obj, memory_order m) noexcept;
-
-void atomic_wait(const volatile atomic_flag* obj, T old) noexcept;
-void atomic_wait(const atomic_flag* obj, T old) noexcept;
-void atomic_wait_explicit(const volatile atomic_flag* obj, T old, memory_order m) noexcept;
-void atomic_wait_explicit(const atomic_flag* obj, T old, memory_order m) noexcept;
-void atomic_one(volatile atomic_flag* obj) noexcept;
-void atomic_one(atomic_flag* obj) noexcept;
-void atomic_all(volatile atomic_flag* obj) noexcept;
-void atomic_all(atomic_flag* obj) noexcept;
-
-// fences
-
-void atomic_thread_fence(memory_order m) noexcept;
-void atomic_signal_fence(memory_order m) noexcept;
-
-// deprecated
-
-template <class T>
-  void atomic_init(volatile atomic<T>* obj, typename atomic<T>::value_type desr) noexcept;
-
-template <class T>
-  void atomic_init(atomic<T>* obj, typename atomic<T>::value_type desr) noexcept;
-
-#define ATOMIC_VAR_INIT(value) see below
-
-#define ATOMIC_FLAG_INIT see below
-
-}  // std
-
-*/
-
-#include <__availability>
-#include <__config>
-#include <__threading_support>
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-#include <type_traits>
-#include <version>
-
-#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#pragma GCC system_header
-#endif
-
-#ifdef _LIBCPP_HAS_NO_THREADS
-# error <atomic> is not supported on this single threaded system
-#endif
-#ifdef _LIBCPP_HAS_NO_ATOMIC_HEADER
-# error <atomic> is not implemented
-#endif
-#ifdef kill_dependency
-# error C++ standard library is incompatible with <stdatomic.h>
-#endif
-
-#define _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) \
-  _LIBCPP_DIAGNOSE_WARNING(__m == memory_order_consume || \
-                           __m == memory_order_acquire || \
-                           __m == memory_order_acq_rel,   \
-                        "memory order argument to atomic operation is invalid")
-
-#define _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) \
-  _LIBCPP_DIAGNOSE_WARNING(__m == memory_order_release || \
-                           __m == memory_order_acq_rel,   \
-                        "memory order argument to atomic operation is invalid")
-
-#define _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__m, __f) \
-  _LIBCPP_DIAGNOSE_WARNING(__f == memory_order_release || \
-                           __f == memory_order_acq_rel,   \
-                        "memory order argument to atomic operation is invalid")
-
-_LIBCPP_BEGIN_NAMESPACE_STD
-
-// Figure out what the underlying type for `memory_order` would be if it were
-// declared as an unscoped enum (accounting for -fshort-enums). Use this result
-// to pin the underlying type in C++20.
-enum __legacy_memory_order {
-    __mo_relaxed,
-    __mo_consume,
-    __mo_acquire,
-    __mo_release,
-    __mo_acq_rel,
-    __mo_seq_cst
-};
-
-typedef underlying_type<__legacy_memory_order>::type __memory_order_underlying_t;
-
-#if _LIBCPP_STD_VER > 17
-
-enum class memory_order : __memory_order_underlying_t {
-  relaxed = __mo_relaxed,
-  consume = __mo_consume,
-  acquire = __mo_acquire,
-  release = __mo_release,
-  acq_rel = __mo_acq_rel,
-  seq_cst = __mo_seq_cst
-};
-
-inline constexpr auto memory_order_relaxed = memory_order::relaxed;
-inline constexpr auto memory_order_consume = memory_order::consume;
-inline constexpr auto memory_order_acquire = memory_order::acquire;
-inline constexpr auto memory_order_release = memory_order::release;
-inline constexpr auto memory_order_acq_rel = memory_order::acq_rel;
-inline constexpr auto memory_order_seq_cst = memory_order::seq_cst;
-
-#else
-
-typedef enum memory_order {
-  memory_order_relaxed = __mo_relaxed,
-  memory_order_consume = __mo_consume,
-  memory_order_acquire = __mo_acquire,
-  memory_order_release = __mo_release,
-  memory_order_acq_rel = __mo_acq_rel,
-  memory_order_seq_cst = __mo_seq_cst,
-} memory_order;
-
-#endif // _LIBCPP_STD_VER > 17
-
-template <typename _Tp> _LIBCPP_INLINE_VISIBILITY
-bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp const& __rhs) {
-    return _VSTD::memcmp(&__lhs, &__rhs, sizeof(_Tp)) == 0;
-}
-
-static_assert((is_same<underlying_type<memory_order>::type, __memory_order_underlying_t>::value),
-  "unexpected underlying type for std::memory_order");
-
-#if defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) || \
-    defined(_LIBCPP_ATOMIC_ONLY_USE_BUILTINS)
-
-// [atomics.types.generic]p1 guarantees _Tp is trivially copyable. Because
-// the default operator= in an object is not volatile, a byte-by-byte copy
-// is required.
-template <typename _Tp, typename _Tv> _LIBCPP_INLINE_VISIBILITY
-typename enable_if<is_assignable<_Tp&, _Tv>::value>::type
-__cxx_atomic_assign_volatile(_Tp& __a_value, _Tv const& __val) {
-  __a_value = __val;
-}
-template <typename _Tp, typename _Tv> _LIBCPP_INLINE_VISIBILITY
-typename enable_if<is_assignable<_Tp&, _Tv>::value>::type
-__cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val) {
-  volatile char* __to = reinterpret_cast<volatile char*>(&__a_value);
-  volatile char* __end = __to + sizeof(_Tp);
-  volatile const char* __from = reinterpret_cast<volatile const char*>(&__val);
-  while (__to != __end)
-    *__to++ = *__from++;
-}
-
-#endif
-
-#if defined(_LIBCPP_HAS_GCC_ATOMIC_IMP)
-
-template <typename _Tp>
-struct __cxx_atomic_base_impl {
-
-  _LIBCPP_INLINE_VISIBILITY
-#ifndef _LIBCPP_CXX03_LANG
-    __cxx_atomic_base_impl() _NOEXCEPT = default;
-#else
-    __cxx_atomic_base_impl() _NOEXCEPT : __a_value() {}
-#endif // _LIBCPP_CXX03_LANG
-  _LIBCPP_CONSTEXPR explicit __cxx_atomic_base_impl(_Tp value) _NOEXCEPT
-    : __a_value(value) {}
-  _Tp __a_value;
-};
-
-_LIBCPP_INLINE_VISIBILITY inline _LIBCPP_CONSTEXPR int __to_gcc_order(memory_order __order) {
-  // Avoid switch statement to make this a constexpr.
-  return __order == memory_order_relaxed ? __ATOMIC_RELAXED:
-         (__order == memory_order_acquire ? __ATOMIC_ACQUIRE:
-          (__order == memory_order_release ? __ATOMIC_RELEASE:
-           (__order == memory_order_seq_cst ? __ATOMIC_SEQ_CST:
-            (__order == memory_order_acq_rel ? __ATOMIC_ACQ_REL:
-              __ATOMIC_CONSUME))));
-}
-
-_LIBCPP_INLINE_VISIBILITY inline _LIBCPP_CONSTEXPR int __to_gcc_failure_order(memory_order __order) {
-  // Avoid switch statement to make this a constexpr.
-  return __order == memory_order_relaxed ? __ATOMIC_RELAXED:
-         (__order == memory_order_acquire ? __ATOMIC_ACQUIRE:
-          (__order == memory_order_release ? __ATOMIC_RELAXED:
-           (__order == memory_order_seq_cst ? __ATOMIC_SEQ_CST:
-            (__order == memory_order_acq_rel ? __ATOMIC_ACQUIRE:
-              __ATOMIC_CONSUME))));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_init(volatile __cxx_atomic_base_impl<_Tp>* __a,  _Tp __val) {
-  __cxx_atomic_assign_volatile(__a->__a_value, __val);
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_init(__cxx_atomic_base_impl<_Tp>* __a,  _Tp __val) {
-  __a->__a_value = __val;
-}
-
-_LIBCPP_INLINE_VISIBILITY inline
-void __cxx_atomic_thread_fence(memory_order __order) {
-  __atomic_thread_fence(__to_gcc_order(__order));
-}
-
-_LIBCPP_INLINE_VISIBILITY inline
-void __cxx_atomic_signal_fence(memory_order __order) {
-  __atomic_signal_fence(__to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_store(volatile __cxx_atomic_base_impl<_Tp>* __a,  _Tp __val,
-                        memory_order __order) {
-  __atomic_store(&__a->__a_value, &__val,
-                 __to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_store(__cxx_atomic_base_impl<_Tp>* __a,  _Tp __val,
-                        memory_order __order) {
-  __atomic_store(&__a->__a_value, &__val,
-                 __to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_load(const volatile __cxx_atomic_base_impl<_Tp>* __a,
-                      memory_order __order) {
-  _Tp __ret;
-  __atomic_load(&__a->__a_value, &__ret,
-                __to_gcc_order(__order));
-  return __ret;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_load(const __cxx_atomic_base_impl<_Tp>* __a, memory_order __order) {
-  _Tp __ret;
-  __atomic_load(&__a->__a_value, &__ret,
-                __to_gcc_order(__order));
-  return __ret;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_exchange(volatile __cxx_atomic_base_impl<_Tp>* __a,
-                          _Tp __value, memory_order __order) {
-  _Tp __ret;
-  __atomic_exchange(&__a->__a_value, &__value, &__ret,
-                    __to_gcc_order(__order));
-  return __ret;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_exchange(__cxx_atomic_base_impl<_Tp>* __a, _Tp __value,
-                          memory_order __order) {
-  _Tp __ret;
-  __atomic_exchange(&__a->__a_value, &__value, &__ret,
-                    __to_gcc_order(__order));
-  return __ret;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_strong(
-    volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value,
-    memory_order __success, memory_order __failure) {
-  return __atomic_compare_exchange(&__a->__a_value, __expected, &__value,
-                                   false,
-                                   __to_gcc_order(__success),
-                                   __to_gcc_failure_order(__failure));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_strong(
-    __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success,
-    memory_order __failure) {
-  return __atomic_compare_exchange(&__a->__a_value, __expected, &__value,
-                                   false,
-                                   __to_gcc_order(__success),
-                                   __to_gcc_failure_order(__failure));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_weak(
-    volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value,
-    memory_order __success, memory_order __failure) {
-  return __atomic_compare_exchange(&__a->__a_value, __expected, &__value,
-                                   true,
-                                   __to_gcc_order(__success),
-                                   __to_gcc_failure_order(__failure));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_weak(
-    __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success,
-    memory_order __failure) {
-  return __atomic_compare_exchange(&__a->__a_value, __expected, &__value,
-                                   true,
-                                   __to_gcc_order(__success),
-                                   __to_gcc_failure_order(__failure));
-}
-
-template <typename _Tp>
-struct __skip_amt { enum {value = 1}; };
-
-template <typename _Tp>
-struct __skip_amt<_Tp*> { enum {value = sizeof(_Tp)}; };
-
-// FIXME: Haven't figured out what the spec says about using arrays with
-// atomic_fetch_add. Force a failure rather than creating bad behavior.
-template <typename _Tp>
-struct __skip_amt<_Tp[]> { };
-template <typename _Tp, int n>
-struct __skip_amt<_Tp[n]> { };
-
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_base_impl<_Tp>* __a,
-                           _Td __delta, memory_order __order) {
-  return __atomic_fetch_add(&__a->__a_value, __delta * __skip_amt<_Tp>::value,
-                            __to_gcc_order(__order));
-}
-
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta,
-                           memory_order __order) {
-  return __atomic_fetch_add(&__a->__a_value, __delta * __skip_amt<_Tp>::value,
-                            __to_gcc_order(__order));
-}
-
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_base_impl<_Tp>* __a,
-                           _Td __delta, memory_order __order) {
-  return __atomic_fetch_sub(&__a->__a_value, __delta * __skip_amt<_Tp>::value,
-                            __to_gcc_order(__order));
-}
-
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta,
-                           memory_order __order) {
-  return __atomic_fetch_sub(&__a->__a_value, __delta * __skip_amt<_Tp>::value,
-                            __to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_base_impl<_Tp>* __a,
-                           _Tp __pattern, memory_order __order) {
-  return __atomic_fetch_and(&__a->__a_value, __pattern,
-                            __to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a,
-                           _Tp __pattern, memory_order __order) {
-  return __atomic_fetch_and(&__a->__a_value, __pattern,
-                            __to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a,
-                          _Tp __pattern, memory_order __order) {
-  return __atomic_fetch_or(&__a->__a_value, __pattern,
-                           __to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern,
-                          memory_order __order) {
-  return __atomic_fetch_or(&__a->__a_value, __pattern,
-                           __to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_xor(volatile __cxx_atomic_base_impl<_Tp>* __a,
-                           _Tp __pattern, memory_order __order) {
-  return __atomic_fetch_xor(&__a->__a_value, __pattern,
-                            __to_gcc_order(__order));
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_xor(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern,
-                           memory_order __order) {
-  return __atomic_fetch_xor(&__a->__a_value, __pattern,
-                            __to_gcc_order(__order));
-}
-
-#define __cxx_atomic_is_lock_free(__s) __atomic_is_lock_free(__s, 0)
-
-#elif defined(_LIBCPP_HAS_C_ATOMIC_IMP)
-
-template <typename _Tp>
-struct __cxx_atomic_base_impl {
-
-  _LIBCPP_INLINE_VISIBILITY
-#ifndef _LIBCPP_CXX03_LANG
-    __cxx_atomic_base_impl() _NOEXCEPT = default;
-#else
-    __cxx_atomic_base_impl() _NOEXCEPT : __a_value() {}
-#endif // _LIBCPP_CXX03_LANG
-  _LIBCPP_CONSTEXPR explicit __cxx_atomic_base_impl(_Tp value) _NOEXCEPT
-    : __a_value(value) {}
-  _LIBCPP_DISABLE_EXTENSION_WARNING _Atomic(_Tp) __a_value;
-};
-
-#define __cxx_atomic_is_lock_free(__s) __c11_atomic_is_lock_free(__s)
-
-_LIBCPP_INLINE_VISIBILITY inline
-void __cxx_atomic_thread_fence(memory_order __order) _NOEXCEPT {
-    __c11_atomic_thread_fence(static_cast<__memory_order_underlying_t>(__order));
-}
-
-_LIBCPP_INLINE_VISIBILITY inline
-void __cxx_atomic_signal_fence(memory_order __order) _NOEXCEPT {
-    __c11_atomic_signal_fence(static_cast<__memory_order_underlying_t>(__order));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_init(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __val) _NOEXCEPT {
-    __c11_atomic_init(&__a->__a_value, __val);
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_init(__cxx_atomic_base_impl<_Tp> * __a, _Tp __val) _NOEXCEPT {
-    __c11_atomic_init(&__a->__a_value, __val);
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_store(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __val, memory_order __order) _NOEXCEPT {
-    __c11_atomic_store(&__a->__a_value, __val, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_store(__cxx_atomic_base_impl<_Tp> * __a, _Tp __val, memory_order __order) _NOEXCEPT {
-    __c11_atomic_store(&__a->__a_value, __val, static_cast<__memory_order_underlying_t>(__order));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_load(__cxx_atomic_base_impl<_Tp> const volatile* __a, memory_order __order) _NOEXCEPT {
-    using __ptr_type = typename remove_const<decltype(__a->__a_value)>::type*;
-    return __c11_atomic_load(const_cast<__ptr_type>(&__a->__a_value), static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_load(__cxx_atomic_base_impl<_Tp> const* __a, memory_order __order) _NOEXCEPT {
-    using __ptr_type = typename remove_const<decltype(__a->__a_value)>::type*;
-    return __c11_atomic_load(const_cast<__ptr_type>(&__a->__a_value), static_cast<__memory_order_underlying_t>(__order));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_exchange(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __value, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_exchange(&__a->__a_value, __value, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_exchange(__cxx_atomic_base_impl<_Tp> * __a, _Tp __value, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_exchange(&__a->__a_value, __value, static_cast<__memory_order_underlying_t>(__order));
-}
-
-_LIBCPP_INLINE_VISIBILITY inline _LIBCPP_CONSTEXPR memory_order __to_failure_order(memory_order __order) {
-  // Avoid switch statement to make this a constexpr.
-  return __order == memory_order_release ? memory_order_relaxed:
-         (__order == memory_order_acq_rel ? memory_order_acquire:
-             __order);
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_strong(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) _NOEXCEPT {
-    return __c11_atomic_compare_exchange_strong(&__a->__a_value, __expected, __value, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__to_failure_order(__failure)));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_strong(__cxx_atomic_base_impl<_Tp> * __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) _NOEXCEPT {
-    return __c11_atomic_compare_exchange_strong(&__a->__a_value, __expected, __value, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__to_failure_order(__failure)));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) _NOEXCEPT {
-    return __c11_atomic_compare_exchange_weak(&__a->__a_value, __expected, __value, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__to_failure_order(__failure)));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_base_impl<_Tp> * __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) _NOEXCEPT {
-    return __c11_atomic_compare_exchange_weak(&__a->__a_value, __expected, __value,  static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__to_failure_order(__failure)));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __delta, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_add(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp> * __a, _Tp __delta, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_add(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp* __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp*> volatile* __a, ptrdiff_t __delta, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_add(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp* __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp*> * __a, ptrdiff_t __delta, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_add(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __delta, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_sub(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp> * __a, _Tp __delta, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_sub(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp* __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp*> volatile* __a, ptrdiff_t __delta, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_sub(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp* __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp*> * __a, ptrdiff_t __delta, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_sub(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __pattern, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_and(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp> * __a, _Tp __pattern, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_and(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __pattern, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_or(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp> * __a, _Tp __pattern, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_or(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order));
-}
-
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_xor(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __pattern, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_xor(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order));
-}
-template<class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_xor(__cxx_atomic_base_impl<_Tp> * __a, _Tp __pattern, memory_order __order) _NOEXCEPT {
-    return __c11_atomic_fetch_xor(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order));
-}
-
-#endif // _LIBCPP_HAS_GCC_ATOMIC_IMP, _LIBCPP_HAS_C_ATOMIC_IMP
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp kill_dependency(_Tp __y) _NOEXCEPT
-{
-    return __y;
-}
-
-#if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE)
-# define ATOMIC_BOOL_LOCK_FREE      __CLANG_ATOMIC_BOOL_LOCK_FREE
-# define ATOMIC_CHAR_LOCK_FREE      __CLANG_ATOMIC_CHAR_LOCK_FREE
-#ifndef _LIBCPP_HAS_NO_CHAR8_T
-# define ATOMIC_CHAR8_T_LOCK_FREE   __CLANG_ATOMIC_CHAR8_T_LOCK_FREE
-#endif
-# define ATOMIC_CHAR16_T_LOCK_FREE  __CLANG_ATOMIC_CHAR16_T_LOCK_FREE
-# define ATOMIC_CHAR32_T_LOCK_FREE  __CLANG_ATOMIC_CHAR32_T_LOCK_FREE
-# define ATOMIC_WCHAR_T_LOCK_FREE   __CLANG_ATOMIC_WCHAR_T_LOCK_FREE
-# define ATOMIC_SHORT_LOCK_FREE     __CLANG_ATOMIC_SHORT_LOCK_FREE
-# define ATOMIC_INT_LOCK_FREE       __CLANG_ATOMIC_INT_LOCK_FREE
-# define ATOMIC_LONG_LOCK_FREE      __CLANG_ATOMIC_LONG_LOCK_FREE
-# define ATOMIC_LLONG_LOCK_FREE     __CLANG_ATOMIC_LLONG_LOCK_FREE
-# define ATOMIC_POINTER_LOCK_FREE   __CLANG_ATOMIC_POINTER_LOCK_FREE
-#elif defined(__GCC_ATOMIC_BOOL_LOCK_FREE)
-# define ATOMIC_BOOL_LOCK_FREE      __GCC_ATOMIC_BOOL_LOCK_FREE
-# define ATOMIC_CHAR_LOCK_FREE      __GCC_ATOMIC_CHAR_LOCK_FREE
-#ifndef _LIBCPP_HAS_NO_CHAR8_T
-# define ATOMIC_CHAR8_T_LOCK_FREE   __GCC_ATOMIC_CHAR8_T_LOCK_FREE
-#endif
-# define ATOMIC_CHAR16_T_LOCK_FREE  __GCC_ATOMIC_CHAR16_T_LOCK_FREE
-# define ATOMIC_CHAR32_T_LOCK_FREE  __GCC_ATOMIC_CHAR32_T_LOCK_FREE
-# define ATOMIC_WCHAR_T_LOCK_FREE   __GCC_ATOMIC_WCHAR_T_LOCK_FREE
-# define ATOMIC_SHORT_LOCK_FREE     __GCC_ATOMIC_SHORT_LOCK_FREE
-# define ATOMIC_INT_LOCK_FREE       __GCC_ATOMIC_INT_LOCK_FREE
-# define ATOMIC_LONG_LOCK_FREE      __GCC_ATOMIC_LONG_LOCK_FREE
-# define ATOMIC_LLONG_LOCK_FREE     __GCC_ATOMIC_LLONG_LOCK_FREE
-# define ATOMIC_POINTER_LOCK_FREE   __GCC_ATOMIC_POINTER_LOCK_FREE
-#endif
-
-#ifdef _LIBCPP_ATOMIC_ONLY_USE_BUILTINS
-
-template<typename _Tp>
-struct __cxx_atomic_lock_impl {
-
-  _LIBCPP_INLINE_VISIBILITY
-  __cxx_atomic_lock_impl() _NOEXCEPT
-    : __a_value(), __a_lock(0) {}
-  _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR explicit
-  __cxx_atomic_lock_impl(_Tp value) _NOEXCEPT
-    : __a_value(value), __a_lock(0) {}
-
-  _Tp __a_value;
-  mutable __cxx_atomic_base_impl<_LIBCPP_ATOMIC_FLAG_TYPE> __a_lock;
-
-  _LIBCPP_INLINE_VISIBILITY void __lock() const volatile {
-    while(1 == __cxx_atomic_exchange(&__a_lock, _LIBCPP_ATOMIC_FLAG_TYPE(true), memory_order_acquire))
-        /*spin*/;
-  }
-  _LIBCPP_INLINE_VISIBILITY void __lock() const {
-    while(1 == __cxx_atomic_exchange(&__a_lock, _LIBCPP_ATOMIC_FLAG_TYPE(true), memory_order_acquire))
-        /*spin*/;
-  }
-  _LIBCPP_INLINE_VISIBILITY void __unlock() const volatile {
-    __cxx_atomic_store(&__a_lock, _LIBCPP_ATOMIC_FLAG_TYPE(false), memory_order_release);
-  }
-  _LIBCPP_INLINE_VISIBILITY void __unlock() const {
-    __cxx_atomic_store(&__a_lock, _LIBCPP_ATOMIC_FLAG_TYPE(false), memory_order_release);
-  }
-  _LIBCPP_INLINE_VISIBILITY _Tp __read() const volatile {
-    __lock();
-    _Tp __old;
-    __cxx_atomic_assign_volatile(__old, __a_value);
-    __unlock();
-    return __old;
-  }
-  _LIBCPP_INLINE_VISIBILITY _Tp __read() const {
-    __lock();
-    _Tp __old = __a_value;
-    __unlock();
-    return __old;
-  }
-};
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_init(volatile __cxx_atomic_lock_impl<_Tp>* __a,  _Tp __val) {
-  __cxx_atomic_assign_volatile(__a->__a_value, __val);
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_init(__cxx_atomic_lock_impl<_Tp>* __a,  _Tp __val) {
-  __a->__a_value = __val;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_store(volatile __cxx_atomic_lock_impl<_Tp>* __a,  _Tp __val, memory_order) {
-  __a->__lock();
-  __cxx_atomic_assign_volatile(__a->__a_value, __val);
-  __a->__unlock();
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void __cxx_atomic_store(__cxx_atomic_lock_impl<_Tp>* __a,  _Tp __val, memory_order) {
-  __a->__lock();
-  __a->__a_value = __val;
-  __a->__unlock();
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_load(const volatile __cxx_atomic_lock_impl<_Tp>* __a, memory_order) {
-  return __a->__read();
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_load(const __cxx_atomic_lock_impl<_Tp>* __a, memory_order) {
-  return __a->__read();
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_exchange(volatile __cxx_atomic_lock_impl<_Tp>* __a, _Tp __value, memory_order) {
-  __a->__lock();
-  _Tp __old;
-  __cxx_atomic_assign_volatile(__old, __a->__a_value);
-  __cxx_atomic_assign_volatile(__a->__a_value, __value);
-  __a->__unlock();
-  return __old;
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_exchange(__cxx_atomic_lock_impl<_Tp>* __a, _Tp __value, memory_order) {
-  __a->__lock();
-  _Tp __old = __a->__a_value;
-  __a->__a_value = __value;
-  __a->__unlock();
-  return __old;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_strong(volatile __cxx_atomic_lock_impl<_Tp>* __a,
-                                          _Tp* __expected, _Tp __value, memory_order, memory_order) {
-  _Tp __temp;
-  __a->__lock();
-  __cxx_atomic_assign_volatile(__temp, __a->__a_value);
-  bool __ret = (_VSTD::memcmp(&__temp, __expected, sizeof(_Tp)) == 0);
-  if(__ret)
-    __cxx_atomic_assign_volatile(__a->__a_value, __value);
-  else
-    __cxx_atomic_assign_volatile(*__expected, __a->__a_value);
-  __a->__unlock();
-  return __ret;
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_strong(__cxx_atomic_lock_impl<_Tp>* __a,
-                                          _Tp* __expected, _Tp __value, memory_order, memory_order) {
-  __a->__lock();
-  bool __ret = (_VSTD::memcmp(&__a->__a_value, __expected, sizeof(_Tp)) == 0);
-  if(__ret)
-    _VSTD::memcpy(&__a->__a_value, &__value, sizeof(_Tp));
-  else
-    _VSTD::memcpy(__expected, &__a->__a_value, sizeof(_Tp));
-  __a->__unlock();
-  return __ret;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_weak(volatile __cxx_atomic_lock_impl<_Tp>* __a,
-                                        _Tp* __expected, _Tp __value, memory_order, memory_order) {
-  _Tp __temp;
-  __a->__lock();
-  __cxx_atomic_assign_volatile(__temp, __a->__a_value);
-  bool __ret = (_VSTD::memcmp(&__temp, __expected, sizeof(_Tp)) == 0);
-  if(__ret)
-    __cxx_atomic_assign_volatile(__a->__a_value, __value);
-  else
-    __cxx_atomic_assign_volatile(*__expected, __a->__a_value);
-  __a->__unlock();
-  return __ret;
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_lock_impl<_Tp>* __a,
-                                        _Tp* __expected, _Tp __value, memory_order, memory_order) {
-  __a->__lock();
-  bool __ret = (_VSTD::memcmp(&__a->__a_value, __expected, sizeof(_Tp)) == 0);
-  if(__ret)
-    _VSTD::memcpy(&__a->__a_value, &__value, sizeof(_Tp));
-  else
-    _VSTD::memcpy(__expected, &__a->__a_value, sizeof(_Tp));
-  __a->__unlock();
-  return __ret;
-}
-
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp>* __a,
-                           _Td __delta, memory_order) {
-  __a->__lock();
-  _Tp __old;
-  __cxx_atomic_assign_volatile(__old, __a->__a_value);
-  __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old + __delta));
-  __a->__unlock();
-  return __old;
-}
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp>* __a,
-                           _Td __delta, memory_order) {
-  __a->__lock();
-  _Tp __old = __a->__a_value;
-  __a->__a_value += __delta;
-  __a->__unlock();
-  return __old;
-}
-
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp* __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp*>* __a,
-                           ptrdiff_t __delta, memory_order) {
-  __a->__lock();
-  _Tp* __old;
-  __cxx_atomic_assign_volatile(__old, __a->__a_value);
-  __cxx_atomic_assign_volatile(__a->__a_value, __old + __delta);
-  __a->__unlock();
-  return __old;
-}
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp* __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp*>* __a,
-                           ptrdiff_t __delta, memory_order) {
-  __a->__lock();
-  _Tp* __old = __a->__a_value;
-  __a->__a_value += __delta;
-  __a->__unlock();
-  return __old;
-}
-
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_lock_impl<_Tp>* __a,
-                           _Td __delta, memory_order) {
-  __a->__lock();
-  _Tp __old;
-  __cxx_atomic_assign_volatile(__old, __a->__a_value);
-  __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old - __delta));
-  __a->__unlock();
-  return __old;
-}
-template <typename _Tp, typename _Td>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_sub(__cxx_atomic_lock_impl<_Tp>* __a,
-                           _Td __delta, memory_order) {
-  __a->__lock();
-  _Tp __old = __a->__a_value;
-  __a->__a_value -= __delta;
-  __a->__unlock();
-  return __old;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_lock_impl<_Tp>* __a,
-                           _Tp __pattern, memory_order) {
-  __a->__lock();
-  _Tp __old;
-  __cxx_atomic_assign_volatile(__old, __a->__a_value);
-  __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old & __pattern));
-  __a->__unlock();
-  return __old;
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_and(__cxx_atomic_lock_impl<_Tp>* __a,
-                           _Tp __pattern, memory_order) {
-  __a->__lock();
-  _Tp __old = __a->__a_value;
-  __a->__a_value &= __pattern;
-  __a->__unlock();
-  return __old;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_lock_impl<_Tp>* __a,
-                          _Tp __pattern, memory_order) {
-  __a->__lock();
-  _Tp __old;
-  __cxx_atomic_assign_volatile(__old, __a->__a_value);
-  __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old | __pattern));
-  __a->__unlock();
-  return __old;
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_or(__cxx_atomic_lock_impl<_Tp>* __a,
-                          _Tp __pattern, memory_order) {
-  __a->__lock();
-  _Tp __old = __a->__a_value;
-  __a->__a_value |= __pattern;
-  __a->__unlock();
-  return __old;
-}
-
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_xor(volatile __cxx_atomic_lock_impl<_Tp>* __a,
-                           _Tp __pattern, memory_order) {
-  __a->__lock();
-  _Tp __old;
-  __cxx_atomic_assign_volatile(__old, __a->__a_value);
-  __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old ^ __pattern));
-  __a->__unlock();
-  return __old;
-}
-template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp __cxx_atomic_fetch_xor(__cxx_atomic_lock_impl<_Tp>* __a,
-                           _Tp __pattern, memory_order) {
-  __a->__lock();
-  _Tp __old = __a->__a_value;
-  __a->__a_value ^= __pattern;
-  __a->__unlock();
-  return __old;
-}
-
-#ifdef __cpp_lib_atomic_is_always_lock_free
-
-template<typename _Tp> struct __cxx_is_always_lock_free {
-    enum { __value = __atomic_always_lock_free(sizeof(_Tp), 0) }; };
-
-#else
-
-template<typename _Tp> struct __cxx_is_always_lock_free { enum { __value = false }; };
-// Implementations must match the C ATOMIC_*_LOCK_FREE macro values.
-template<> struct __cxx_is_always_lock_free<bool> { enum { __value = 2 == ATOMIC_BOOL_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<char> { enum { __value = 2 == ATOMIC_CHAR_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<signed char> { enum { __value = 2 == ATOMIC_CHAR_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<unsigned char> { enum { __value = 2 == ATOMIC_CHAR_LOCK_FREE }; };
-#ifndef _LIBCPP_HAS_NO_CHAR8_T
-template<> struct __cxx_is_always_lock_free<char8_t> { enum { __value = 2 == ATOMIC_CHAR8_T_LOCK_FREE }; };
-#endif
-template<> struct __cxx_is_always_lock_free<char16_t> { enum { __value = 2 == ATOMIC_CHAR16_T_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<char32_t> { enum { __value = 2 == ATOMIC_CHAR32_T_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<wchar_t> { enum { __value = 2 == ATOMIC_WCHAR_T_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<short> { enum { __value = 2 == ATOMIC_SHORT_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<unsigned short> { enum { __value = 2 == ATOMIC_SHORT_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<int> { enum { __value = 2 == ATOMIC_INT_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<unsigned int> { enum { __value = 2 == ATOMIC_INT_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<long> { enum { __value = 2 == ATOMIC_LONG_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<unsigned long> { enum { __value = 2 == ATOMIC_LONG_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<long long> { enum { __value = 2 == ATOMIC_LLONG_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<unsigned long long> { enum { __value = 2 == ATOMIC_LLONG_LOCK_FREE }; };
-template<typename _Tp> struct __cxx_is_always_lock_free<_Tp*> { enum { __value = 2 == ATOMIC_POINTER_LOCK_FREE }; };
-template<> struct __cxx_is_always_lock_free<std::nullptr_t> { enum { __value = 2 == ATOMIC_POINTER_LOCK_FREE }; };
-
-#endif //__cpp_lib_atomic_is_always_lock_free
-
-template <typename _Tp,
-          typename _Base = typename conditional<__cxx_is_always_lock_free<_Tp>::__value,
-                                                __cxx_atomic_base_impl<_Tp>,
-                                                __cxx_atomic_lock_impl<_Tp> >::type>
-#else
-template <typename _Tp,
-          typename _Base = __cxx_atomic_base_impl<_Tp> >
-#endif //_LIBCPP_ATOMIC_ONLY_USE_BUILTINS
-struct __cxx_atomic_impl : public _Base {
-
-#if _GNUC_VER >= 501
-    static_assert(is_trivially_copyable<_Tp>::value,
-      "std::atomic<Tp> requires that 'Tp' be a trivially copyable type");
-#endif
-
-  _LIBCPP_INLINE_VISIBILITY __cxx_atomic_impl() _NOEXCEPT _LIBCPP_DEFAULT
-  _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR explicit __cxx_atomic_impl(_Tp value) _NOEXCEPT
-    : _Base(value) {}
-};
-
-#ifdef __linux__
-    using __cxx_contention_t = int32_t;
-#else
-    using __cxx_contention_t = int64_t;
-#endif //__linux__
-
-using __cxx_atomic_contention_t = __cxx_atomic_impl<__cxx_contention_t>;
-
-#ifndef _LIBCPP_HAS_NO_PLATFORM_WAIT
-
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait(void const volatile*, __cxx_contention_t);
-
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait(__cxx_atomic_contention_t const volatile*, __cxx_contention_t);
-
-template <class _Atp, class _Fn>
-struct __libcpp_atomic_wait_backoff_impl {
-    _Atp* __a;
-    _Fn __test_fn;
-    _LIBCPP_AVAILABILITY_SYNC
-    _LIBCPP_INLINE_VISIBILITY bool operator()(chrono::nanoseconds __elapsed) const
-    {
-        if(__elapsed > chrono::microseconds(64))
-        {
-            auto const __monitor = __libcpp_atomic_monitor(__a);
-            if(__test_fn())
-                return true;
-            __libcpp_atomic_wait(__a, __monitor);
-        }
-        else if(__elapsed > chrono::microseconds(4))
-            __libcpp_thread_yield();
-        else
-            {} // poll
-        return false;
-    }
-};
-
-template <class _Atp, class _Fn>
-_LIBCPP_AVAILABILITY_SYNC
-_LIBCPP_INLINE_VISIBILITY bool __cxx_atomic_wait(_Atp* __a, _Fn && __test_fn)
-{
-    __libcpp_atomic_wait_backoff_impl<_Atp, typename decay<_Fn>::type> __backoff_fn = {__a, __test_fn};
-    return __libcpp_thread_poll_with_backoff(__test_fn, __backoff_fn);
-}
-
-#else // _LIBCPP_HAS_NO_PLATFORM_WAIT
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp> const volatile*) { }
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp> const volatile*) { }
-template <class _Atp, class _Fn>
-_LIBCPP_INLINE_VISIBILITY bool __cxx_atomic_wait(_Atp*, _Fn && __test_fn)
-{
-    return __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
-}
-
-#endif // _LIBCPP_HAS_NO_PLATFORM_WAIT
-
-template <class _Atp, class _Tp>
-struct __cxx_atomic_wait_test_fn_impl {
-    _Atp* __a;
-    _Tp __val;
-    memory_order __order;
-    _LIBCPP_INLINE_VISIBILITY bool operator()() const
-    {
-        return !__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val);
-    }
-};
-
-template <class _Atp, class _Tp>
-_LIBCPP_AVAILABILITY_SYNC
-_LIBCPP_INLINE_VISIBILITY bool __cxx_atomic_wait(_Atp* __a, _Tp const __val, memory_order __order)
-{
-    __cxx_atomic_wait_test_fn_impl<_Atp, _Tp> __test_fn = {__a, __val, __order};
-    return __cxx_atomic_wait(__a, __test_fn);
-}
-
-// general atomic<T>
-
-template <class _Tp, bool = is_integral<_Tp>::value && !is_same<_Tp, bool>::value>
-struct __atomic_base  // false
-{
-    mutable __cxx_atomic_impl<_Tp> __a_;
-
-#if defined(__cpp_lib_atomic_is_always_lock_free)
-  static _LIBCPP_CONSTEXPR bool is_always_lock_free = __atomic_always_lock_free(sizeof(__a_), 0);
-#endif
-
-    _LIBCPP_INLINE_VISIBILITY
-    bool is_lock_free() const volatile _NOEXCEPT
-        {return __cxx_atomic_is_lock_free(sizeof(_Tp));}
-    _LIBCPP_INLINE_VISIBILITY
-    bool is_lock_free() const _NOEXCEPT
-        {return static_cast<__atomic_base const volatile*>(this)->is_lock_free();}
-    _LIBCPP_INLINE_VISIBILITY
-    void store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-      _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m)
-        {__cxx_atomic_store(&__a_, __d, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    void store(_Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-      _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m)
-        {__cxx_atomic_store(&__a_, __d, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp load(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT
-      _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m)
-        {return __cxx_atomic_load(&__a_, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp load(memory_order __m = memory_order_seq_cst) const _NOEXCEPT
-      _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m)
-        {return __cxx_atomic_load(&__a_, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    operator _Tp() const volatile _NOEXCEPT {return load();}
-    _LIBCPP_INLINE_VISIBILITY
-    operator _Tp() const _NOEXCEPT          {return load();}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_exchange(&__a_, __d, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_exchange(&__a_, __d, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool compare_exchange_weak(_Tp& __e, _Tp __d,
-                               memory_order __s, memory_order __f) volatile _NOEXCEPT
-      _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
-        {return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __s, __f);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool compare_exchange_weak(_Tp& __e, _Tp __d,
-                               memory_order __s, memory_order __f) _NOEXCEPT
-      _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
-        {return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __s, __f);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool compare_exchange_strong(_Tp& __e, _Tp __d,
-                                 memory_order __s, memory_order __f) volatile _NOEXCEPT
-      _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
-        {return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __s, __f);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool compare_exchange_strong(_Tp& __e, _Tp __d,
-                                 memory_order __s, memory_order __f) _NOEXCEPT
-      _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
-        {return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __s, __f);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool compare_exchange_weak(_Tp& __e, _Tp __d,
-                              memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __m, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool compare_exchange_weak(_Tp& __e, _Tp __d,
-                               memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __m, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool compare_exchange_strong(_Tp& __e, _Tp __d,
-                              memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __m, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool compare_exchange_strong(_Tp& __e, _Tp __d,
-                                 memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __m, __m);}
-
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT
-        {__cxx_atomic_wait(&__a_, __v, __m);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT
-        {__cxx_atomic_wait(&__a_, __v, __m);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void notify_one() volatile _NOEXCEPT
-        {__cxx_atomic_notify_one(&__a_);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void notify_one() _NOEXCEPT
-        {__cxx_atomic_notify_one(&__a_);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void notify_all() volatile _NOEXCEPT
-        {__cxx_atomic_notify_all(&__a_);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void notify_all() _NOEXCEPT
-        {__cxx_atomic_notify_all(&__a_);}
-
-#if _LIBCPP_STD_VER > 17
-    _LIBCPP_INLINE_VISIBILITY constexpr
-    __atomic_base() noexcept(is_nothrow_default_constructible_v<_Tp>) : __a_(_Tp()) {}
-#else
-    _LIBCPP_INLINE_VISIBILITY
-    __atomic_base() _NOEXCEPT _LIBCPP_DEFAULT
-#endif
-
-    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
-    __atomic_base(_Tp __d) _NOEXCEPT : __a_(__d) {}
-
-#ifndef _LIBCPP_CXX03_LANG
-    __atomic_base(const __atomic_base&) = delete;
-#else
-private:
-    _LIBCPP_INLINE_VISIBILITY
-    __atomic_base(const __atomic_base&);
-#endif
-};
-
-#if defined(__cpp_lib_atomic_is_always_lock_free)
-template <class _Tp, bool __b>
-_LIBCPP_CONSTEXPR bool __atomic_base<_Tp, __b>::is_always_lock_free;
-#endif
-
-// atomic<Integral>
-
-template <class _Tp>
-struct __atomic_base<_Tp, true>
-    : public __atomic_base<_Tp, false>
-{
-    typedef __atomic_base<_Tp, false> __base;
-
-    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-    __atomic_base() _NOEXCEPT _LIBCPP_DEFAULT
-
-    _LIBCPP_INLINE_VISIBILITY
-    _LIBCPP_CONSTEXPR __atomic_base(_Tp __d) _NOEXCEPT : __base(__d) {}
-
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);}
-
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator++(int) volatile _NOEXCEPT      {return fetch_add(_Tp(1));}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator++(int) _NOEXCEPT               {return fetch_add(_Tp(1));}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator--(int) volatile _NOEXCEPT      {return fetch_sub(_Tp(1));}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator--(int) _NOEXCEPT               {return fetch_sub(_Tp(1));}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator++() volatile _NOEXCEPT         {return fetch_add(_Tp(1)) + _Tp(1);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator++() _NOEXCEPT                  {return fetch_add(_Tp(1)) + _Tp(1);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator--() volatile _NOEXCEPT         {return fetch_sub(_Tp(1)) - _Tp(1);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator--() _NOEXCEPT                  {return fetch_sub(_Tp(1)) - _Tp(1);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator+=(_Tp __op) volatile _NOEXCEPT {return fetch_add(__op) + __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator+=(_Tp __op) _NOEXCEPT          {return fetch_add(__op) + __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator-=(_Tp __op) volatile _NOEXCEPT {return fetch_sub(__op) - __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator-=(_Tp __op) _NOEXCEPT          {return fetch_sub(__op) - __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator&=(_Tp __op) volatile _NOEXCEPT {return fetch_and(__op) & __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator&=(_Tp __op) _NOEXCEPT          {return fetch_and(__op) & __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator|=(_Tp __op) volatile _NOEXCEPT {return fetch_or(__op) | __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator|=(_Tp __op) _NOEXCEPT          {return fetch_or(__op) | __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator^=(_Tp __op) volatile _NOEXCEPT {return fetch_xor(__op) ^ __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator^=(_Tp __op) _NOEXCEPT          {return fetch_xor(__op) ^ __op;}
-};
-
-// atomic<T>
-
-template <class _Tp>
-struct atomic
-    : public __atomic_base<_Tp>
-{
-    typedef __atomic_base<_Tp> __base;
-    typedef _Tp value_type;
-    typedef value_type difference_type;
-
-#if _LIBCPP_STD_VER > 17
-    _LIBCPP_INLINE_VISIBILITY
-    atomic() = default;
-#else
-    _LIBCPP_INLINE_VISIBILITY
-    atomic() _NOEXCEPT _LIBCPP_DEFAULT
-#endif
-
-    _LIBCPP_INLINE_VISIBILITY
-    _LIBCPP_CONSTEXPR atomic(_Tp __d) _NOEXCEPT : __base(__d) {}
-
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator=(_Tp __d) volatile _NOEXCEPT
-        {__base::store(__d); return __d;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp operator=(_Tp __d) _NOEXCEPT
-        {__base::store(__d); return __d;}
-
-    atomic& operator=(const atomic&) = delete;
-    atomic& operator=(const atomic&) volatile = delete;
-};
-
-// atomic<T*>
-
-template <class _Tp>
-struct atomic<_Tp*>
-    : public __atomic_base<_Tp*>
-{
-    typedef __atomic_base<_Tp*> __base;
-    typedef _Tp* value_type;
-    typedef ptrdiff_t difference_type;
-
-    _LIBCPP_INLINE_VISIBILITY
-    atomic() _NOEXCEPT _LIBCPP_DEFAULT
-
-    _LIBCPP_INLINE_VISIBILITY
-    _LIBCPP_CONSTEXPR atomic(_Tp* __d) _NOEXCEPT : __base(__d) {}
-
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator=(_Tp* __d) volatile _NOEXCEPT
-        {__base::store(__d); return __d;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator=(_Tp* __d) _NOEXCEPT
-        {__base::store(__d); return __d;}
-
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst)
-                                                                        volatile _NOEXCEPT
-        {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst)
-                                                                        volatile _NOEXCEPT
-        {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}
-
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator++(int) volatile _NOEXCEPT            {return fetch_add(1);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator++(int) _NOEXCEPT                     {return fetch_add(1);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator--(int) volatile _NOEXCEPT            {return fetch_sub(1);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator--(int) _NOEXCEPT                     {return fetch_sub(1);}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator++() volatile _NOEXCEPT               {return fetch_add(1) + 1;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator++() _NOEXCEPT                        {return fetch_add(1) + 1;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator--() volatile _NOEXCEPT               {return fetch_sub(1) - 1;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator--() _NOEXCEPT                        {return fetch_sub(1) - 1;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator+=(ptrdiff_t __op) volatile _NOEXCEPT {return fetch_add(__op) + __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator+=(ptrdiff_t __op) _NOEXCEPT          {return fetch_add(__op) + __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator-=(ptrdiff_t __op) volatile _NOEXCEPT {return fetch_sub(__op) - __op;}
-    _LIBCPP_INLINE_VISIBILITY
-    _Tp* operator-=(ptrdiff_t __op) _NOEXCEPT          {return fetch_sub(__op) - __op;}
-
-    atomic& operator=(const atomic&) = delete;
-    atomic& operator=(const atomic&) volatile = delete;
-};
-
-// atomic_is_lock_free
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_is_lock_free(const volatile atomic<_Tp>* __o) _NOEXCEPT
-{
-    return __o->is_lock_free();
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_is_lock_free(const atomic<_Tp>* __o) _NOEXCEPT
-{
-    return __o->is_lock_free();
-}
-
-// atomic_init
-
-template <class _Tp>
-_LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_INLINE_VISIBILITY
-void
-atomic_init(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    __cxx_atomic_init(&__o->__a_, __d);
-}
-
-template <class _Tp>
-_LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_INLINE_VISIBILITY
-void
-atomic_init(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    __cxx_atomic_init(&__o->__a_, __d);
-}
-
-// atomic_store
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void
-atomic_store(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    __o->store(__d);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void
-atomic_store(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    __o->store(__d);
-}
-
-// atomic_store_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void
-atomic_store_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, memory_order __m) _NOEXCEPT
-  _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m)
-{
-    __o->store(__d, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-void
-atomic_store_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, memory_order __m) _NOEXCEPT
-  _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m)
-{
-    __o->store(__d, __m);
-}
-
-// atomic_load
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp
-atomic_load(const volatile atomic<_Tp>* __o) _NOEXCEPT
-{
-    return __o->load();
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp
-atomic_load(const atomic<_Tp>* __o) _NOEXCEPT
-{
-    return __o->load();
-}
-
-// atomic_load_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp
-atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) _NOEXCEPT
-  _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m)
-{
-    return __o->load(__m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp
-atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) _NOEXCEPT
-  _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m)
-{
-    return __o->load(__m);
-}
-
-// atomic_exchange
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp
-atomic_exchange(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    return __o->exchange(__d);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp
-atomic_exchange(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    return __o->exchange(__d);
-}
-
-// atomic_exchange_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp
-atomic_exchange_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, memory_order __m) _NOEXCEPT
-{
-    return __o->exchange(__d, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp
-atomic_exchange_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, memory_order __m) _NOEXCEPT
-{
-    return __o->exchange(__d, __m);
-}
-
-// atomic_compare_exchange_weak
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    return __o->compare_exchange_weak(*__e, __d);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_compare_exchange_weak(atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    return __o->compare_exchange_weak(*__e, __d);
-}
-
-// atomic_compare_exchange_strong
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    return __o->compare_exchange_strong(*__e, __d);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_compare_exchange_strong(atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d) _NOEXCEPT
-{
-    return __o->compare_exchange_strong(*__e, __d);
-}
-
-// atomic_compare_exchange_weak_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_compare_exchange_weak_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e,
-                                      typename atomic<_Tp>::value_type __d,
-                                      memory_order __s, memory_order __f) _NOEXCEPT
-  _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
-{
-    return __o->compare_exchange_weak(*__e, __d, __s, __f);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d,
-                                      memory_order __s, memory_order __f) _NOEXCEPT
-  _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
-{
-    return __o->compare_exchange_weak(*__e, __d, __s, __f);
-}
-
-// atomic_compare_exchange_strong_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_compare_exchange_strong_explicit(volatile atomic<_Tp>* __o,
-                                        typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d,
-                                        memory_order __s, memory_order __f) _NOEXCEPT
-  _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
-{
-    return __o->compare_exchange_strong(*__e, __d, __s, __f);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-atomic_compare_exchange_strong_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e,
-                                        typename atomic<_Tp>::value_type __d,
-                                        memory_order __s, memory_order __f) _NOEXCEPT
-  _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
-{
-    return __o->compare_exchange_strong(*__e, __d, __s, __f);
-}
-
-// atomic_wait
-
-template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-void atomic_wait(const volatile atomic<_Tp>* __o,
-                 typename atomic<_Tp>::value_type __v) _NOEXCEPT
-{
-    return __o->wait(__v);
-}
-
-template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-void atomic_wait(const atomic<_Tp>* __o,
-                 typename atomic<_Tp>::value_type __v) _NOEXCEPT
-{
-    return __o->wait(__v);
-}
-
-// atomic_wait_explicit
-
-template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-void atomic_wait_explicit(const volatile atomic<_Tp>* __o,
-                          typename atomic<_Tp>::value_type __v,
-                          memory_order __m) _NOEXCEPT
-  _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m)
-{
-    return __o->wait(__v, __m);
-}
-
-template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-void atomic_wait_explicit(const atomic<_Tp>* __o,
-                          typename atomic<_Tp>::value_type __v,
-                          memory_order __m) _NOEXCEPT
-  _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m)
-{
-    return __o->wait(__v, __m);
-}
-
-// atomic_notify_one
-
-template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT
-{
-    __o->notify_one();
-}
-template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT
-{
-    __o->notify_one();
-}
-
-// atomic_notify_one
-
-template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT
-{
-    __o->notify_all();
-}
-template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT
-{
-    __o->notify_all();
-}
-
-// atomic_fetch_add
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
-    _Tp
->::type
-atomic_fetch_add(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
-{
-    return __o->fetch_add(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
-    _Tp
->::type
-atomic_fetch_add(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
-{
-    return __o->fetch_add(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_add(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT
-{
-    return __o->fetch_add(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_add(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT
-{
-    return __o->fetch_add(__op);
-}
-
-// atomic_fetch_add_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
-    _Tp
->::type
-atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_add(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
-    _Tp
->::type
-atomic_fetch_add_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_add(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_add(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_add_explicit(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_add(__op, __m);
-}
-
-// atomic_fetch_sub
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
-    _Tp
->::type
-atomic_fetch_sub(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
-{
-    return __o->fetch_sub(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
-    _Tp
->::type
-atomic_fetch_sub(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
-{
-    return __o->fetch_sub(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_sub(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT
-{
-    return __o->fetch_sub(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_sub(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT
-{
-    return __o->fetch_sub(__op);
-}
-
-// atomic_fetch_sub_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
-    _Tp
->::type
-atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_sub(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
-    _Tp
->::type
-atomic_fetch_sub_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_sub(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_sub(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_sub_explicit(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_sub(__op, __m);
-}
-
-// atomic_fetch_and
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_and(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT
-{
-    return __o->fetch_and(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_and(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT
-{
-    return __o->fetch_and(__op);
-}
-
-// atomic_fetch_and_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_and_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_and(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_and_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_and(__op, __m);
-}
-
-// atomic_fetch_or
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_or(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT
-{
-    return __o->fetch_or(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_or(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT
-{
-    return __o->fetch_or(__op);
-}
-
-// atomic_fetch_or_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_or_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_or(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_or_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_or(__op, __m);
-}
-
-// atomic_fetch_xor
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_xor(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT
-{
-    return __o->fetch_xor(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_xor(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT
-{
-    return __o->fetch_xor(__op);
-}
-
-// atomic_fetch_xor_explicit
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_xor_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_xor(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
-    is_integral<_Tp>::value && !is_same<_Tp, bool>::value,
-    _Tp
->::type
-atomic_fetch_xor_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT
-{
-    return __o->fetch_xor(__op, __m);
-}
-
-// flag type and operations
-
-typedef struct atomic_flag
-{
-    __cxx_atomic_impl<_LIBCPP_ATOMIC_FLAG_TYPE> __a_;
-
-    _LIBCPP_INLINE_VISIBILITY
-    bool test(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT
-        {return _LIBCPP_ATOMIC_FLAG_TYPE(true) == __cxx_atomic_load(&__a_, __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool test(memory_order __m = memory_order_seq_cst) const _NOEXCEPT
-        {return _LIBCPP_ATOMIC_FLAG_TYPE(true) == __cxx_atomic_load(&__a_, __m);}
-
-    _LIBCPP_INLINE_VISIBILITY
-    bool test_and_set(memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {return __cxx_atomic_exchange(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(true), __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    bool test_and_set(memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {return __cxx_atomic_exchange(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(true), __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    void clear(memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
-        {__cxx_atomic_store(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(false), __m);}
-    _LIBCPP_INLINE_VISIBILITY
-    void clear(memory_order __m = memory_order_seq_cst) _NOEXCEPT
-        {__cxx_atomic_store(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(false), __m);}
-
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-    void wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT
-        {__cxx_atomic_wait(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-    void wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT
-        {__cxx_atomic_wait(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-    void notify_one() volatile _NOEXCEPT
-        {__cxx_atomic_notify_one(&__a_);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-    void notify_one() _NOEXCEPT
-        {__cxx_atomic_notify_one(&__a_);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-    void notify_all() volatile _NOEXCEPT
-        {__cxx_atomic_notify_all(&__a_);}
-    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-    void notify_all() _NOEXCEPT
-        {__cxx_atomic_notify_all(&__a_);}
-
-#if _LIBCPP_STD_VER > 17
-    _LIBCPP_INLINE_VISIBILITY constexpr
-    atomic_flag() _NOEXCEPT : __a_(false) {}
-#else
-    _LIBCPP_INLINE_VISIBILITY
-    atomic_flag() _NOEXCEPT _LIBCPP_DEFAULT
-#endif
-
-    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
-    atomic_flag(bool __b) _NOEXCEPT : __a_(__b) {} // EXTENSION
-
-#ifndef _LIBCPP_CXX03_LANG
-    atomic_flag(const atomic_flag&) = delete;
-    atomic_flag& operator=(const atomic_flag&) = delete;
-    atomic_flag& operator=(const atomic_flag&) volatile = delete;
-#else
-private:
-    _LIBCPP_INLINE_VISIBILITY
-    atomic_flag(const atomic_flag&);
-    _LIBCPP_INLINE_VISIBILITY
-    atomic_flag& operator=(const atomic_flag&);
-    _LIBCPP_INLINE_VISIBILITY
-    atomic_flag& operator=(const atomic_flag&) volatile;
-#endif
-} atomic_flag;
-
-
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-atomic_flag_test(const volatile atomic_flag* __o) _NOEXCEPT
-{
-    return __o->test();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-atomic_flag_test(const atomic_flag* __o) _NOEXCEPT
-{
-    return __o->test();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-atomic_flag_test_explicit(const volatile atomic_flag* __o, memory_order __m) _NOEXCEPT
-{
-    return __o->test(__m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-atomic_flag_test_explicit(const atomic_flag* __o, memory_order __m) _NOEXCEPT
-{
-    return __o->test(__m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-atomic_flag_test_and_set(volatile atomic_flag* __o) _NOEXCEPT
-{
-    return __o->test_and_set();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-atomic_flag_test_and_set(atomic_flag* __o) _NOEXCEPT
-{
-    return __o->test_and_set();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-atomic_flag_test_and_set_explicit(volatile atomic_flag* __o, memory_order __m) _NOEXCEPT
-{
-    return __o->test_and_set(__m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-atomic_flag_test_and_set_explicit(atomic_flag* __o, memory_order __m) _NOEXCEPT
-{
-    return __o->test_and_set(__m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-void
-atomic_flag_clear(volatile atomic_flag* __o) _NOEXCEPT
-{
-    __o->clear();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-void
-atomic_flag_clear(atomic_flag* __o) _NOEXCEPT
-{
-    __o->clear();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-void
-atomic_flag_clear_explicit(volatile atomic_flag* __o, memory_order __m) _NOEXCEPT
-{
-    __o->clear(__m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-void
-atomic_flag_clear_explicit(atomic_flag* __o, memory_order __m) _NOEXCEPT
-{
-    __o->clear(__m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_SYNC
-void
-atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT
-{
-    __o->wait(__v);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_SYNC
-void
-atomic_flag_wait(const atomic_flag* __o, bool __v) _NOEXCEPT
-{
-    __o->wait(__v);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_SYNC
-void
-atomic_flag_wait_explicit(const volatile atomic_flag* __o,
-                          bool __v, memory_order __m) _NOEXCEPT
-{
-    __o->wait(__v, __m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_SYNC
-void
-atomic_flag_wait_explicit(const atomic_flag* __o,
-                          bool __v, memory_order __m) _NOEXCEPT
-{
-    __o->wait(__v, __m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_SYNC
-void
-atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT
-{
-    __o->notify_one();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_SYNC
-void
-atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT
-{
-    __o->notify_one();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_SYNC
-void
-atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT
-{
-    __o->notify_all();
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_SYNC
-void
-atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT
-{
-    __o->notify_all();
-}
-
-// fences
-
-inline _LIBCPP_INLINE_VISIBILITY
-void
-atomic_thread_fence(memory_order __m) _NOEXCEPT
-{
-    __cxx_atomic_thread_fence(__m);
-}
-
-inline _LIBCPP_INLINE_VISIBILITY
-void
-atomic_signal_fence(memory_order __m) _NOEXCEPT
-{
-    __cxx_atomic_signal_fence(__m);
-}
-
-// Atomics for standard typedef types
-
-typedef atomic<bool>               atomic_bool;
-typedef atomic<char>               atomic_char;
-typedef atomic<signed char>        atomic_schar;
-typedef atomic<unsigned char>      atomic_uchar;
-typedef atomic<short>              atomic_short;
-typedef atomic<unsigned short>     atomic_ushort;
-typedef atomic<int>                atomic_int;
-typedef atomic<unsigned int>       atomic_uint;
-typedef atomic<long>               atomic_long;
-typedef atomic<unsigned long>      atomic_ulong;
-typedef atomic<long long>          atomic_llong;
-typedef atomic<unsigned long long> atomic_ullong;
-#ifndef _LIBCPP_HAS_NO_CHAR8_T
-typedef atomic<char8_t>            atomic_char8_t;
-#endif
-typedef atomic<char16_t>           atomic_char16_t;
-typedef atomic<char32_t>           atomic_char32_t;
-typedef atomic<wchar_t>            atomic_wchar_t;
-
-typedef atomic<int_least8_t>   atomic_int_least8_t;
-typedef atomic<uint_least8_t>  atomic_uint_least8_t;
-typedef atomic<int_least16_t>  atomic_int_least16_t;
-typedef atomic<uint_least16_t> atomic_uint_least16_t;
-typedef atomic<int_least32_t>  atomic_int_least32_t;
-typedef atomic<uint_least32_t> atomic_uint_least32_t;
-typedef atomic<int_least64_t>  atomic_int_least64_t;
-typedef atomic<uint_least64_t> atomic_uint_least64_t;
-
-typedef atomic<int_fast8_t>   atomic_int_fast8_t;
-typedef atomic<uint_fast8_t>  atomic_uint_fast8_t;
-typedef atomic<int_fast16_t>  atomic_int_fast16_t;
-typedef atomic<uint_fast16_t> atomic_uint_fast16_t;
-typedef atomic<int_fast32_t>  atomic_int_fast32_t;
-typedef atomic<uint_fast32_t> atomic_uint_fast32_t;
-typedef atomic<int_fast64_t>  atomic_int_fast64_t;
-typedef atomic<uint_fast64_t> atomic_uint_fast64_t;
-
-typedef atomic< int8_t>  atomic_int8_t;
-typedef atomic<uint8_t>  atomic_uint8_t;
-typedef atomic< int16_t> atomic_int16_t;
-typedef atomic<uint16_t> atomic_uint16_t;
-typedef atomic< int32_t> atomic_int32_t;
-typedef atomic<uint32_t> atomic_uint32_t;
-typedef atomic< int64_t> atomic_int64_t;
-typedef atomic<uint64_t> atomic_uint64_t;
-
-typedef atomic<intptr_t>  atomic_intptr_t;
-typedef atomic<uintptr_t> atomic_uintptr_t;
-typedef atomic<size_t>    atomic_size_t;
-typedef atomic<ptrdiff_t> atomic_ptrdiff_t;
-typedef atomic<intmax_t>  atomic_intmax_t;
-typedef atomic<uintmax_t> atomic_uintmax_t;
-
-// atomic_*_lock_free : prefer the contention type most highly, then the largest lock-free type
-
-#ifdef __cpp_lib_atomic_is_always_lock_free
-# define _LIBCPP_CONTENTION_LOCK_FREE __atomic_always_lock_free(sizeof(__cxx_contention_t), 0)
-#else
-# define _LIBCPP_CONTENTION_LOCK_FREE false
-#endif
-
-#if ATOMIC_LLONG_LOCK_FREE == 2
-typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, long long>::type          __libcpp_signed_lock_free;
-typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, unsigned long long>::type __libcpp_unsigned_lock_free;
-#elif ATOMIC_INT_LOCK_FREE == 2
-typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, int>::type                __libcpp_signed_lock_free;
-typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, unsigned int>::type       __libcpp_unsigned_lock_free;
-#elif ATOMIC_SHORT_LOCK_FREE == 2
-typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, short>::type              __libcpp_signed_lock_free;
-typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, unsigned short>::type     __libcpp_unsigned_lock_free;
-#elif ATOMIC_CHAR_LOCK_FREE == 2
-typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, char>::type               __libcpp_signed_lock_free;
-typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, unsigned char>::type      __libcpp_unsigned_lock_free;
-#else
-    // No signed/unsigned lock-free types
-#endif
-
-typedef atomic<__libcpp_signed_lock_free> atomic_signed_lock_free;
-typedef atomic<__libcpp_unsigned_lock_free> atomic_unsigned_lock_free;
-
-#define ATOMIC_FLAG_INIT {false}
-#define ATOMIC_VAR_INIT(__v) {__v}
-
-_LIBCPP_END_NAMESPACE_STD
-
-#endif // _LIBCPP_ATOMIC
diff --git a/sdk/include/locks.hh b/sdk/include/locks.hh
index 9757a98..a89d4b7 100644
--- a/sdk/include/locks.hh
+++ b/sdk/include/locks.hh
@@ -5,7 +5,6 @@
 #include <debug.hh>
 #include <errno.h>
 #include <futex.h>
-#include <semaphore.h>
 #include <thread.h>
 
 __clang_ignored_warning_push("-Watomic-alignment")
@@ -288,6 +287,14 @@
 		wrappedLock->lock();
 	}
 
+	/// Constructor, attempts to acquire the lock with a timeout.
+	[[nodiscard]] explicit LockGuard(Lock &lock, Timeout *timeout) requires(
+	  TryLockable<Lock>)
+	  : wrappedLock(&lock), isOwned(false)
+	{
+		try_lock(timeout);
+	}
+
 	/// Move constructor, transfers ownership of the lock.
 	[[nodiscard]] explicit LockGuard(LockGuard &&guard)
 	  : wrappedLock(guard.wrappedLock), isOwned(guard.isOwned)
@@ -337,5 +344,26 @@
 			wrappedLock->unlock();
 		}
 	}
+
+	/**
+	 * Conversion to bool.  Returns true if this guard owns the lock, false
+	 * otherwise.  This allows lock guards to be used with a timeout in
+	 * conditional blocks, such as:
+	 *
+	 * ```
+	 * if (LockGuard g{lock, timeout})
+	 * {
+	 *    // Run this code if we acquired the lock, releasing the lock at the end.
+	 * }
+	 * else
+	 * {
+	 *    // Run this code if we did not acquire the lock.
+	 * }
+	 * ```
+	 */
+	operator bool()
+	{
+		return isOwned;
+	}
 };
 __clang_ignored_warning_pop()
diff --git a/sdk/include/multiwaiter.h b/sdk/include/multiwaiter.h
index 7830b52..418228c 100644
--- a/sdk/include/multiwaiter.h
+++ b/sdk/include/multiwaiter.h
@@ -38,20 +38,12 @@
  */
 enum EventWaiterKind
 {
-	/// Event source is a message queue.
-	EventWaiterQueue,
 	/// Event source is an event channel.
 	EventWaiterEventChannel,
 	/// Event source is a futex.
 	EventWaiterFutex
 };
 
-enum [[clang::flag_enum]] EventWaiterQueueFlags{
-  /// Notify when the queue is not full.
-  EventWaiterQueueSendReady = (1 << 0),
-  /// Notify when the queue is not empty.
-  EventWaiterQueueReceiveReady = (1 << 1)};
-
 enum [[clang::flag_enum]] EventWaiterEventChannelFlags{
   /// Automatically clear the bits we waited on.
   EventWaiterEventChannelClearOnExit = (1 << 24),
@@ -78,9 +70,6 @@
 	 * Event-specific configuration.  This field is modified during the wait
 	 * call.  The interpretation of this depends on `kind`:
 	 *
-	 * - `EventWaiterQueue`: this contains a bitmap of `EventWaiterQueueFlags`
-	 *   values indicating the events to watch for.  On return, the bits for
-	 *   the values that have been set will be stored.
 	 * - `EventWaiterEventChannel`: The low 24 bits contain the bits to
 	 *   monitor, the top bit indicates whether this event is triggered if all
 	 *   of the bits are set (true) or some of them (false).  On return, this
diff --git a/sdk/include/queue.h b/sdk/include/queue.h
index 1fa2933..162f91c 100644
--- a/sdk/include/queue.h
+++ b/sdk/include/queue.h
@@ -1,82 +1,239 @@
 // Copyright Microsoft and CHERIoT Contributors.
 // SPDX-License-Identifier: MIT
+/**
+ * This file contains the interface for a simple message queue.  This is split
+ * into two layers.  The core functionality is implemented as a shared library.
+ * This allows message queues to be used for inter-thread communication without
+ * requiring cross-compartment calls except in the blocking cases (reading from
+ * an empty queue, writing to a full queue).
+ *
+ * These library interfaces are then wrapped in a compartment, which provides
+ * sealed capabilities that authorise sending or receiving messages via a
+ * queue.  The compartment interface can be used between mutually distrusting
+ * compartments.  Neither endpoint can corrupt the queue state, though there is
+ * of course no guarantee that the sender will send valid data.
+ *
+ * Both sets of queues support multiple senders and multiple receivers.  This
+ * does *not* guarantee priority propagation and so a high-priority thread
+ * sending a message may be starved by a low-priority thread that attempts to
+ * send a message over the same queue and is preempted by a medium-priority
+ * thread.
+ */
 
 #pragma once
 
-#include <cdefs.h>
-#include <compartment.h>
+#include "cdefs.h"
+#include <multiwaiter.h>
+#include <stdatomic.h>
 #include <stddef.h>
 #include <stdint.h>
+#include <stdlib.h>
 #include <timeout.h>
 
-/*
- * Queue APIs.
- * Queues are FIFO structures with fixed-size messages. This queue
- * implementation allows for multiple senders and receivers, and the same queue
- * handle is used for both sending and receiving. Messages are always copied to
- * and from the callers instead of taking pointers. Each queue has a maximum
- * number of messages it can store, and callers can specify a timeout that a
- * send and recv operation can be blocked for if the queue is
- * full or empty.
+/**
+ * A handle to a queue endpoint.
+ *
+ * Dropping permissions can make this a receive-only or a send-only handle.
  */
+struct QueueHandle
+{
+	/**
+	 * The size of one element in this queue.
+	 */
+	size_t elementSize;
+	/**
+	 * The size of the queue.
+	 */
+	size_t queueSize;
+	/**
+	 * The buffer used for storing queue elements.
+	 */
+	void *buffer;
+	/**
+	 * The producer counter.
+	 */
+	_Atomic(uint32_t) *producer;
+	/**
+	 * The consumer counter.
+	 */
+	_Atomic(uint32_t) *consumer;
+};
 
 __BEGIN_DECLS
 
 /**
- * Create a new message queue.
+ * Allocates space for a queue using `heapCapability` and stores a handle to it
+ * via `outQueue`.  The underlying allocation (which is necessary to free the
+ * queue) is returned via `outAllocation`.
  *
- * @param outQue storage for the returned sealed queue handle. The storage
- * capability must have store and store capability permissions and at least
- * enough space for sizeof(void *). It can be a local capability.
- * @param itemSize size of each message
- * @param maxNItems max number of messages
- *
- * @return error code. 0 on success
+ * The queue has space for `elementCount` entries.  Each entry is a fixed
+ * size, `elementSize` bytes.
  */
-int __cheri_compartment("sched") queue_create(Timeout           *timeout,
-                                              struct SObjStruct *heapCapability,
-                                              void             **outQue,
-                                              size_t             itemSize,
-                                              size_t             maxNItems);
+int __cheri_libcall queue_create(Timeout            *timeout,
+                                 struct SObjStruct  *heapCapability,
+                                 struct QueueHandle *outQueue,
+                                 void              **outAllocation,
+                                 size_t              elementSize,
+                                 size_t              elementCount);
 
 /**
- * Delete this queue. All blockers will be woken up.
+ * Convert a queue handle returned from `queue_create` into one that can be
+ * used *only* for receiving.
  *
- * @param que sealed queue handle
- *
- * @return error code. 0 on success
+ * Note: This is primarily defence in depth.  A malicious holder of this queue
+ * handle can still set the consumer counter to invalid values.
  */
-int __cheri_compartment("sched")
-  queue_delete(struct SObjStruct *heapCapability, void *que);
+struct QueueHandle __cheri_libcall
+queue_make_receive_handle(struct QueueHandle handle);
 
 /**
- * Send a message to the queue, blocking for at most waitTicks of timer
- * ticks. The message size does not need to be provided, since we get
- * the size internally.
+ * Convert a queue handle returned from `queue_create` into one that can be
+ * used *only* for sending.
  *
- * @param que sealed queue handle
- * @param src the message to be sent
- *
- * @return error code. 0 on success
+ * Note: This is primarily defence in depth.  A malicious holder of this queue
+ * handle can still set the producer counter to invalid values and overwrite
+ * arbitrary queue locations.
  */
-int __cheri_compartment("sched")
-  queue_send(Timeout *timeout, void *que, const void *src);
+struct QueueHandle __cheri_libcall
+queue_make_send_handle(struct QueueHandle handle);
 
 /**
- * Same as queue_send, just in the other direction.
+ * Send a message to the queue specified by `handle`.  This expects to be able
+ * to copy the number of bytes specified by `elementSize` when the queue was
+ * created from `src`.
+ *
+ * Returns 0 on success.  On failure, returns `-ETIMEOUT` if the timeout was
+ * exhausted, `-EINVAL` on invalid arguments.
+ *
+ * This is expected to be called with a valid queue handle.  It does not validate
+ * that this is correct.  It uses `safe_memcpy` and so will check the buffer.
  */
-int __cheri_compartment("sched")
-  queue_recv(Timeout *timeout, void *que, void *dst);
+int __cheri_libcall queue_send(Timeout            *timeout,
+                               struct QueueHandle *handle,
+                               const void         *src);
 
 /**
- * Check the number of remaining items in the queue.
+ * Receive a message over a queue specified by `handle`.  This expects to be
+ * able to copy the number of bytes specified by `elementSize`.  The message is
+ * copied to `dst`, which must have sufficient permissions and space to hold
+ * the message.
  *
- * @param que sealed queue handle
- * @param items buffer of the return value
- *
- * @return error code. 0 on success
+ * Returns 0 on success, `-ETIMEDOUT` if the timeout was exhausted, `-EINVAL` on
+ * invalid arguments.
  */
-int __cheri_compartment("sched")
-  queue_items_remaining(void *que, size_t *items);
+int __cheri_libcall queue_receive(Timeout            *timeout,
+                                  struct QueueHandle *handle,
+                                  void               *dst);
+
+/**
+ * Returns the number of items in the queue specified by `handle` via `items`.
+ *
+ * Returns 0 on success.  This has no failure mechanisms, but is intended to
+ * have the same interface as the version that operates on a sealed queue
+ * handle.
+ *
+ * Note: This interface is inherently racy.  The number of items in the queue
+ * may change in between the return of this function and the caller acting on
+ * the result.
+ */
+int __cheri_libcall queue_items_remaining(struct QueueHandle *handle,
+                                          size_t             *items);
+
+/**
+ * Allocate a new message queue that is managed by the message queue
+ * compartment.  This is returned as two sealed pointers to send and receive
+ * ends of the queue.
+ */
+int __cheri_compartment("message_queue")
+  queue_create_sealed(Timeout            *timeout,
+                      struct SObjStruct  *heapCapability,
+                      struct SObjStruct **outQueueSend,
+                      struct SObjStruct **outQueueReceive,
+                      size_t              elementSize,
+                      size_t              elementCount);
+
+/**
+ * Destroy a queue using a sealed queue endpoint handle.  The queue is not
+ * actually freed until *both* endpoints are destroyed, which means that you
+ * can safely call this from the sending end without the receiving end losing
+ * access to messages held in the queue.
+ */
+int __cheri_compartment("message_queue")
+  queue_destroy_sealed(Timeout           *timeout,
+                       struct SObjStruct *heapCapability,
+                       struct SObjStruct *queueHandle);
+
+/**
+ * Send a message via a sealed queue endpoint.  This behaves in the same way as
+ * `queue_send`, except that it will return `-EINVAL` if the endpoint is not a
+ * valid sending endpoint and may return `-ECOMPARTMENTFAIL` if the queue is
+ * destroyed during the call.
+ */
+int __cheri_compartment("message_queue")
+  queue_send_sealed(Timeout           *timeout,
+                    struct SObjStruct *handle,
+                    const void        *src);
+
+/**
+ * Receive a message via a sealed queue endpoint.  This behaves in the same way
+ * as `queue_receive`, except that it will return `-EINVAL` if the endpoint is
+ * not a valid receiving endpoint and may return `-ECOMPARTMENTFAIL` if the
+ * queue is destroyed during the call.
+ */
+int __cheri_compartment("message_queue")
+  queue_receive_sealed(Timeout *timeout, struct SObjStruct *handle, void *dst);
+
+/**
+ * Returns, via `items`, the number of items in the queue specified by `handle`.
+ * Returns 0 on success.
+ *
+ * This call is intended to be fast and so does minimal checking of arguments.
+ * It does not mutate state or acquire any locks and so may return
+ * `-ECOMPARTMENTFAIL` for any failure case.
+ */
+int __cheri_compartment("message_queue")
+  queue_items_remaining_sealed(struct SObjStruct *handle, size_t *items);
+
+/**
+ * Initialise an event waiter source so that it will wait for the queue to be
+ * ready to receive.  Note that this is inherently racy because another consumer
+ * may drain the queue before this consumer wakes up.
+ */
+void __cheri_libcall
+multiwaiter_queue_receive_init(struct EventWaiterSource *source,
+                               struct QueueHandle       *handle);
+
+/**
+ * Initialise an event waiter source so that it will wait for the queue to be
+ * ready to send.  Note that this is inherently racy because another producer
+ * may fill the queue before this producer wakes up.
+ */
+void __cheri_libcall
+multiwaiter_queue_send_init(struct EventWaiterSource *source,
+                            struct QueueHandle       *handle);
+
+/**
+ * Initialise an event waiter source as in `multiwaiter_queue_receive_init`,
+ * using a sealed queue endpoint.  The `handle` argument must be a receive
+ * endpoint.
+ *
+ * Returns 0 on success, `-EINVAL` on invalid arguments.  May return
+ * `-ECOMPARTMENTFAIL` if the queue is deallocated in the middle of this call.
+ */
+int __cheri_compartment("message_queue")
+  multiwaiter_queue_receive_init_sealed(struct EventWaiterSource *source,
+                                        struct SObjStruct        *handle);
+
+/**
+ * Initialise an event waiter source as in `multiwaiter_queue_send_init`,
+ * using a sealed queue endpoint.  The `handle` argument must be a send
+ * endpoint.
+ *
+ * Returns 0 on success, `-EINVAL` on invalid arguments.  May return
+ * `-ECOMPARTMENTFAIL` if the queue is deallocated in the middle of this call.
+ */
+int __cheri_compartment("message_queue")
+  multiwaiter_queue_send_init_sealed(struct EventWaiterSource *source,
+                                     struct SObjStruct        *handle);
 
 __END_DECLS
diff --git a/sdk/include/semaphore.h b/sdk/include/semaphore.h
deleted file mode 100644
index 3e60d96..0000000
--- a/sdk/include/semaphore.h
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright Microsoft and CHERIoT Contributors.
-// SPDX-License-Identifier: MIT
-
-#pragma once
-
-#include <cdefs.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <timeout.h>
-
-/*
- * Semaphore APIs.
- * This is a FreeRTOS-compatible semaphore implementation. A semaphore is
- * created with a max count of N and initial count of 0. Each take() operation
- * increments the count and blocks at the maximum. Each give() decrements the
- * count and blocks at 0.
- */
-
-__BEGIN_DECLS
-
-/**
- * Create a new semaphore.
- *
- * @param ret storage for the returned sealed semaphore handle
- * @param maxNItems the max count the semaphore can be taken
- *
- * @return error code. 0 on success
- */
-int __cheri_compartment("sched")
-  semaphore_create(Timeout           *timeout,
-                   struct SObjStruct *heapCapability,
-                   void             **ret,
-                   size_t             maxNItems);
-
-/**
- * Delete semaphore, waking up all blockers.
- *
- * @param sema sealed semaphore handle
- *
- * @return error code. 0 on success
- */
-int __cheri_compartment("sched")
-  semaphore_delete(struct SObjStruct *heapCapability, void *sema);
-
-/**
- * Take the semaphore. If its count has reached the maximum, block.
- *
- * @param sema sealed semaphore handle
- * @param timeout The timeout for this call.
- *
- * @return error code. 0 on success
- */
-int __cheri_compartment("sched") semaphore_take(Timeout *timeout, void *sema);
-
-/**
- * Release the semaphore. If its count has reached 0, block.
- *
- * @param sema sealed semaphore handle
- * @param timeout The timeout for this call.
- *
- * @return error code. 0 on success
- */
-int __cheri_compartment("sched") semaphore_give(Timeout *timeout, void *sema);
-
-__END_DECLS
diff --git a/sdk/include/stdatomic.h b/sdk/include/stdatomic.h
new file mode 100644
index 0000000..04cd222
--- /dev/null
+++ b/sdk/include/stdatomic.h
@@ -0,0 +1,153 @@
+#pragma once
+/**
+ * This file implements the C11 and C++23 C atomics interfaces.  On targets
+ * without hardware atomics, these will all lower to calls into the atomics
+ * shared library.  You must link either atomics or atomics_fixed (if you use
+ * only fixed-width atomics) into your firmware image.
+ *
+ * *WARNING*: The C++ atomics interface is more efficient for non-primitive
+ * types but is *not* guaranteed to be interoperable with the C version.
+ * Interoperable code should use only primitive types in atomics.
+ */
+
+#ifdef __cplusplus
+#	include <atomic>
+#	define _Atomic(T) std::atomic<T>
+#else
+#	include <stddef.h>
+#	include <stdint.h>
+enum memory_order
+{
+	memory_order_relaxed = __ATOMIC_RELAXED,
+	memory_order_consume = __ATOMIC_CONSUME,
+	memory_order_acquire = __ATOMIC_ACQUIRE,
+	memory_order_release = __ATOMIC_RELEASE,
+	memory_order_acq_rel = __ATOMIC_ACQ_REL,
+	memory_order_seq_cst = __ATOMIC_SEQ_CST,
+};
+
+typedef _Atomic(_Bool) atomic_flag;
+
+#	define ATOMIC_FLAG_INIT false
+
+// Clang thinks that all atomics are too big, so ignore it.
+__clang_ignored_warning_push("-Watomic-alignment")
+
+__always_inline _Bool
+atomic_flag_test_and_set_explicit(volatile atomic_flag *obj, enum memory_order order)
+{
+	return __c11_atomic_exchange(obj, true, order);
+}
+
+__always_inline _Bool atomic_flag_test_and_set(volatile atomic_flag *obj)
+{
+	return atomic_flag_test_and_set_explicit(obj, memory_order_seq_cst);
+}
+
+__always_inline _Bool
+atomic_flag_test_and_clear_explicit(volatile atomic_flag *obj,
+                                    enum memory_order          order)
+{
+	return __c11_atomic_exchange(obj, false, order);
+}
+
+__always_inline _Bool atomic_flag_test_and_clear(volatile atomic_flag *obj)
+{
+	return atomic_flag_test_and_clear_explicit(obj, memory_order_seq_cst);
+}
+
+__clang_ignored_warning_pop()
+
+
+// The functions in the following block are mapped directly to builtins.
+#	define atomic_init(obj, value) __c11_atomic_init(obj, value)
+#	define atomic_compare_exchange_strong_explicit(                           \
+	  object, expected, desired, success, failure)                             \
+		__c11_atomic_compare_exchange_strong(                                  \
+		  object, expected, desired, success, failure)
+#	define atomic_compare_exchange_weak_explicit(                             \
+	  object, expected, desired, success, failure)                             \
+		__c11_atomic_compare_exchange_weak(                                    \
+		  object, expected, desired, success, failure)
+#	define atomic_exchange_explicit(object, desired, order)                   \
+		__c11_atomic_exchange(object, desired, order)
+#	define atomic_fetch_add_explicit(object, operand, order)                  \
+		__c11_atomic_fetch_add(object, operand, order)
+#	define atomic_fetch_and_explicit(object, operand, order)                  \
+		__c11_atomic_fetch_and(object, operand, order)
+#	define atomic_fetch_or_explicit(object, operand, order)                   \
+		__c11_atomic_fetch_or(object, operand, order)
+#	define atomic_fetch_sub_explicit(object, operand, order)                  \
+		__c11_atomic_fetch_sub(object, operand, order)
+#	define atomic_fetch_xor_explicit(object, operand, order)                  \
+		__c11_atomic_fetch_xor(object, operand, order)
+#	define atomic_load_explicit(object, order) __c11_atomic_load(object, order)
+#	define atomic_store_explicit(object, desired, order)                      \
+		__c11_atomic_store(object, desired, order)
+
+// The functions in the following block are convenience wrappers around the
+// previous block
+
+#	define atomic_compare_exchange_strong(object, expected, desired)          \
+		atomic_compare_exchange_strong_explicit(object,                        \
+		                                        expected,                      \
+		                                        desired,                       \
+		                                        memory_order_seq_cst,          \
+		                                        memory_order_seq_cst)
+#	define atomic_compare_exchange_weak(object, expected, desired)            \
+		atomic_compare_exchange_weak_explicit(object,                          \
+		                                      expected,                        \
+		                                      desired,                         \
+		                                      memory_order_seq_cst,            \
+		                                      memory_order_seq_cst)
+#	define atomic_exchange(object, desired)                                   \
+		atomic_exchange_explicit(object, desired, memory_order_seq_cst)
+#	define atomic_fetch_add(object, operand)                                  \
+		atomic_fetch_add_explicit(object, operand, memory_order_seq_cst)
+#	define atomic_fetch_and(object, operand)                                  \
+		atomic_fetch_and_explicit(object, operand, memory_order_seq_cst)
+#	define atomic_fetch_or(object, operand)                                   \
+		atomic_fetch_or_explicit(object, operand, memory_order_seq_cst)
+#	define atomic_fetch_sub(object, operand)                                  \
+		atomic_fetch_sub_explicit(object, operand, memory_order_seq_cst)
+#	define atomic_fetch_xor(object, operand)                                  \
+		atomic_fetch_xor_explicit(object, operand, memory_order_seq_cst)
+#	define atomic_load(object)                                                \
+		atomic_load_explicit(object, memory_order_seq_cst)
+#	define atomic_store(object, desired)                                      \
+		atomic_store_explicit(object, desired, memory_order_seq_cst)
+#endif
+
+typedef _Atomic(_Bool)              atomic_bool;
+typedef _Atomic(char)               atomic_char;
+typedef _Atomic(signed char)        atomic_schar;
+typedef _Atomic(unsigned char)      atomic_uchar;
+typedef _Atomic(short)              atomic_short;
+typedef _Atomic(unsigned short)     atomic_ushort;
+typedef _Atomic(int)                atomic_int;
+typedef _Atomic(unsigned int)       atomic_uint;
+typedef _Atomic(long)               atomic_long;
+typedef _Atomic(unsigned long)      atomic_ulong;
+typedef _Atomic(long long)          atomic_llong;
+typedef _Atomic(unsigned long long) atomic_ullong;
+typedef _Atomic(int_least8_t)       atomic_int_least8_t;
+typedef _Atomic(uint_least8_t)      atomic_uint_least8_t;
+typedef _Atomic(int_least16_t)      atomic_int_least16_t;
+typedef _Atomic(uint_least16_t)     atomic_uint_least16_t;
+typedef _Atomic(int_least32_t)      atomic_int_least32_t;
+typedef _Atomic(uint_least32_t)     atomic_uint_least32_t;
+typedef _Atomic(int_least64_t)      atomic_int_least64_t;
+typedef _Atomic(uint_least64_t)     atomic_uint_least64_t;
+typedef _Atomic(int_fast8_t)        atomic_int_fast8_t;
+typedef _Atomic(uint_fast8_t)       atomic_uint_fast8_t;
+typedef _Atomic(int_fast16_t)       atomic_int_fast16_t;
+typedef _Atomic(uint_fast16_t)      atomic_uint_fast16_t;
+typedef _Atomic(int_fast32_t)       atomic_int_fast32_t;
+typedef _Atomic(uint_fast32_t)      atomic_uint_fast32_t;
+typedef _Atomic(int_fast64_t)       atomic_int_fast64_t;
+typedef _Atomic(uint_fast64_t)      atomic_uint_fast64_t;
+typedef _Atomic(intptr_t)           atomic_intptr_t;
+typedef _Atomic(uintptr_t)          atomic_uintptr_t;
+typedef _Atomic(size_t)             atomic_size_t;
+typedef _Atomic(ptrdiff_t)          atomic_ptrdiff_t;
+
diff --git a/sdk/lib/queue/queue.cc b/sdk/lib/queue/queue.cc
new file mode 100644
index 0000000..62f2179
--- /dev/null
+++ b/sdk/lib/queue/queue.cc
@@ -0,0 +1,594 @@
+#include <cheri.hh>
+#include <cstdlib>
+#include <errno.h>
+#include <locks.hh>
+#include <queue.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <timeout.h>
+#include <type_traits>
+
+using namespace CHERI;
+using cheriot::atomic;
+
+using Debug = ConditionalDebug<false, "Queue library">;
+
+#ifdef __cplusplus
+using MessageQueueCounter = atomic<uint32_t>;
+#else
+typedef uint32_t MessageQueueCounter;
+#endif
+
+#include <assembly-helpers.h>
+
+namespace
+{
+	/**
+	 * Helpers for the queue.  The queue uses two counters that wrap on double
+	 * the number of elements.  This ensures that full and empty conditions
+	 * have different values: a queue is full when the producer is one queue
+	 * length ahead of the consumer, and empty when the producer and consumer
+	 * are equal.
+	 */
+
+	/**
+	 * Helper for wrapping increment.  Increments `counter`, wrapping to zero
+	 * if it reaches double `size`.
+	 */
+	constexpr uint32_t increment_and_wrap(uint32_t size, uint32_t counter)
+	{
+		counter++;
+		if (2 * size == counter)
+		{
+			return 0;
+		}
+		return counter;
+	}
+
+	/**
+	 * Returns the number of items in the queue for the given size with the
+	 * specified producer and consumer counters.
+	 */
+	constexpr uint32_t
+	items_remaining(uint32_t size, uint32_t producer, uint32_t consumer)
+	{
+		// If the consumer is ahead of the producer then the producer has
+		// wrapped.  In this case, treat the consumer as a negative offset
+		if (consumer > producer)
+		{
+			return (2 * size) - consumer + producer;
+		}
+		return producer - consumer;
+	}
+
+	/**
+	 * Returns true if and only if the `size`, `producer`, and `consumer`
+	 * counters indicate a full queue.
+	 */
+	constexpr bool is_full(uint32_t size, uint32_t producer, uint32_t consumer)
+	{
+		return items_remaining(size, producer, consumer) == size;
+	}
+
+	/**
+	 * Returns true if and only if the `producer`, and `consumer` counters
+	 * indicate an empty queue.
+	 */
+	constexpr bool is_empty(uint32_t producer, uint32_t consumer)
+	{
+		return producer == consumer;
+	}
+
+	/**
+	 * Helper that exhaustively checks the correctness of the is-full
+	 * calculation for a size.
+	 *
+	 * This is purely constexpr logic for compile-time checks, it generates no
+	 * code.
+	 */
+	template<uint32_t Size>
+	struct CheckIsFull
+	{
+		/**
+		 * Check that the template arguments return false for is-full.  This is
+		 * a separate template so that we see the values in a compiler error.
+		 */
+		template<uint32_t Producer, uint32_t Consumer>
+		static constexpr void check_not_full()
+		{
+			static_assert(!is_full(Size, Producer, Consumer),
+			              "is-full calculation is incorrect");
+		}
+
+		/**
+		 * Check that the template arguments return true for is-full.  This is a
+		 * separate template so that we see the values in a compiler error.
+		 */
+		template<uint32_t Producer, uint32_t Consumer>
+		static constexpr void check_full()
+		{
+			static_assert(is_full(Size, Producer, Consumer),
+			              "is-full calculation is incorrect");
+		}
+
+		/**
+		 * Helper that uses `increment_and_wrap` to add `displacement` to
+		 * `start`, giving the value of a counter after `displacement`
+		 * increments.
+		 */
+		static constexpr uint32_t add(uint32_t start, uint32_t displacement)
+		{
+			for (uint32_t i = 0; i < displacement; i++)
+			{
+				start = increment_and_wrap(Size, start);
+			}
+			return start;
+		}
+
+		/**
+		 * Check that items-remaining returns the correct value for the given
+		 * counter values.
+		 */
+		template<uint32_t Producer, uint32_t Consumer, uint32_t Displacement>
+		static constexpr void check_items_remaining()
+		{
+			static_assert(items_remaining(Size, Producer, Consumer) ==
+			                Displacement,
+			              "items-remaining calculation is incorrect");
+		}
+
+		/**
+		 * For every producer counter value from 0 up to `Size` after a consumer
+		 * counter value, check that it returns the correct value for the
+		 * items-remaining, is-empty, and is-full calculations.
+		 */
+		template<uint32_t Consumer, uint32_t Displacement = 0>
+		static constexpr void check_offsets()
+		{
+			constexpr auto Producer = add(Consumer, Displacement);
+			check_items_remaining<Producer, Consumer, Displacement>();
+			if constexpr (Displacement == 0)
+			{
+				static_assert(is_empty(Producer, Consumer),
+				              "is-empty calculation is incorrect");
+			}
+			else
+			{
+				static_assert(!is_empty(Producer, Consumer),
+				              "is-empty calculation is incorrect");
+			}
+			if constexpr (Displacement == Size)
+			{
+				check_full<Producer, Consumer>();
+			}
+			else
+			{
+				static_assert(Displacement < Size,
+				              "Displacement overflowed somehow");
+				check_not_full<Producer, Consumer>();
+				check_offsets<Consumer, Displacement + 1>();
+			}
+		}
+
+		/**
+		 * Check every valid consumer value for the given size, and every valid
+		 * producer value for each consumer value.
+		 */
+		template<uint32_t Consumer = 0>
+		static constexpr bool check_sizes()
+		{
+			check_offsets<Consumer>();
+			if constexpr (Consumer < Size * 2)
+			{
+				check_sizes<Consumer + 1>();
+			}
+			return true;
+		}
+
+		static constexpr bool Value = check_sizes();
+	};
+
+	/**
+	 * Helper.  This is never false, it is always either true or compile
+	 * failure.
+	 */
+	template<uint32_t Size>
+	constexpr bool CheckIsFullValue = CheckIsFull<Size>::Value;
+
+	// Check some sizes that are likely to be wrong (powers of two, primes, and
+	// some other values)
+	static_assert(CheckIsFullValue<1>, "CheckIsFull failed");
+	static_assert(CheckIsFullValue<3>, "CheckIsFull failed");
+	static_assert(CheckIsFullValue<4>, "CheckIsFull failed");
+	static_assert(CheckIsFullValue<10>, "CheckIsFull failed");
+	static_assert(CheckIsFullValue<17>, "CheckIsFull failed");
+	static_assert(CheckIsFullValue<32>, "CheckIsFull failed");
+	static_assert(CheckIsFullValue<33>, "CheckIsFull failed");
+
+	/**
+	 * Returns a pointer to the element in the queue indicated by `counter`.
+	 */
+	Capability<void> buffer_at_counter(struct QueueHandle &handle,
+	                                   uint32_t            counter)
+	{
+		// Handle wrap for the second run around the counter.
+		size_t index =
+		  counter >= handle.queueSize ? counter - handle.queueSize : counter;
+		auto             offset = index * handle.elementSize;
+		Capability<void> pointer{handle.buffer};
+		pointer.address() += offset;
+		return pointer;
+	}
+
+	/**
+	 * Flag lock that uses the two high bits of a word for the lock.
+	 *
+	 * The remaining bits are free for other uses, but should not be modified
+	 * while the lock is not held.
+	 */
+	struct HighBitFlagLock
+	{
+		/**
+		 * A reference to the word that holds the lock.
+		 */
+		atomic<uint32_t> &lockWord;
+
+		/**
+		 * The bit to use for the lock.
+		 */
+		static constexpr uint32_t LockBit    = 1U << 31;
+		static constexpr uint32_t WaitersBit = 1U << 30;
+
+		// Function required to conform to the Lock concept.
+		void lock()
+		{
+			__builtin_unreachable();
+		}
+
+		static constexpr uint32_t reserved_bits()
+		{
+			return LockBit | WaitersBit;
+		}
+
+		/**
+		 * Try to acquire the lock.  Returns true on success, false on failure.
+		 */
+		bool try_lock(Timeout *t)
+		{
+			uint32_t value;
+			do
+			{
+				value = lockWord.load();
+				if (value & LockBit)
+				{
+					// If the lock is held, set the flag that indicates that
+					// there are waiters.
+					if ((value & WaitersBit) == 0)
+					{
+						if (!lockWord.compare_exchange_strong(
+						      value, value | WaitersBit))
+						{
+							continue;
+						}
+					}
+					if (lockWord.wait(t, value) == -ETIMEDOUT)
+					{
+						return false;
+					}
+					continue;
+				}
+			} while (
+			  !lockWord.compare_exchange_strong(value, (value | LockBit)));
+			return true;
+		}
+
+		/**
+		 * Release the lock.
+		 */
+		void unlock()
+		{
+			uint32_t value;
+			// Clear the lock bit.
+			value = lockWord.load();
+			// If we're releasing the lock with waiters, wake them up.
+			if (lockWord.exchange(value & ~LockBit) & WaitersBit)
+			{
+				lockWord.notify_all();
+			}
+		}
+	};
+
+	uint32_t counter_load(std::atomic<uint32_t> *counter)
+	{
+		return counter->load() & ~(HighBitFlagLock::reserved_bits());
+	}
+
+	void counter_store(std::atomic<uint32_t> *counter, uint32_t value)
+	{
+		uint32_t old;
+		do
+		{
+			old = counter->load();
+		} while (!counter->compare_exchange_strong(
+		  old, (old & HighBitFlagLock::reserved_bits()) | value));
+	}
+
+	/// Permissions for read-only access to a counter.
+	static constexpr PermissionSet ReadOnly{Permission::Global,
+	                                        Permission::Load};
+	/// Permissions for read-only access to a buffer.
+	static constexpr PermissionSet ReadOnlyCapability{
+	  Permission::Global,
+	  Permission::Load,
+	  Permission::LoadStoreCapability,
+	  Permission::LoadGlobal,
+	  Permission::LoadMutable};
+	/// Permissions for write-only access to a buffer.
+	static constexpr PermissionSet WriteOnlyCapability{
+	  Permission::Global,
+	  Permission::Store,
+	  Permission::LoadStoreCapability};
+
+	/**
+	 * Helper to drop our short-lived claims.
+	 */
+	void drop_claims()
+	{
+		Timeout t{0};
+		heap_claim_fast(&t, nullptr, nullptr);
+	}
+
+} // namespace
+
+struct QueueHandle queue_make_receive_handle(struct QueueHandle handle)
+{
+	Capability buffer   = handle.buffer;
+	Capability producer = handle.producer;
+	buffer.permissions() &= ReadOnlyCapability;
+	producer.permissions() &= ReadOnly;
+	handle.buffer   = buffer;
+	handle.producer = producer;
+	return handle;
+}
+
+struct QueueHandle queue_make_send_handle(struct QueueHandle handle)
+{
+	Capability buffer   = handle.buffer;
+	Capability consumer = handle.consumer;
+	buffer.permissions() &= WriteOnlyCapability;
+	consumer.permissions() &= ReadOnly;
+	handle.buffer   = buffer;
+	handle.consumer = consumer;
+	return handle;
+}
+
+int queue_create(Timeout            *timeout,
+                 struct SObjStruct  *heapCapability,
+                 struct QueueHandle *outQueue,
+                 void              **outAllocation,
+                 size_t              elementSize,
+                 size_t              elementCount)
+{
+	size_t bufferSize;
+	size_t allocSize;
+	bool   overflow =
+	  __builtin_mul_overflow(elementCount, elementSize, &bufferSize);
+	// We must be able to accurately represent the buffer, so round it up to a
+	// representable length.
+	bufferSize = CHERI::representable_length(bufferSize);
+	static constexpr size_t CounterSize = sizeof(uint32_t);
+	// Round up the size to be correctly aligned for the counters at the end
+	// (if necessary) and add two counters worth of space.
+	overflow |= __builtin_add_overflow(
+	  bufferSize,
+	  (2 * CounterSize) + (CounterSize - (bufferSize & (CounterSize - 1))),
+	  &allocSize);
+	if (overflow)
+	{
+		return -EINVAL;
+	}
+	// We need the counters to be able to run to double the queue size without
+	// hitting the high bits.  Error if this is the case.
+	//
+	// This should never be reached: a queue needs to be at least 512 MiB
+	// (assuming one-byte elements) to hit this limit.
+	if (((elementCount | (elementCount * 2)) &
+	     HighBitFlagLock::reserved_bits()) != 0)
+	{
+		return -EINVAL;
+	}
+
+	// Allocate the space for the queue.
+	Capability buffer{heap_allocate(timeout, heapCapability, allocSize)};
+	if (!buffer.is_valid())
+	{
+		return -ENOMEM;
+	}
+
+	Capability<std::atomic<uint32_t>> producer{
+	  buffer.cast<std::atomic<uint32_t>>()};
+	Capability<std::atomic<uint32_t>> consumer{
+	  buffer.cast<std::atomic<uint32_t>>()};
+	// Make the producer and consumer point after the buffer
+	producer.address() += bufferSize;
+	consumer.address() += bufferSize + CounterSize;
+	// Set their bounds to 4 bytes.
+	producer.bounds() = CounterSize;
+	consumer.bounds() = CounterSize;
+	// The pointer used to free the allocation
+	*outAllocation  = buffer;
+	buffer.bounds() = bufferSize;
+	Debug::log("Created queue with buffer: {}", buffer);
+	// The handle
+	*outQueue = {elementSize, elementCount, buffer, producer, consumer};
+
+	return 0;
+}
+
+int queue_send(Timeout *timeout, struct QueueHandle *handle, const void *src)
+{
+	Debug::log("Send called on: {}", handle);
+	auto *producer   = handle->producer;
+	auto *consumer   = handle->consumer;
+	bool  shouldWake = false;
+	{
+		Debug::log("Lock word: {}", producer->load());
+		HighBitFlagLock l{*producer};
+		if (LockGuard g{l, timeout})
+		{
+			uint32_t producerCounter = counter_load(producer);
+			uint32_t consumerCounter = counter_load(consumer);
+			Debug::log("Producer counter: {}, consumer counter: {}, Size: {}",
+			           producerCounter,
+			           consumerCounter,
+			           handle->queueSize);
+			while (is_full(handle->queueSize, producerCounter, consumerCounter))
+			{
+				if (consumer->wait(timeout, consumerCounter) == -ETIMEDOUT)
+				{
+					Debug::log("Timed out on futex");
+					return -ETIMEDOUT;
+				}
+				consumerCounter = counter_load(consumer);
+			}
+			auto entry = buffer_at_counter(*handle, producerCounter);
+			if (int claim = heap_claim_fast(timeout, handle->buffer, src);
+			    claim != 0)
+			{
+				Debug::log("Claim failed: {}", claim);
+				return claim;
+			}
+			if (!check_pointer<PermissionSet{Permission::Load},
+			                   const void,
+			                   false>(src, handle->elementSize))
+			{
+				drop_claims();
+				Debug::log("Load / bounds check failed: {}", src);
+				return -EPERM;
+			}
+			Debug::log("Send copying {} bytes from {} to {}",
+			           handle->elementSize,
+			           src,
+			           entry);
+			memcpy(entry, src, handle->elementSize);
+			drop_claims();
+			counter_store(
+			  handle->producer,
+			  increment_and_wrap(handle->queueSize, producerCounter));
+			// Check if the queue was empty before we updated the producer
+			// counter.  By the time that we reach this point, anything on the
+			// consumer side will be on the path to a futex_wait with the old
+			// version of the producer counter and so will bounce out again.
+			shouldWake = is_empty(producerCounter, counter_load(consumer));
+		}
+		else
+		{
+			Debug::log("Timed out on lock");
+			return -ETIMEDOUT;
+		}
+	}
+	if (shouldWake)
+	{
+		handle->producer->notify_all();
+	}
+	return 0;
+}
+
+int queue_receive(Timeout *timeout, struct QueueHandle *handle, void *dst)
+{
+	Debug::log("Receive called on: {}", handle);
+	auto *producer   = handle->producer;
+	auto *consumer   = handle->consumer;
+	bool  shouldWake = false;
+	{
+		HighBitFlagLock l{*consumer};
+		if (LockGuard g{l, timeout})
+		{
+			uint32_t producerCounter = counter_load(producer);
+			uint32_t consumerCounter = counter_load(consumer);
+			Debug::log("Producer counter: {}, consumer counter: {}, Size: {}",
+			           producerCounter,
+			           consumerCounter,
+			           handle->queueSize);
+			while (is_empty(producerCounter, consumerCounter))
+			{
+				if (producer->wait(timeout, producerCounter) == -ETIMEDOUT)
+				{
+					return -ETIMEDOUT;
+				}
+				producerCounter = counter_load(producer);
+			}
+			auto entry = buffer_at_counter(*handle, consumerCounter);
+			if (int claim = heap_claim_fast(timeout, handle->buffer, dst);
+			    claim != 0)
+			{
+				return claim;
+			}
+			if (!check_pointer<PermissionSet{Permission::Store}, void, false>(
+			      dst, handle->elementSize))
+			{
+				drop_claims();
+				Debug::log("Check pointer failed with {} for {} byte write",
+				           dst,
+				           handle->elementSize);
+				return -EPERM;
+			}
+			Debug::log("Receive copying {} bytes from {} to {}",
+			           handle->elementSize,
+			           entry,
+			           dst);
+			memcpy(dst, entry, handle->elementSize);
+			drop_claims();
+			counter_store(
+			  consumer, increment_and_wrap(handle->queueSize, consumerCounter));
+			// Check if the queue was full before we updated the consumer
+			// counter.  By the time that we reach this point, anything on the
+			// producer side will be on the path to a futex_wait with the old
+			// version of the consumer counter and so will bounce out again.
+			shouldWake = is_full(
+			  handle->queueSize, counter_load(producer), consumerCounter);
+		}
+		else
+		{
+			Debug::log("Timed out on lock");
+			return -ETIMEDOUT;
+		}
+	}
+	if (shouldWake)
+	{
+		handle->consumer->notify_all();
+	}
+	return 0;
+}
+
+int queue_items_remaining(struct QueueHandle *handle, size_t *items)
+{
+	auto producerCounter = counter_load(handle->producer);
+	auto consumerCounter = counter_load(handle->consumer);
+	*items =
+	  items_remaining(handle->queueSize, producerCounter, consumerCounter);
+	Debug::log("Producer counter: {}, consumer counter: {}, items: {}",
+	           producerCounter,
+	           consumerCounter,
+	           *items);
+	return 0;
+}
+
+void multiwaiter_queue_send_init(struct EventWaiterSource *source,
+                                 struct QueueHandle       *handle)
+{
+	uint32_t producer   = counter_load(handle->producer);
+	uint32_t consumer   = counter_load(handle->consumer);
+	source->eventSource = handle->consumer;
+	source->value =
+	  is_full(handle->queueSize, producer, consumer) ? consumer : -1;
+}
+
+void multiwaiter_queue_receive_init(struct EventWaiterSource *source,
+                                    struct QueueHandle       *handle)
+{
+	uint32_t producer   = counter_load(handle->producer);
+	uint32_t consumer   = counter_load(handle->consumer);
+	source->eventSource = handle->producer;
+	source->value       = is_empty(producer, consumer) ? producer : -1;
+}
diff --git a/sdk/lib/queue/queue_compartment.cc b/sdk/lib/queue/queue_compartment.cc
new file mode 100644
index 0000000..b2d5bc7
--- /dev/null
+++ b/sdk/lib/queue/queue_compartment.cc
@@ -0,0 +1,205 @@
+#include <cheri.hh>
+#include <compartment.h>
+#include <cstdlib>
+#include <errno.h>
+#include <locks.hh>
+#include <queue.h>
+#include <token.h>
+
+using namespace CHERI;
+
+using Debug = ConditionalDebug<false, "Queue compartment">;
+
+namespace
+{
+	// Static sealing keys for the two endpoint kinds.  Sealing send and
+	// receive handles with distinct keys means a handle presented to the
+	// wrong API simply fails to unseal, which is how direction is enforced.
+	__always_inline SKey receive_key()
+	{
+		return STATIC_SEALING_TYPE(ReceiveHandle);
+	}
+	__always_inline SKey send_key()
+	{
+		return STATIC_SEALING_TYPE(SendHandle);
+	}
+
+	// The unsealed representation of one queue endpoint.
+	struct QueueEndpoint
+	{
+		// The underlying queue-library handle for this direction.
+		QueueHandle handle;
+		// The allocation backing the queue; freed when this endpoint is
+		// destroyed (each endpoint holds one claim on it).
+		void       *allocation;
+		// Lock that protects against double free.
+		FlagLockPriorityInherited lock;
+	};
+
+} // namespace
+
+/**
+ * Allocate a queue and return two sealed endpoint handles, one for sending
+ * and one for receiving, via `outQueueSend` and `outQueueReceive`.  Returns
+ * 0 on success, a negative errno value on failure.  On failure, nothing is
+ * leaked: every allocation made here is released before returning.
+ */
+int queue_create_sealed(Timeout            *timeout,
+                        struct SObjStruct  *heapCapability,
+                        struct SObjStruct **outQueueSend,
+                        struct SObjStruct **outQueueReceive,
+                        size_t              elementSize,
+                        size_t              elementCount)
+{
+	// The timeout must be a readable, writeable, non-heap pointer: a
+	// heap-allocated timeout could be freed out from under us while we block.
+	if (heap_address_is_valid(timeout) ||
+	    !check_pointer<PermissionSet{Permission::Load, Permission::Store}>(
+	      timeout))
+	{
+		return -EPERM;
+	}
+
+	// Allocate the queue endpoints
+	auto [send, sendSealed] =
+	  token_allocate<QueueEndpoint>(timeout, heapCapability, send_key());
+	if (!send)
+	{
+		return timeout->may_block() ? -ENOMEM : -ETIMEDOUT;
+	}
+	auto [receive, receiveSealed] =
+	  token_allocate<QueueEndpoint>(timeout, heapCapability, receive_key());
+	if (!receive)
+	{
+		token_obj_destroy(heapCapability, send_key(), sendSealed);
+		return timeout->may_block() ? -ENOMEM : -ETIMEDOUT;
+	}
+
+	// Bidirectional queue handle
+	QueueHandle handle;
+	// The pointer to the queue that is used when freeing
+	void *freeBuffer;
+	// Allocate the queue object
+	int ret = queue_create(
+	  timeout, heapCapability, &handle, &freeBuffer, elementSize, elementCount);
+	if (ret != 0)
+	{
+		token_obj_destroy(heapCapability, send_key(), sendSealed);
+		token_obj_destroy(heapCapability, receive_key(), receiveSealed);
+		return ret;
+	}
+
+	send->handle        = queue_make_send_handle(handle);
+	send->allocation    = freeBuffer;
+	receive->handle     = queue_make_receive_handle(handle);
+	receive->allocation = freeBuffer;
+	// Add a second claim on the buffer so that we can free the queue by freeing
+	// it twice, once in each endpoint.
+	heap_claim(heapCapability, freeBuffer);
+
+	// From here on, any failure must drop both claims on the buffer and
+	// destroy both sealed endpoints, otherwise they leak: the caller never
+	// receives the handles and so can never free them.
+	if (int claimed = heap_claim_fast(timeout, outQueueSend, outQueueReceive);
+	    claimed != 0)
+	{
+		// Free twice because we claimed it once in addition to the original
+		// allocation.
+		heap_free(heapCapability, freeBuffer);
+		heap_free(heapCapability, freeBuffer);
+		token_obj_destroy(heapCapability, send_key(), sendSealed);
+		token_obj_destroy(heapCapability, receive_key(), receiveSealed);
+		return claimed;
+	}
+	if (!check_pointer<PermissionSet{Permission::Load,
+	                                 Permission::Store,
+	                                 Permission::LoadStoreCapability}>(
+	      outQueueReceive, sizeof(void *)) ||
+	    !check_pointer<PermissionSet{Permission::Load,
+	                                 Permission::Store,
+	                                 Permission::LoadStoreCapability}>(
+	      outQueueSend, sizeof(void *)))
+	{
+		// Free twice because we claimed it once in addition to the original
+		// allocation.
+		heap_free(heapCapability, freeBuffer);
+		heap_free(heapCapability, freeBuffer);
+		token_obj_destroy(heapCapability, send_key(), sendSealed);
+		token_obj_destroy(heapCapability, receive_key(), receiveSealed);
+		return -EPERM;
+	}
+	*outQueueSend    = sendSealed;
+	*outQueueReceive = receiveSealed;
+	return 0;
+}
+
+/**
+ * Destroy one endpoint of a queue.  `queueHandle` may be either the send or
+ * the receive endpoint.  Each endpoint holds one claim on the shared buffer,
+ * so the buffer itself is released once both endpoints have been destroyed.
+ */
+int queue_destroy_sealed(Timeout           *timeout,
+                         struct SObjStruct *heapCapability,
+                         struct SObjStruct *queueHandle)
+{
+	Debug::log("Destroying queue {}", queueHandle);
+	auto  token = receive_key();
+	auto *end   = token_unseal(token, Sealed<QueueEndpoint>{queueHandle});
+	// This function takes either endpoint, so we need to try unsealing with
+	// both keys.
+	if (!end)
+	{
+		token = send_key();
+		end   = token_unseal(token, Sealed<QueueEndpoint>{queueHandle});
+	}
+	if (!end)
+	{
+		return -EINVAL;
+	}
+	// Don't bother with a lock guard: we will destroy this lock if we reach the
+	// end. If we lose a race here, this will trap and we will implicitly return
+	// `-ECOMPARTMENTFAIL`.
+	if (!end->lock.try_lock(timeout))
+	{
+		return -ETIMEDOUT;
+	}
+	// Drop this endpoint's claim on the queue buffer.  If the provided heap
+	// capability cannot free it, undo the lock and report the failure.
+	if (heap_free(heapCapability, end->allocation) != 0)
+	{
+		end->lock.unlock();
+		return -EPERM;
+	}
+	// Destroy the sealed endpoint object itself (taking the lock with it).
+	token_obj_destroy(heapCapability, token, queueHandle);
+	return 0;
+}
+
+/**
+ * Send one element to the queue identified by the sealed send handle
+ * `handle`.  Returns -EINVAL for a heap-allocated timeout or a handle that is
+ * not a send endpoint; otherwise forwards to `queue_send`.
+ */
+int queue_send_sealed(Timeout           *timeout,
+                      struct SObjStruct *handle,
+                      const void        *src)
+{
+	// A timeout on the heap could be freed while we block, so reject it.
+	if (heap_address_is_valid(timeout))
+	{
+		return -EINVAL;
+	}
+	// Only handles sealed with the send key may be used to send.
+	auto *endpoint = token_unseal(send_key(), Sealed<QueueEndpoint>{handle});
+	if (endpoint == nullptr)
+	{
+		return -EINVAL;
+	}
+	return queue_send(timeout, &endpoint->handle, src);
+}
+
+/**
+ * Receive one element from the queue identified by the sealed receive handle
+ * `handle`.  Returns -EINVAL for a heap-allocated timeout or a handle that is
+ * not a receive endpoint; otherwise forwards to `queue_receive`.
+ */
+int queue_receive_sealed(Timeout *timeout, struct SObjStruct *handle, void *dst)
+{
+	// A timeout on the heap could be freed while we block, so reject it.
+	if (heap_address_is_valid(timeout))
+	{
+		return -EINVAL;
+	}
+	// Only handles sealed with the receive key may be used to receive.
+	auto *endpoint = token_unseal(receive_key(), Sealed<QueueEndpoint>{handle});
+	if (endpoint == nullptr)
+	{
+		return -EINVAL;
+	}
+	return queue_receive(timeout, &endpoint->handle, dst);
+}
+
+/**
+ * Initialise a multiwaiter event source from a sealed receive handle.
+ * Returns -EINVAL if `handle` is not a receive endpoint, 0 on success.
+ */
+int multiwaiter_queue_receive_init_sealed(struct EventWaiterSource *source,
+                                          struct SObjStruct        *handle)
+{
+	// Only receive endpoints may wait for data to arrive.
+	auto *endpoint = token_unseal(receive_key(), Sealed<QueueEndpoint>{handle});
+	if (endpoint == nullptr)
+	{
+		return -EINVAL;
+	}
+	multiwaiter_queue_receive_init(source, &endpoint->handle);
+	return 0;
+}
+
+/**
+ * Initialise a multiwaiter event source from a sealed send handle.
+ * Returns -EINVAL if `handle` is not a send endpoint, 0 on success.
+ */
+int multiwaiter_queue_send_init_sealed(struct EventWaiterSource *source,
+                                       struct SObjStruct        *handle)
+{
+	// Only send endpoints may wait for space to become available.
+	auto *endpoint = token_unseal(send_key(), Sealed<QueueEndpoint>{handle});
+	if (endpoint == nullptr)
+	{
+		return -EINVAL;
+	}
+	multiwaiter_queue_send_init(source, &endpoint->handle);
+	return 0;
+}
+
+/**
+ * Report the number of items in the queue identified by `queueHandle`, which
+ * may be either a send or a receive endpoint.  Returns -EINVAL for a handle
+ * that is not a queue endpoint, 0 on success.
+ */
+int queue_items_remaining_sealed(struct SObjStruct *queueHandle, size_t *items)
+{
+	auto *end = token_unseal(receive_key(), Sealed<QueueEndpoint>{queueHandle});
+	// This function takes either endpoint, so we need to try unsealing with
+	// both keys.
+	if (!end)
+	{
+		end = token_unseal(send_key(), Sealed<QueueEndpoint>{queueHandle});
+	}
+	// If neither key unseals the handle then it is not a queue endpoint.
+	// Without this check we would dereference a null pointer below and fault.
+	if (!end)
+	{
+		return -EINVAL;
+	}
+	return queue_items_remaining(&end->handle, items);
+}
diff --git a/sdk/lib/queue/xmake.lua b/sdk/lib/queue/xmake.lua
new file mode 100644
index 0000000..0f44ea6
--- /dev/null
+++ b/sdk/lib/queue/xmake.lua
@@ -0,0 +1,10 @@
+-- Copyright Microsoft and CHERIoT Contributors.
+-- SPDX-License-Identifier: MIT
+
+-- Shared-library message queue: for use within a compartment (or between
+-- compartments that trust each other with the queue state).
+library("message_queue_library")
+  set_default(false)
+  add_files("queue.cc")
+
+-- Compartment message queue: sealed-handle API for communication between
+-- mutually distrusting compartments.
+compartment("message_queue")
+  set_default(false)
+  add_files("queue_compartment.cc")
diff --git a/tests/ccompile-test.c b/tests/ccompile-test.c
index dc3b6d0..f712028 100644
--- a/tests/ccompile-test.c
+++ b/tests/ccompile-test.c
@@ -20,7 +20,6 @@
 #include <multiwaiter.h>
 #include <queue.h>
 #include <riscvreg.h>
-#include <semaphore.h>
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stddef.h>
diff --git a/tests/multiwaiter-test.cc b/tests/multiwaiter-test.cc
index f4ec516..f559295 100644
--- a/tests/multiwaiter-test.cc
+++ b/tests/multiwaiter-test.cc
@@ -71,55 +71,56 @@
 	TEST(events[0].value == 0, "Futex reports wake but none occurred");
 	TEST(events[1].value == 1, "Futex reports no wake");
 
-	void *queue;
+	QueueHandle queue;
+	void       *queueMemory;
 	t.remaining = 0;
-	ret         = queue_create(&t, MALLOC_CAPABILITY, &queue, sizeof(int), 1);
+	ret =
+	  queue_create(&t, MALLOC_CAPABILITY, &queue, &queueMemory, sizeof(int), 1);
+
 	TEST(ret == 0, "Queue create failed:", ret);
 	int     val = 0;
 	Timeout noWait{0};
-	ret = queue_send(&noWait, queue, &val);
+	ret = queue_send(&noWait, &queue, &val);
 	TEST(ret == 0, "Queue send failed: {}", ret);
 
 	debug_log("Testing queue, blocked on send");
-	async([=]() {
+	async([=]() mutable {
 		sleep(1);
 		int     val;
 		Timeout noWait{0};
-		int     ret = queue_recv(&noWait, queue, &val);
+		int     ret = queue_receive(&noWait, &queue, &val);
 		TEST(ret == 0, "Background receive failed: {}", ret);
 		TEST(val == 0, "Background receive returned incorrect value: {}", ret);
 		debug_log("Background thread made queue ready to send");
 	});
-	events[0]   = {queue, EventWaiterQueue, EventWaiterQueueSendReady};
+	multiwaiter_queue_send_init(&events[0], &queue);
 	t.remaining = 6;
 	ret         = multiwaiter_wait(&t, mw, events, 1);
 	TEST(ret == 0, "multiwaiter returned {}, expected 0", ret);
-	TEST(events[0].value == EventWaiterQueueSendReady,
-	     "Queue reports not ready");
+	TEST(events[0].value == 1, "Queue reports not ready");
 
 	debug_log("Testing queue, blocked on receive");
-	async([=]() {
+	async([=]() mutable {
 		sleep(1);
 		int     val = 1;
 		Timeout noWait{0};
-		int     ret = queue_send(&noWait, queue, &val);
+		int     ret = queue_send(&noWait, &queue, &val);
 		TEST(ret == 0, "Background send failed: {}", ret);
 		debug_log("Background thread made queue ready to receive");
 	});
-	events[0]   = {queue, EventWaiterQueue, EventWaiterQueueReceiveReady};
-	t.remaining = 6;
-	ret         = multiwaiter_wait(&t, mw, events, 1);
+	multiwaiter_queue_receive_init(&events[0], &queue);
+	t   = 10;
+	ret = multiwaiter_wait(&t, mw, events, 1);
 	TEST(ret == 0, "multiwaiter returned {}, expected 0", ret);
-	TEST(events[0].value == EventWaiterQueueReceiveReady,
-	     "Queue did not return ready to receive");
-	ret = queue_recv(&noWait, queue, &val);
+	TEST(events[0].value == 1, "Queue did not return ready to receive");
+	ret = queue_receive(&noWait, &queue, &val);
 	TEST(ret == 0, "Queue ready to receive but receive returned {}", ret);
-	TEST(val == 1, "Incorrect value returned from queue");
+	TEST(val == 1, "Incorrect value returned from queue: {}", val);
 
 	debug_log("Testing waiting on a queue and a futex");
 	futex = 0;
 	setFutex(&futex, 1);
-	events[0]   = {queue, EventWaiterQueue, EventWaiterQueueReceiveReady};
+	multiwaiter_queue_receive_init(&events[0], &queue);
 	events[1]   = {&futex, EventWaiterFutex, 0};
 	t.remaining = 6;
 	ret         = multiwaiter_wait(&t, mw, events, 2);
@@ -228,6 +229,6 @@
 	multiwaiter_wait(&t, mw, events, 1);
 
 	event_delete(MALLOC_CAPABILITY, ev);
-	queue_delete(MALLOC_CAPABILITY, queue);
+	free(queueMemory);
 	multiwaiter_delete(MALLOC_CAPABILITY, mw);
 }
diff --git a/tests/queue-test.cc b/tests/queue-test.cc
index 87ba14a..869798f 100644
--- a/tests/queue-test.cc
+++ b/tests/queue-test.cc
@@ -1,83 +1,177 @@
 // Copyright Microsoft and CHERIoT Contributors.
 // SPDX-License-Identifier: MIT
 
+#include "compartment.h"
+#include "token.h"
+#include <cstdlib>
 #define TEST_NAME "Queue"
 #include "tests.hh"
 #include <debug.hh>
 #include <errno.h>
-#include <futex.h>
 #include <queue.h>
-#include <thread_pool.h>
 #include <timeout.h>
 
-using thread_pool::async;
 static constexpr size_t ItemSize                    = 8;
 static constexpr size_t MaxItems                    = 2;
 static constexpr char   Message[MaxItems][ItemSize] = {"TstMsg0", "TstMsg1"};
 
-void test_queue()
+// Error handler for this test compartment.  The sealed-queue tests
+// deliberately provoke CHERI faults (for example by passing undersized
+// buffers); log the fault and force-unwind so the faulting call fails
+// cleanly instead of terminating the test.
+extern "C" ErrorRecoveryBehaviour
+compartment_error_handler(ErrorState *frame, size_t mcause, size_t mtval)
+{
+	debug_log("Thread {} error handler invoked with mcause {}.  PCC: {}",
+	          thread_id_get_fast(),
+	          mcause,
+	          frame->pcc);
+	return ErrorRecoveryBehaviour::ForceUnwind;
+}
+
+// Exercise the shared-library (unsealed) queue API: send until full, receive
+// until empty, time out on both, and check the item count across counter
+// wrap-around.
+void test_queue_unsealed()
+{
+	char               bytes[ItemSize];
+	static QueueHandle queue;
+	static void       *queueMemory;
+	Timeout            timeout{0, 0};
 	debug_log("Testing queue send operations");
-	int rv =
-	  queue_create(&timeout, MALLOC_CAPABILITY, &queue, ItemSize, MaxItems);
+	// Helper: assert that the queue currently reports `expected` items.  The
+	// defaulted SourceLocation means a failure names the calling line.
+	auto checkSpace = [&](size_t         expected,
+	                      SourceLocation loc = SourceLocation::current()) {
+		size_t items;
+		queue_items_remaining(&queue, &items);
+		TEST(items == expected,
+		     "Queue test line {} reports {} items, should contain {}",
+		     loc.line(),
+		     items,
+		     expected);
+	};
+	int rv = queue_create(
+	  &timeout, MALLOC_CAPABILITY, &queue, &queueMemory, ItemSize, MaxItems);
+	// NOTE(review): these field checks run before `rv` is tested below; if
+	// queue_create failed they would read an unset handle — consider moving
+	// the rv TEST first.
+	TEST(queue.elementSize == ItemSize,
+	     "Queue element size is {}, expected {}",
+	     queue.elementSize,
+	     ItemSize);
+	TEST(queue.queueSize == MaxItems,
+	     "Queue size is {}, expected {}",
+	     queue.queueSize,
+	     MaxItems);
 	TEST(rv == 0, "Queue creation failed with {}", rv);
-	rv = queue_send(&timeout, queue, Message[0]);
+	rv = queue_send(&timeout, &queue, Message[0]);
+	checkSpace(1);
 	TEST(rv == 0, "Sending the first message failed with {}", rv);
-	rv = queue_send(&timeout, queue, Message[1]);
+	checkSpace(1);
+	rv = queue_send(&timeout, &queue, Message[1]);
 	TEST(rv == 0, "Sending the second message failed with {}", rv);
+	checkSpace(2);
 	// Queue is full, it should time out.
 	timeout.remaining = 5;
-	rv                = queue_send(&timeout, queue, Message[1]);
+	rv                = queue_send(&timeout, &queue, Message[1]);
 	TEST(rv == -ETIMEDOUT,
 	     "Sending to a full queue didn't time out as expected, returned {}",
 	     rv);
+	checkSpace(2);
 	debug_log("Testing queue receive operations");
 	timeout.remaining = 10;
-	rv                = queue_recv(&timeout, queue, bytes);
+	rv                = queue_receive(&timeout, &queue, bytes);
 	TEST(rv == 0, "Receiving the first message failed with {}", rv);
 	TEST(memcmp(Message[0], bytes, ItemSize) == 0,
 	     "First message received but not as expected. Got {}",
 	     bytes);
-	rv = queue_recv(&timeout, queue, bytes);
+	checkSpace(1);
+	rv = queue_receive(&timeout, &queue, bytes);
 	TEST(rv == 0, "Receiving the second message failed with {}", rv);
 	TEST(memcmp(Message[1], bytes, ItemSize) == 0,
 	     "Second message received but not as expected. Got {}",
 	     bytes);
+	checkSpace(0);
 	timeout.remaining = 5;
-	rv                = queue_recv(&timeout, queue, bytes);
+	rv                = queue_receive(&timeout, &queue, bytes);
 	TEST(
 	  rv == -ETIMEDOUT,
 	  "Receiving from an empty queue didn't time out as expected, returned {}",
 	  rv);
-	debug_log("Testing queue deletion right underneath another thread");
-	async([]() {
-		char    bytesForAsync[ItemSize];
-		Timeout infinity{0, UnlimitedTimeout};
-		/*
-		 * This test waits on a queue indefinitely, but will be woken up by the
-		 * queue_delete() in the main thread. After waking up, the queue object
-		 * is dead and will trap, causing a force unwind in the scheduler
-		 * compartment.
-		 */
-		int rv = queue_recv(&infinity, queue, bytesForAsync);
-		TEST(
-		  rv == -1,
-		  "queue_recv() should return -1 because the queue was freed() "
-		  "underneath us and will trap then force unwind, but instead got {}",
-		  rv);
-		futex = 1;
-		futex_wake(&futex, 1);
-	});
-	timeout.remaining = 20;
-	thread_sleep(&timeout);
-	rv = queue_delete(MALLOC_CAPABILITY, queue);
+	// Check that the items remaining calculations are correct after overflow.
+	queue_send(&timeout, &queue, Message[1]);
+	checkSpace(1);
+	queue_receive(&timeout, &queue, bytes);
+	checkSpace(0);
+	queue_send(&timeout, &queue, Message[1]);
+	checkSpace(1);
+	queue_receive(&timeout, &queue, bytes);
+	checkSpace(0);
+	rv = heap_free(MALLOC_CAPABILITY, queueMemory);
 	TEST(rv == 0, "Queue deletion failed with {}", rv);
-	// Wait until the async is done.
-	rv = futex_wait(&futex, 0);
-	TEST(rv == 0, "futex_wait() failed with {}", rv);
+	debug_log("All queue library tests successful");
+}
+
+// Exercise the sealed (cross-compartment) queue API: direction enforcement,
+// buffer-bounds faults, item counting from either endpoint, and destruction
+// of both endpoints without leaking heap quota.
+void test_queue_sealed()
+{
+	// Snapshot the quota so we can verify nothing leaked at the end.
+	size_t  heapSpace = heap_quota_remaining(MALLOC_CAPABILITY);
+	Timeout t{1};
+	SObj    receiveHandle;
+	SObj    sendHandle;
+	char    bytes[ItemSize];
+	int     ret = queue_create_sealed(
+	      &t, MALLOC_CAPABILITY, &sendHandle, &receiveHandle, ItemSize, MaxItems);
+	TEST(ret == 0, "Queue creation failed with {}", ret);
+
+	// A handle used in the wrong direction must fail to unseal.
+	t   = UnlimitedTimeout;
+	ret = queue_send_sealed(&t, receiveHandle, Message[1]);
+	TEST(
+	  ret == -EINVAL,
+	  "Sending with a receive handle should return -EINVAL ({}), returned {}",
+	  EINVAL,
+	  ret);
+	ret = queue_receive_sealed(&t, sendHandle, bytes);
+	TEST(
+	  ret == -EINVAL,
+	  "Receiving with a send handle should return -EINVAL ({}), returned {}",
+	  EINVAL,
+	  ret);
+
+	// Undersized buffers should fault in the queue compartment and surface as
+	// an error rather than corrupting memory.
+	ret = queue_send_sealed(&t, sendHandle, Message[1] + 1);
+	TEST(ret == -EPERM,
+	     "Sending with short buffer should return -EPERM ({}), returned {}",
+	     EPERM,
+	     ret);
+	ret = queue_send_sealed(&t, sendHandle, Message[1]);
+	TEST(
+	  ret == 0, "Sending with valid buffer should return 0, returned {}", ret);
+	ret = queue_receive_sealed(&t, receiveHandle, bytes + 1);
+	TEST(ret == -EPERM,
+	     "Receiving with short buffer should return -EPERM ({}), returned {}",
+	     EPERM,
+	     ret);
+	// Either endpoint may query the item count.
+	size_t items;
+	ret = queue_items_remaining_sealed(receiveHandle, &items);
+	TEST(ret == 0, "Getting items remaining should return 0, returned {}", ret);
+	TEST(items == 1, "Items remaining should be 1, is {}", items);
+	ret = queue_items_remaining_sealed(sendHandle, &items);
+	TEST(ret == 0, "Getting items remaining should return 0, returned {}", ret);
+	TEST(items == 1, "Items remaining should be 1, is {}", items);
+	ret = queue_receive_sealed(&t, receiveHandle, bytes);
+	TEST(ret == 0,
+	     "Receiving with valid buffer should return 0, returned {}",
+	     ret);
+
+	// Put something in the queue before we delete the send handle.
+	ret = queue_send_sealed(&t, sendHandle, Message[1]);
+	TEST(
+	  ret == 0, "Sending with valid buffer should return 0, returned {}", ret);
+
+	// Destroying one endpoint must not invalidate the other.
+	t   = 1;
+	ret = queue_destroy_sealed(&t, MALLOC_CAPABILITY, sendHandle);
+	TEST(ret == 0, "Queue send destruction failed with {}", ret);
+
+	t   = 1;
+	ret = queue_destroy_sealed(&t, MALLOC_CAPABILITY, receiveHandle);
+	TEST(ret == 0, "Queue receive destruction failed with {}", ret);
+
+	TEST(heap_quota_remaining(MALLOC_CAPABILITY) == heapSpace,
+	     "Heap space leaked");
+	debug_log("All queue compartment tests successful");
+}
+
+// Entry point for the queue tests: run the shared-library API tests first,
+// then the sealed compartment API tests.
+void test_queue()
+{
+	test_queue_unsealed();
+	test_queue_sealed();
 	debug_log("All queue tests successful");
 }
diff --git a/tests/xmake.lua b/tests/xmake.lua
index 243827e..e988c33 100644
--- a/tests/xmake.lua
+++ b/tests/xmake.lua
@@ -64,6 +64,7 @@
          path.join(sdkdir, "lib/string"),
          path.join(sdkdir, "lib/crt"),
          path.join(sdkdir, "lib/compartment_helpers"),
+         path.join(sdkdir, "lib/queue"),
          path.join(sdkdir, "lib/thread_pool"))
 
 -- Compartment for the test entry point.
@@ -76,6 +77,7 @@
     add_deps("test_runner", "thread_pool")
     -- Helper libraries
     add_deps("freestanding", "string", "crt", "cxxrt", "atomic_fixed", "compartment_helpers")
+    add_deps("message_queue", "message_queue_library")
     -- Tests
     add_deps("mmio_test")
     add_deps("allocator_test")
@@ -97,7 +99,7 @@
                 compartment = "test_runner",
                 priority = 3,
                 entry_point = "run_tests",
-                stack_size = 0x600,
+                stack_size = 0x800,
                 -- This must be an odd number for the trusted stack exhaustion
                 -- test to fail in the right compartment.
                 trusted_stack_frames = 9