Merge remote-tracking branch 'upstream/main' into update

Change-Id: Ib2e2c8f152573f8dda27e9421819284d68fa0e37
diff --git a/examples/01.hello_world/xmake.lua b/examples/01.hello_world/xmake.lua
index 3483e64..752f61a 100644
--- a/examples/01.hello_world/xmake.lua
+++ b/examples/01.hello_world/xmake.lua
@@ -6,9 +6,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/examples/02.hello_compartment/xmake.lua b/examples/02.hello_compartment/xmake.lua
index bd282cf..5ed4492 100644
--- a/examples/02.hello_compartment/xmake.lua
+++ b/examples/02.hello_compartment/xmake.lua
@@ -6,9 +6,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/examples/03.hello_safe_compartment/xmake.lua b/examples/03.hello_safe_compartment/xmake.lua
index 32d5908..9992817 100644
--- a/examples/03.hello_safe_compartment/xmake.lua
+++ b/examples/03.hello_safe_compartment/xmake.lua
@@ -6,9 +6,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/examples/04.temporal_safety/xmake.lua b/examples/04.temporal_safety/xmake.lua
index 8cd8402..849aabc 100644
--- a/examples/04.temporal_safety/xmake.lua
+++ b/examples/04.temporal_safety/xmake.lua
@@ -6,9 +6,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/examples/05.sealing/xmake.lua b/examples/05.sealing/xmake.lua
index f3b4547..d20bc47 100644
--- a/examples/05.sealing/xmake.lua
+++ b/examples/05.sealing/xmake.lua
@@ -6,9 +6,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/examples/06.producer-consumer/xmake.lua b/examples/06.producer-consumer/xmake.lua
index 5e4a157..8de70ef 100644
--- a/examples/06.producer-consumer/xmake.lua
+++ b/examples/06.producer-consumer/xmake.lua
@@ -6,9 +6,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/examples/07.error_handling/xmake.lua b/examples/07.error_handling/xmake.lua
index f56bbc1..c929e37 100644
--- a/examples/07.error_handling/xmake.lua
+++ b/examples/07.error_handling/xmake.lua
@@ -6,9 +6,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/examples/08.memory_safety/xmake.lua b/examples/08.memory_safety/xmake.lua
index 93f38d5..29ba2cb 100644
--- a/examples/08.memory_safety/xmake.lua
+++ b/examples/08.memory_safety/xmake.lua
@@ -3,9 +3,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/examples/09.javascript/xmake.lua b/examples/09.javascript/xmake.lua
index 78f53c5..cb3fdd3 100644
--- a/examples/09.javascript/xmake.lua
+++ b/examples/09.javascript/xmake.lua
@@ -6,9 +6,6 @@
 includes(sdkdir)
 set_toolchains("cheriot-clang")
 
--- Support libraries
-includes(path.join(sdkdir, "lib"))
-
 option("board")
     set_default("sail")
 
diff --git a/scripts/run_clang_tidy_format.sh b/scripts/run_clang_tidy_format.sh
index 6b17baf..aecda51 100755
--- a/scripts/run_clang_tidy_format.sh
+++ b/scripts/run_clang_tidy_format.sh
@@ -35,7 +35,7 @@
 # FreeRTOS-Compat headers follow FreeRTOS naming conventions and should be
 # excluded for now.  Eventually they should be included for everything except
 # the identifier naming checks.
-HEADERS=$(find ${DIRECTORIES} -name '*.h' -or -name '*.hh' | grep -v libc++ | grep -v third_party | grep -v 'std.*.h' | grep -v errno.h | grep -v strings.h | grep -v string.h | grep -v -assembly.h | grep -v cdefs.h | grep -v /riscv.h | grep -v inttypes.h | grep -v /cheri-builtins.h | grep -v c++-config | grep -v ctype.h | grep -v switcher.h | grep -v assert.h | grep -v std*.h | grep -v /build/ | grep -v microvium | grep -v FreeRTOS-Compat)
+HEADERS=$(find ${DIRECTORIES} -name '*.h' -or -name '*.hh' | grep -v libc++ | grep -v third_party | grep -v 'std.*.h' | grep -v errno.h | grep -v strings.h | grep -v string.h | grep -v -assembly.h | grep -v cdefs.h | grep -v /riscv.h | grep -v inttypes.h | grep -v /cheri-builtins.h | grep -v c++-config | grep -v ctype.h | grep -v switcher.h | grep -v assert.h | grep -v std*.h | grep -v setjmp.h | grep -v unwind.h | grep -v /build/ | grep -v microvium | grep -v FreeRTOS-Compat)
 SOURCES=$(find ${DIRECTORIES} -name '*.cc' | grep -v /build/ | grep -v third_party | grep -v arith64.c)
 
 echo Headers: ${HEADERS}
diff --git a/sdk/compartment.ldscript b/sdk/compartment.ldscript
index 6419245..ba46ec4 100644
--- a/sdk/compartment.ldscript
+++ b/sdk/compartment.ldscript
@@ -13,7 +13,9 @@
 		# delta.  The final layout will be the compartment import table
 		# followed by the text segment.  There won't be any padding, because
 		# the compartment import table is more strongly aligned than text.
-		LONG(DEFINED(compartment_error_handler) ? compartment_error_handler - __compartment_code_start + SIZEOF(.compartment_import_table) : -1);
+		SHORT(DEFINED(compartment_error_handler) ? compartment_error_handler - __compartment_code_start + SIZEOF(.compartment_import_table) : 0xffff);
+		# The stackless compartment error handler, if it is defined.
+		SHORT(DEFINED(compartment_error_handler_stackless) ? compartment_error_handler_stackless - __compartment_code_start + SIZEOF(.compartment_import_table) : 0xffff);
 		# Array of compartment exports
 		*(.compartment_exports .compartment_exports.*);
 	}
@@ -34,6 +36,7 @@
 		# If there is a compartment error handler, make sure that it is before
 		# anything that can have linker relaxations so that its displacement
 		# from __compartment_code_start is a constant.
+		*(.compartment_error_handler_stackless);
 		*(.compartment_error_handler);
 		*(.text .text.*);
 	}
diff --git a/sdk/core/allocator/main.cc b/sdk/core/allocator/main.cc
index 8f5776f..8568af9 100644
--- a/sdk/core/allocator/main.cc
+++ b/sdk/core/allocator/main.cc
@@ -828,10 +828,10 @@
 
 } // namespace
 
-__cheriot_minimum_stack(0x80) ssize_t
+__cheriot_minimum_stack(0x90) ssize_t
   heap_quota_remaining(struct SObjStruct *heapCapability)
 {
-	STACK_CHECK(0x80);
+	STACK_CHECK(0x90);
 	LockGuard g{lock};
 	auto     *cap = malloc_capability_unseal(heapCapability);
 	if (cap == nullptr)
@@ -841,9 +841,9 @@
 	return cap->quota;
 }
 
-__cheriot_minimum_stack(0xb0) void heap_quarantine_empty()
+__cheriot_minimum_stack(0xc0) void heap_quarantine_empty()
 {
-	STACK_CHECK(0xb0);
+	STACK_CHECK(0xc0);
 	LockGuard g{lock};
 	while (gm->heapQuarantineSize > 0)
 	{
@@ -857,12 +857,12 @@
 	}
 }
 
-__cheriot_minimum_stack(0x200) void *heap_allocate(Timeout *timeout,
+__cheriot_minimum_stack(0x210) void *heap_allocate(Timeout *timeout,
                                                    SObj     heapCapability,
                                                    size_t   bytes,
                                                    uint32_t flags)
 {
-	STACK_CHECK(0x200);
+	STACK_CHECK(0x210);
 	if (!check_timeout_pointer(timeout))
 	{
 		return nullptr;
@@ -882,10 +882,10 @@
 	return malloc_internal(bytes, std::move(g), cap, timeout, false, flags);
 }
 
-__cheriot_minimum_stack(0x1b0) ssize_t
+__cheriot_minimum_stack(0x1c0) ssize_t
   heap_claim(SObj heapCapability, void *pointer)
 {
-	STACK_CHECK(0x1b0);
+	STACK_CHECK(0x1c0);
 	LockGuard g{lock};
 	auto     *cap = malloc_capability_unseal(heapCapability);
 	if (cap == nullptr)
@@ -912,18 +912,18 @@
 	return 0;
 }
 
-__cheriot_minimum_stack(0xe0) int heap_can_free(SObj  heapCapability,
+__cheriot_minimum_stack(0xf0) int heap_can_free(SObj  heapCapability,
                                                 void *rawPointer)
 {
-	STACK_CHECK(0xe0);
+	STACK_CHECK(0xf0);
 	LockGuard g{lock};
 	return heap_free_internal(heapCapability, rawPointer, false);
 }
 
-__cheriot_minimum_stack(0x250) int heap_free(SObj  heapCapability,
+__cheriot_minimum_stack(0x260) int heap_free(SObj  heapCapability,
                                              void *rawPointer)
 {
-	STACK_CHECK(0x250);
+	STACK_CHECK(0x260);
 	LockGuard g{lock};
 	int       ret = heap_free_internal(heapCapability, rawPointer, true);
 	if (ret != 0)
@@ -942,9 +942,9 @@
 	return 0;
 }
 
-__cheriot_minimum_stack(0x180) ssize_t heap_free_all(SObj heapCapability)
+__cheriot_minimum_stack(0x190) ssize_t heap_free_all(SObj heapCapability)
 {
-	STACK_CHECK(0x180);
+	STACK_CHECK(0x190);
 	LockGuard g{lock};
 	auto     *capability = malloc_capability_unseal(heapCapability);
 	if (capability == nullptr)
@@ -981,13 +981,13 @@
 	return freed;
 }
 
-__cheriot_minimum_stack(0x200) void *heap_allocate_array(Timeout *timeout,
+__cheriot_minimum_stack(0x210) void *heap_allocate_array(Timeout *timeout,
                                                          SObj   heapCapability,
                                                          size_t nElements,
                                                          size_t elemSize,
                                                          uint32_t flags)
 {
-	STACK_CHECK(0x200);
+	STACK_CHECK(0x210);
 	if (!check_timeout_pointer(timeout))
 	{
 		return nullptr;
@@ -1158,14 +1158,14 @@
 	return nullptr;
 }
 
-__cheriot_minimum_stack(0x270) SObj
+__cheriot_minimum_stack(0x280) SObj
   token_sealed_unsealed_alloc(Timeout *timeout,
                               SObj     heapCapability,
                               SKey     key,
                               size_t   sz,
                               void   **unsealed)
 {
-	STACK_CHECK(0x270);
+	STACK_CHECK(0x280);
 	if (!check_timeout_pointer(timeout))
 	{
 		return INVALID_SOBJ;
@@ -1185,12 +1185,12 @@
 	return INVALID_SOBJ;
 }
 
-__cheriot_minimum_stack(0x250) SObj token_sealed_alloc(Timeout *timeout,
+__cheriot_minimum_stack(0x260) SObj token_sealed_alloc(Timeout *timeout,
                                                        SObj     heapCapability,
                                                        SKey     rawKey,
                                                        size_t   sz)
 {
-	STACK_CHECK(0x250);
+	STACK_CHECK(0x260);
 	return allocate_sealed_unsealed(
 	         timeout, heapCapability, rawKey, sz, {Permission::Seal})
 	  .first;
@@ -1218,11 +1218,11 @@
 	return unseal_if_valid(obj);
 }
 
-__cheriot_minimum_stack(0x250) int token_obj_destroy(SObj heapCapability,
+__cheriot_minimum_stack(0x260) int token_obj_destroy(SObj heapCapability,
                                                      SKey key,
                                                      SObj object)
 {
-	STACK_CHECK(0x250);
+	STACK_CHECK(0x260);
 	void *unsealed;
 	{
 		LockGuard g{lock};
@@ -1240,11 +1240,11 @@
 	return heap_free(heapCapability, unsealed);
 }
 
-__cheriot_minimum_stack(0xe0) int token_obj_can_destroy(SObj heapCapability,
+__cheriot_minimum_stack(0xf0) int token_obj_can_destroy(SObj heapCapability,
                                                         SKey key,
                                                         SObj object)
 {
-	STACK_CHECK(0xe0);
+	STACK_CHECK(0xf0);
 	void *unsealed;
 	{
 		LockGuard g{lock};
diff --git a/sdk/core/loader/boot.cc b/sdk/core/loader/boot.cc
index 11cb2c2..a72541f 100644
--- a/sdk/core/loader/boot.cc
+++ b/sdk/core/loader/boot.cc
@@ -855,6 +855,9 @@
 
 			// Stack pointer points to the top of the stack.
 			stack.address() += config.stack.size();
+			// Reserve space at the start of the stack for error handling and so
+			// on.
+			stack.address() -= STACK_ENTRY_RESERVED_SPACE;
 			Debug::log("Thread's stack is {}", stack);
 			threadTStack->csp = stack;
 
@@ -881,6 +884,8 @@
 			threadTStack->frameoffset = offsetof(TrustedStack, frames[1]);
 			threadTStack->frames[0].calleeExportTable =
 			  build(compartment.exportTable);
+			// Special case: The first frame has the initial csp.
+			threadTStack->frames[0].csp = stack;
 
 			Debug::log("Thread's trusted stack is {}", threadTStack);
 
diff --git a/sdk/core/loader/types.h b/sdk/core/loader/types.h
index fea0a7e..ec488c9 100644
--- a/sdk/core/loader/types.h
+++ b/sdk/core/loader/types.h
@@ -1022,10 +1022,17 @@
 
 		/**
 		 * The offset of the compartment's error handler from the start of
-		 * `pcc`.  This must always be positive, a value of -1 is used to
-		 * indicate that this compartment does not provide an error handler.
+		 * `pcc`.  A value of 0xffff indicates that this compartment does not
+		 * provide an error handler.
 		 */
-		ptrdiff_t errorHandler;
+		uint16_t errorHandler;
+
+		/**
+		 * The offset of the compartment's stackless error handler from the
+		 * start of `pcc`.  A value of 0xffff indicates that this compartment
+		 * does not provide a stackless error handler.
+		 */
+		uint16_t errorHandlerStackless;
 	};
 
 	/**
diff --git a/sdk/core/scheduler/main.cc b/sdk/core/scheduler/main.cc
index 95ef0e2..18c0d7b 100644
--- a/sdk/core/scheduler/main.cc
+++ b/sdk/core/scheduler/main.cc
@@ -434,10 +434,10 @@
 	return ret;
 }
 
-__cheriot_minimum_stack(0x80) int __cheri_compartment("sched")
+__cheriot_minimum_stack(0x90) int __cheri_compartment("sched")
   thread_sleep(Timeout *timeout, uint32_t flags)
 {
-	STACK_CHECK(0x80);
+	STACK_CHECK(0x90);
 	if (!check_timeout_pointer(timeout))
 	{
 		return -EINVAL;
@@ -449,12 +449,12 @@
 	return 0;
 }
 
-__cheriot_minimum_stack(0xa0) int futex_timed_wait(Timeout        *timeout,
+__cheriot_minimum_stack(0xb0) int futex_timed_wait(Timeout        *timeout,
                                                    const uint32_t *address,
                                                    uint32_t        expected,
                                                    uint32_t        flags)
 {
-	STACK_CHECK(0xa0);
+	STACK_CHECK(0xb0);
 	if (!check_timeout_pointer(timeout) ||
 	    !check_pointer<PermissionSet{Permission::Load}>(address))
 	{
@@ -536,9 +536,9 @@
 	return 0;
 }
 
-__cheriot_minimum_stack(0x90) int futex_wake(uint32_t *address, uint32_t count)
+__cheriot_minimum_stack(0xa0) int futex_wake(uint32_t *address, uint32_t count)
 {
-	STACK_CHECK(0x90);
+	STACK_CHECK(0xa0);
 	if (!check_pointer<PermissionSet{Permission::Store}>(address))
 	{
 		return -EINVAL;
@@ -577,13 +577,13 @@
 	return woke;
 }
 
-__cheriot_minimum_stack(0x50) int multiwaiter_create(
+__cheriot_minimum_stack(0x60) int multiwaiter_create(
   Timeout           *timeout,
   struct SObjStruct *heapCapability,
   MultiWaiter      **ret,
   size_t             maxItems)
 {
-	STACK_CHECK(0x50);
+	STACK_CHECK(0x60);
 	int error;
 	// Don't bother checking if timeout is valid, the allocator will check for
 	// us.
@@ -597,20 +597,20 @@
 	return write_result(reinterpret_cast<void **>(ret), mw);
 }
 
-__cheriot_minimum_stack(0x60) int multiwaiter_delete(
+__cheriot_minimum_stack(0x70) int multiwaiter_delete(
   struct SObjStruct *heapCapability,
   MultiWaiter       *mw)
 {
-	STACK_CHECK(0x60);
+	STACK_CHECK(0x70);
 	return deallocate<MultiWaiterInternal>(heapCapability, mw);
 }
 
-__cheriot_minimum_stack(0xb0) int multiwaiter_wait(Timeout           *timeout,
+__cheriot_minimum_stack(0xc0) int multiwaiter_wait(Timeout           *timeout,
                                                    MultiWaiter       *waiter,
                                                    EventWaiterSource *events,
                                                    size_t newEventsCount)
 {
-	STACK_CHECK(0xb0);
+	STACK_CHECK(0xc0);
 	return typed_op<MultiWaiterInternal>(waiter, [&](MultiWaiterInternal &mw) {
 		if (newEventsCount > mw.capacity())
 		{
@@ -699,9 +699,9 @@
 } // namespace
 
 [[cheri::interrupt_state(disabled)]] __cheriot_minimum_stack(
-  0x20) const uint32_t *interrupt_futex_get(struct SObjStruct *sealed)
+  0x30) const uint32_t *interrupt_futex_get(struct SObjStruct *sealed)
 {
-	STACK_CHECK(0x20);
+	STACK_CHECK(0x30);
 	auto     *interruptCapability = Handle::unseal<InterruptCapability>(sealed);
 	uint32_t *result              = nullptr;
 	if (interruptCapability && interruptCapability->state.mayWait)
@@ -719,9 +719,9 @@
 }
 
 [[cheri::interrupt_state(disabled)]] __cheriot_minimum_stack(
-  0x10) int interrupt_complete(struct SObjStruct *sealed)
+  0x20) int interrupt_complete(struct SObjStruct *sealed)
 {
-	STACK_CHECK(0x10);
+	STACK_CHECK(0x20);
 	auto *interruptCapability = Handle::unseal<InterruptCapability>(sealed);
 	if (interruptCapability && interruptCapability->state.mayComplete)
 	{
diff --git a/sdk/core/switcher/entry.S b/sdk/core/switcher/entry.S
index 716248d..8ef9cca 100644
--- a/sdk/core/switcher/entry.S
+++ b/sdk/core/switcher/entry.S
@@ -227,6 +227,8 @@
 #endif
 	zero_stack         t2, s0, gp
 after_zero:
+	// Reserve space for unwind state and so on.
+	cincoffset 	       csp, csp, -STACK_ENTRY_RESERVED_SPACE
 #ifdef CONFIG_MSHWM
 	// store new stack top as stack high water mark
 	csrw               CSR_MSHWM, sp
@@ -256,6 +258,8 @@
 	// At this point, we have already truncated the stack and so the length of
 	// the stack is the length that the callee can use.
 	cgetlen            t2, csp
+	// Include the space we reserved for the unwind state.
+	addi               t2, t2, -STACK_ENTRY_RESERVED_SPACE
 	bgtu               tp, t2, .Lstack_too_small
 
 	// Get the flags field into tp
@@ -481,24 +485,11 @@
 	csrw               CSR_MSHWMB, x1
 #endif
 	cspecialw          mepcc, ct2
-	csb                zero, TrustedStack_offset_inForcedUnwind(csp)
 	// c2 is csp, which will be loaded last and will overwrite the trusted
 	// stack pointer with the thread's stack pointer.
 	reloadRegisters c1, cgp, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, csp
 	mret
 
-// If we detect an invalid entry and there is no error handler installed, we want
-// to resume rather than unwind.
-.Linvalid_entry:
-// Mark this threads as in the middle of a forced unwind.
-	li                 a0, 1
-	csb                a0, TrustedStack_offset_inForcedUnwind(ctp)
-// Make sure we don't leak anything to the compartment.
-// Registers might been used by the call and therefore need zeroing.
-	zeroAllRegistersExcept a0, s0, s1, sp, a2, gp
-// Store an error value in return registers, which will be passed to the
-// caller on unwind. a1 is zeroed by zeroAllRegistersExcept.
-	li                 a0, -1
 // We are starting a forced unwind.  This is reached either when we are unable
 // to run an error handler, or when we do run an error handler and it instructs
 // us to return.  This treats all register values as undefined on entry.
@@ -506,57 +497,32 @@
 	// Pop the trusted stack frame.
 	cjal               .Lpop_trusted_stack_frame
 	cmove              cra, ca2
-.Lout_of_trusted_stack:
-	cmove              ct0, csp
-	// Fetch the trusted stack pointer.
-	cspecialr          csp, mtdc
-	// csp now points to the save reg frame that we can use.
-	// Spill all of the registers that we want to propagate to the caller:
-	// c1(cra), c2(csp), c3(cgp), c8(cs0), c9(cs1), c10(ca0), c11(ca1)
-	csc                ct0, TrustedStack_offset_csp(csp)
-	spillRegisters c1, cgp, c8, c9, c10, c11
-	// Store an unsealed version of cra in the mepcc slot, where it will be
-	// used for mret later.  mret requires an unsealed capability in mepcc, so
-	// we have to unseal it if it is sealed.
-	LoadCapPCC         cs0, compartment_switcher_sealing_key
-	// ca2 at this point was loaded by .Lpop_trusted_stack_frame from the pcc
-	// in the trusted stack and so should always be sealed as a sentry type.
-	cgettype           gp, cra
-	csetaddr           cs0, cs0, gp
-	cunseal            cra, cra, cs0
-	csc                cra, TrustedStack_offset_mepcc(csp)
-	clw                t0, TrustedStack_offset_mstatus(csp)
-	// If gp==2 then the we need to disable interrupts on return, otherwise we
-	// need to enable them.  The interrupt enable bit is bit 7.  We want to set
-	// bit 7 if interrupts are enabled, clear it if they are disabled, but not
-	// toggle any other bits.
-	// Clear the interrupt enable bit unconditionally
-	andi               t0, t0, ~0x80
-	// Set it again if we should have interrupts enabled
-	li                 a3, 2
-	beq                gp, a3, .Ldo_not_enable
-	ori                t0, t0, 0x80
-.Ldo_not_enable:
-	csw                t0, TrustedStack_offset_mstatus(csp)
+	// Zero all registers apart from RA, GP, SP and return args.
+	// cra, cs0, cs1, and cgp were restored from the compartment's stack
+	// csp restored from the trusted stack.
+	// ca0, used for first return value
+	// ca1, used for second return value
+	zeroAllRegistersExcept ra, sp, gp, s0, s1, a0, a1
+	li                 a0, -ECOMPARTMENTFAIL
+	li                 a1, 0
+	cret
 
-	// Zero all registers that we aren't explicitly restoring to avoid leaks
-	// from the faulting callee to the caller.
-	csc                cnull, TrustedStack_offset_c4(csp)
-	csc                cnull, TrustedStack_offset_c5(csp)
-	csc                cnull, TrustedStack_offset_c6(csp)
-	csc                cnull, TrustedStack_offset_c7(csp)
-	csc                cnull, TrustedStack_offset_c12(csp)
-	csc                cnull, TrustedStack_offset_c13(csp)
-	csc                cnull, TrustedStack_offset_c14(csp)
-	csc                cnull, TrustedStack_offset_c15(csp)
-	// Mark this threads as in the middle of a forced unwind.
-	li                 a0, 1
-	csb                a0, TrustedStack_offset_inForcedUnwind(csp)
-	// Spill a fake status and cap cause (CHERI fault, no cause)
-	li                 a0, 0x1c
-	csw                a0, TrustedStack_offset_mcause(csp)
-	csrw               mtval, zero
-	// Fall through to handle error
+
+// If we have run out of trusted stack, then just restore the caller's state
+// and return an error value.
+.Lout_of_trusted_stack:
+	// Restore the spilled values
+	clc                cs0, SPILL_SLOT_cs0(csp)
+	clc                cs1, SPILL_SLOT_cs1(csp)
+	clc                cra, SPILL_SLOT_pcc(csp)
+	clc                cgp, SPILL_SLOT_cgp(csp)
+	cincoffset         csp, csp, SPILL_SLOT_SIZE
+	// Set the return registers
+	li                 a0, -ENOTENOUGHTRUSTEDSTACK
+	li                 a1, 0
+	// Zero everything else
+	zeroAllRegistersExcept ra, sp, gp, s0, s1, a0, a1
+	cret
 
 // If we have a possibly recoverable error, see if we have a useful error
 // handler.  At this point, the register state will have been saved in the
@@ -593,40 +559,85 @@
 
 	// Load the interrupted thread's stack pointer into ct0
 	clc                ct0, TrustedStack_offset_csp(csp)
-	// Fetch the base of compartment stack before cincoffset for later
-	// comparison. The subsequent cincoffset could cause the base to change, if
-	// the capability becomes unrepresentable. That would clear the tag, and so
-	// we'd fault later.  Faulting in the switcher will then trigger a forced
-	// unwind.
-	cgetbase           tp, ct0
-	// Allocate space for the register save frame on the stack.  If we didn't
-	// have enough space here, we'll fault setting up the call frame, which
-	// will detect a trap in the switcher and force unwind.
-	cincoffset         ct0, ct0, -(16*8)
 	// See if we can find a handler:
 	clhu               tp, TrustedStack_offset_frameoffset(csp)
 	li                 t1, TrustedStack_offset_frames
-	beq                tp, t1, .Lend_of_stack
+	beq                tp, t1, .Lreset_mepcc_and_install_context
 	addi               tp, tp, -TrustedStackFrame_size
+
 	// ctp points to the current available trusted stack frame.
 	cincoffset         ctp, csp, tp
-	// ct1 now contains the export table for the callee
+	// a0 indicates whether we're calling a stackless error handler (0: stack,
+	// 1: stackless)
+	li                 a0, 0
+
+	// Allocate space for the register save frame on the stack.
+	cincoffset         ct0, ct0, -(16*8)
+
+	// WARNING: ENCODING SPECIFIC.
+	// The following depends on the fact that before-the-start values are not
+	// representable in the CHERIoT encoding and so will clear the tag.  If
+	// this property changes then this will need to be replaced by a check
+	// against the base of the stack.  Note that this check can't be a simple
+	// cgetbase on ct0, because moving the address below the base sufficiently
+	// far that it's out of *representable* bounds will move the reported base
+	// value (base is a displacement from the address).
+	cgettag            t1, ct0
+	// If there isn't enough space on the stack, see if there's a stackless
+	// handler.
+	beqz               t1, .Ltry_stackless_handler
+
 	clc                ct1, TrustedStackFrame_offset_calleeExportTable(ctp)
-	// Reset the export table pointer to point to the *start* of the export
+	// Set the export table pointer to point to the *start* of the export
 	// table.  It will currently point to the entry point that was raised.
 	// TODO: We might want to pass this to the error handler, it might be
 	// useful for providing per-entry-point error results.
 	cgetbase           s0, ct1
 	csetaddr           ct1, ct1, s0
-	clw                s0, ExportTable_offset_errorHandler(ct1)
-	// A value of -1 indicates no error handler
-	// Give up if there is no error handler for this compartment.
-	addi               s1, s0, 1
-	beqz               s1, .Lno_handler_found
+	clhu               s0, ExportTable_offset_errorHandler(ct1)
+	// A value of 0xffff indicates no error handler
+	// If we found one, use it, otherwise fall through and try to find a
+	// stackless handler.
+	li                 s1, 0xffff
+	bne                s0, s1, .Lhandler_found
 
-	// If we have found a handler, mark this threads as no longer on the
-	// force-unwind path.  Any future fault will trigger a forced unwind.
-	csb                zero, TrustedStack_offset_inForcedUnwind(csp)
+.Ltry_stackless_handler:
+	clc                ct1, TrustedStackFrame_offset_calleeExportTable(ctp)
+	// Set the export table pointer to point to the *start* of the export
+	// table.  It will currently point to the entry point that was raised.
+	cgetbase           s0, ct1
+	csetaddr           ct1, ct1, s0
+	clhu               s0, ExportTable_offset_errorHandlerStackless(ct1)
+	// A value of 0xffff indicates no stackless error handler
+	// Give up if there is no error handler for this compartment.
+	li                 s1, 0xffff
+	beq                s0, s1, .Lforce_unwind
+
+	// The stack may have had its tag cleared at this point, so for stackless
+	// handlers we need to restore the on-entry stack.
+	// Get the previous trusted stack frame
+
+	// Load the caller's csp
+	clc                ca0, TrustedStackFrame_offset_csp(ctp)
+
+	// If this is the top stack frame, then the csp field is the value on
+	// entry.  If it's any other frame then we need to go to the previous one
+	cincoffset         cs1, csp, TrustedStack_offset_frames
+	beq                s1, t1, .Lrecovered_stack
+
+	// The address of the stack pointer will point to the bottom of the
+	// caller's save area, so we set the bounds to be the base up to the
+	// current address.
+	cgetaddr           a1, ca0
+	cgetbase           a2, ca0
+	sub                a1, a1, a2
+	csetaddr           ca0, ca0, a2
+	// The code that installs the context expects csp to be in ct0
+	csetboundsexact    ct0, ca0, a1
+.Lrecovered_stack:
+	li                 a0, 1
+
+.Lhandler_found:
 
 	// Increment the handler invocation count.
 	clhu               s1, TrustedStackFrame_offset_errorHandlerCount(ctp)
@@ -656,6 +667,16 @@
 	csetaddr           cra, cra, s1
 	cincoffset         cra, cra, s0
 
+	// If we're in an error handler with a stack, set up the stack, otherwise
+	// we just need to set up argument registers.
+	beqz               a0, .Lset_up_stack_handler
+	clw                a0, TrustedStack_offset_mcause(csp)
+	csrr               a1, mtval
+	li                 a2, 0
+	cmove              csp, ct0
+	j                  .Linvoke_error_handler
+
+.Lset_up_stack_handler:
 	// Set up the on-stack context for the callee
 	clc                cs1, 0(csp)
 	ccleartag          cs1, cs1
@@ -671,6 +692,8 @@
 	clw                a1, TrustedStack_offset_mcause(csp)
 	csrr               a2, mtval
 	cmove              csp, ca0
+
+.Linvoke_error_handler:
 	// Clear all registers except:
 	// cra is set by cjalr.  csp and cgp are needed for the called compartment.
 	// ca0, used for the register state
@@ -751,23 +774,11 @@
 	j                  .Lhandle_error
 
 
-// We have reached the end of the stack.  If we are in a forced unwind then we
-// just install the context, if we've gone off the top of the stack then we
-// should report this gracefully.
-.Lend_of_stack:
-	clb                a2, TrustedStack_offset_inForcedUnwind(csp)
-	bnez               a2, .Lreset_mepcc_and_install_context
 	// Value 24 is reserved for custom use.
 .Lset_mcause_and_exit_thread:
 	csrw               mcause, 24
 	j                  .Lthread_exit
 
-// No handler was found.  If we are in the middle of unwinding, then we want to
-// just install the context but if this is a fault then we keep going up the
-// stack.
-.Lno_handler_found:
-	clb                a2, TrustedStack_offset_inForcedUnwind(csp)
-	beqz               a2, .Lforce_unwind
 	// The continue-resume path expects the location that we will mret to to be
 	// in ct2.  If we're just resuming, then resume from the stashed link
 	// register value.
@@ -876,8 +887,24 @@
 	// two instructions.
 	cspecialr          ca0, mtdc
 	clhu               a1, TrustedStack_offset_frameoffset(ca0)
+	addi               a1, a1, -TrustedStackFrame_size
 	cincoffset         ca0, ca0, a1
 	clc                ca0, TrustedStackFrame_offset_csp(ca0)
+	// If this is the first frame, then the recovered stack will be the stack
+	// on entry.  If this is not the first frame then we need to find the
+	// saved CSP from the caller and reset the bounds.  The address of the
+	// saved CSP will be the value after the switcher spilled registers and so
+	// will be the top of the callee's stack.
+	li                 a2, TrustedStack_offset_frames
+	beq                a1, a2, 0f
+
+	// Find the previous frame's csp and reset the bounds
+	cgetaddr           a1, ca0
+	cgetbase           a2, ca0
+	sub                a1, a1, a2
+	csetaddr           ca0, ca0, a2
+	csetboundsexact    ca0, ca0, a1
+0:
 	cret
 
 	.section .text, "ax", @progbits
@@ -988,6 +1015,25 @@
 	csrr               a0, CSR_MSHWM
 	cret
 
+	.section .text, "ax", @progbits
+	.p2align 2
+	.type __Z39switcher_handler_invocation_count_resetv,@function
+__Z39switcher_handler_invocation_count_resetv:
+	// Trusted stack pointer in ca1
+	cspecialr          ca1, mtdc
+	// Offset of the current trusted stack frame to a0
+	clhu               a0, TrustedStack_offset_frameoffset(ca1)
+	addi               a0, a0, -TrustedStackFrame_size
+	// Current trusted stack frame to ca1, a0 is dead
+	cincoffset         ca1, ca1, a0
+	// Current invocation count (for return) in a0
+	clh                a0, TrustedStackFrame_offset_errorHandlerCount(ca1)
+	// Reset invocation count
+	csh                zero, TrustedStackFrame_offset_errorHandlerCount(ca1)
+	// Zero trusted stack frame pointer register
+	li                 a1, 0
+	cret
+
 // The linker expects export tables to start with space for cgp and pcc, then
 // the compartment error handler.  We should eventually remove that requirement
 // for library export tables, but since they don't consume RAM after loading
@@ -1022,3 +1068,4 @@
 export __Z28switcher_thread_hazard_slotsv
 export __Z13thread_id_getv
 export __Z25stack_lowest_used_addressv
+export __Z39switcher_handler_invocation_count_resetv
diff --git a/sdk/core/switcher/export-table-assembly.h b/sdk/core/switcher/export-table-assembly.h
index 3d446ef..5581f78 100644
--- a/sdk/core/switcher/export-table-assembly.h
+++ b/sdk/core/switcher/export-table-assembly.h
@@ -10,3 +10,4 @@
 EXPORT_ASSEMBLY_OFFSET(ExportTable, pcc, 0)
 EXPORT_ASSEMBLY_OFFSET(ExportTable, cgp, 8)
 EXPORT_ASSEMBLY_OFFSET(ExportTable, errorHandler, 16)
+EXPORT_ASSEMBLY_OFFSET(ExportTable, errorHandlerStackless, 18)
diff --git a/sdk/core/switcher/trusted-stack-assembly.h b/sdk/core/switcher/trusted-stack-assembly.h
index f321548..02fc248 100644
--- a/sdk/core/switcher/trusted-stack-assembly.h
+++ b/sdk/core/switcher/trusted-stack-assembly.h
@@ -28,12 +28,12 @@
 
 // Size of everything up to this point
 #	define TSTACK_REGFRAME_SZ (19 * 8)
-// frameoffset, inForcedUnwind and padding
+// frameoffset and padding
 #	define TSTACK_HEADER_SZ 16
 #else
 // Size of everything up to this point
 #	define TSTACK_REGFRAME_SZ ((17 * 8) + (2 * 4))
-// frameoffset, inForcedUnwind and padding
+// frameoffset and padding
 #	define TSTACK_HEADER_SZ 8
 #endif
 // The basic trusted stack is the size of the save area, 8 bytes of state for
@@ -47,7 +47,6 @@
                        TSTACK_REGFRAME_SZ + TSTACK_HEADER_SZ)
 EXPORT_ASSEMBLY_OFFSET(TrustedStack, frameoffset, TSTACK_REGFRAME_SZ)
 EXPORT_ASSEMBLY_OFFSET(TrustedStack, threadID, TSTACK_REGFRAME_SZ + 2)
-EXPORT_ASSEMBLY_OFFSET(TrustedStack, inForcedUnwind, TSTACK_REGFRAME_SZ + 4)
 
 EXPORT_ASSEMBLY_OFFSET(TrustedStackFrame, csp, 0)
 EXPORT_ASSEMBLY_OFFSET(TrustedStackFrame, calleeExportTable, 8)
@@ -66,3 +65,10 @@
  *  Load, Store, LoadStoreCapability, LoadMutable StoreLocal and LoadGlobal
  */
 #define COMPARTMENT_STACK_PERMISSIONS 0x7e
+
+/**
+ * Space reserved at the top of a stack on entry to the compartment.
+ *
+ * This *must* be a multiple of 16, which is the stack alignment.
+ */
+#define STACK_ENTRY_RESERVED_SPACE 16
diff --git a/sdk/core/switcher/tstack.h b/sdk/core/switcher/tstack.h
index 0875249..45af06e 100644
--- a/sdk/core/switcher/tstack.h
+++ b/sdk/core/switcher/tstack.h
@@ -59,17 +59,12 @@
 	 * The ID of the current thread.  Never modified during execution.
 	 */
 	uint16_t threadID;
-	/**
-	 * Flag indicating whether this thread is in the process of a forced
-	 * unwind.  If so, this is one, otherwise it is zero.
-	 */
-	uint8_t inForcedUnwind;
 	// Padding up to multiple of 16-bytes.
 	uint8_t padding[
 #ifdef CONFIG_MSHWM
-	  11
+	  12
 #else
-	  3
+	  4
 #endif
 	];
 	/**
diff --git a/sdk/include/cdefs.h b/sdk/include/cdefs.h
index ad3b9ae..2242e7e 100644
--- a/sdk/include/cdefs.h
+++ b/sdk/include/cdefs.h
@@ -115,4 +115,8 @@
  */
 unsigned __builtin_strlen(const char *str) __asm__("_Z6strlenPKc");
 
+#if !defined(CLANG_TIDY) && !__has_builtin(__builtin_cheri_top_get)
+#	error Your compiler is too old for this version of CHERIoT RTOS, please upgrade to a newer version
+#endif
+
 #endif // _CDEFS_H_
diff --git a/sdk/include/errno.h b/sdk/include/errno.h
index 07cf199..35aa193 100644
--- a/sdk/include/errno.h
+++ b/sdk/include/errno.h
@@ -89,6 +89,7 @@
 #define ENOTRECOVERABLE 131 // State not recoverable.
 #define EOVERFLOW 139       // Value too large to be stored in data type.
 #define ENOTENOUGHSTACK 140 // Insufficient stack space for cross-compartment call.
+#define ENOTENOUGHTRUSTEDSTACK 141 // Insufficient trusted stack space for cross-compartment call.
 #define EWOULDBLOCK EAGAIN  // Operation would block.
 #define ENOTSUP EOPNOTSUPP  // Not supported.
 #define __ELASTERROR 2000   // Users can add values starting here.
diff --git a/sdk/include/fail-simulator-on-error.h b/sdk/include/fail-simulator-on-error.h
index 52a65d3..d26dceb 100644
--- a/sdk/include/fail-simulator-on-error.h
+++ b/sdk/include/fail-simulator-on-error.h
@@ -56,20 +56,17 @@
 		  frame->get_register_value<CHERI::RegisterNumber::CSP>()};
 		CHERI::Capability returnCapability{
 		  frame->get_register_value<CHERI::RegisterNumber::CRA>()};
+		// The top of the stack is 16 bytes above the stack pointer on entry,
+		// to provide space for unwind lists and so on.
 		if (registerNumber == CHERI::RegisterNumber::CRA &&
 		    returnCapability.address() == 0 &&
 		    exceptionCode == CHERI::CauseCode::TagViolation &&
-		    stackCapability.top() == stackCapability.address())
+		    (stackCapability.top() - 16) == stackCapability.address())
 		{
 			// looks like thread exit -- just log it then ForceUnwind
 			DebugErrorHandler::log(
 			  "Thread exit CSP={}, PCC={}", stackCapability, frame->pcc);
 		}
-		else if (exceptionCode == CHERI::CauseCode::None)
-		{
-			// An unwind occurred from a called compartment, just resume.
-			return ErrorRecoveryBehaviour::InstallContext;
-		}
 		else
 		{
 			// An unexpected error -- log it and end the simulation
diff --git a/sdk/include/setjmp.h b/sdk/include/setjmp.h
new file mode 100644
index 0000000..d8d3cdf
--- /dev/null
+++ b/sdk/include/setjmp.h
@@ -0,0 +1,64 @@
+// Copyright Microsoft and CHERIoT Contributors.
+// SPDX-License-Identifier: MIT
+#pragma once
+
+/**
+ * This is a minimal implementation of setjmp/longjmp.
+ *
+ * CHERIoT cannot store a `jmp_buf` anywhere other than the stack without
+ * clearing tags (which will then cause `longjmp` to fail).
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Jump buffer for setjmp/longjmp: holds the saved cs0, cs1, csp and cra.
+ */
+struct __jmp_buf
+{
+	uintptr_t __cs0; // Saved cs0; `setjmp` stores it at offset 0.
+	uintptr_t __cs1; // Saved cs1; `setjmp` stores it at offset 8.
+	uintptr_t __csp; // Saved stack pointer; `setjmp` stores it at offset 16.
+	uintptr_t __cra; // Saved return address; `setjmp` stores it at offset 24.
+};
+
+/**
+ * C requires that `setjmp` and `longjmp` take a `jmp_buf` by reference and so
+ * this ends up being defined as an array of one element, which allows it to
+ * both be allocated and passed by reference.
+ */
+typedef struct __jmp_buf jmp_buf[1];
+
+/**
+ * C `setjmp` function.  Saves cs0, cs1, csp and cra in `env` and returns 0.
+ * Returns a second time, with the value given to `longjmp`, if that is called.
+ */
+__attribute__((returns_twice)) extern "C" int setjmp(jmp_buf env);
+__asm__(".section .text.setjmp,\"awG\",@progbits,setjmp,comdat\n"
+        ".globl setjmp\n"
+        ".p2align 2\n"
+        ".type setjmp,@function\n"
+        "setjmp:\n"
+        "	csc	cs0, 0(ca0)\n"
+        "	csc	cs1, 8(ca0)\n"
+        "	csc	csp, 16(ca0)\n"
+        "	csc	cra, 24(ca0)\n"
+        "	li	a0, 0\n"
+        "	cret\n");
+
+/// C `longjmp` function.  Does not return, jumps back to the `setjmp` call,
+/// which then returns `val` (or 1 if `val` is 0, as the C standard requires).
+extern "C" void longjmp(jmp_buf env, int val);
+__asm__(".section .text.longjmp,\"awG\",@progbits,longjmp,comdat\n"
+        ".globl longjmp\n"
+        ".p2align 2\n"
+        ".type longjmp,@function\n"
+        "longjmp:\n"
+        "	clc	cs0, 0(ca0)\n"
+        "	clc	cs1, 8(ca0)\n"
+        "	clc	csp, 16(ca0)\n"
+        "	clc	cra, 24(ca0)\n"
+        "	seqz	a0, a1\n"
+        "	add	a0, a0, a1\n"
+        "	cjr	cra\n");
diff --git a/sdk/include/switcher.h b/sdk/include/switcher.h
index a1088b6..5bfa480 100644
--- a/sdk/include/switcher.h
+++ b/sdk/include/switcher.h
@@ -1,6 +1,7 @@
 #pragma once
 #include <cdefs.h>
 #include <stddef.h>
+#include <stdint.h>
 
 /**
  * Returns true if the trusted stack contains at least `requiredFrames` frames
@@ -56,3 +57,26 @@
  * compartment invocation.
  */
 __cheri_libcall ptraddr_t stack_lowest_used_address(void);
+
+/**
+ * Resets the switcher's count of error-handler invocations.
+ *
+ * The switcher places a limit on the number of times a compartment invocation
+ * may fault (default 512).  This prevents a compartment from getting stuck
+ * partially recovering from errors.  This function resets the count to zero.
+ * It should be called from outer compartments' run loops and from places where
+ * the caller is certain that error handling is making forward progress.
+ *
+ * Note: If this is called from an error handler that subsequently returns
+ * with install-context, the switcher will subtract one from this and set the
+ * switcher error count to -1.  Because this is odd, the switcher will detect
+ * the next invocation of the error handler as a double fault and will force
+ * unwind.  Do not call this if you are currently in an error handler unless
+ * you are jumping out via some mechanism that does *not* involve returning to
+ * the switcher.
+ *
+ * Returns the previous value of the invocation count.  The low bit is set if
+ * an error handler is currently running, the remaining bits are the count.
+ */
+__cheri_libcall uint16_t switcher_handler_invocation_count_reset(void);
+
diff --git a/sdk/include/unwind.h b/sdk/include/unwind.h
new file mode 100644
index 0000000..7a1c898
--- /dev/null
+++ b/sdk/include/unwind.h
@@ -0,0 +1,99 @@
+#pragma once
+#include <cdefs.h>
+#include <setjmp.h>
+#include <switcher.h>
+
+/**
+ * On-stack linked list of cleanup handlers.
+ */
+struct CleanupList
+{
+	/// Next (older) entry in the list; null if this is the last one.
+	struct CleanupList *next;
+	/// Jump buffer to return to.
+	struct __jmp_buf env;
+};
+
+/**
+ * Returns a pointer to the head of the cleanup list.
+ *
+ * The head lives in the 8 bytes just below the top of the stack capability, in
+ * the space that the switcher reserves there.  The stack is zeroed on entry to
+ * a compartment and so the head will be null until explicitly written to.
+ */
+__always_inline static inline struct CleanupList **cleanup_list_head()
+{
+	void     *csp = __builtin_cheri_stack_get();
+	ptraddr_t top = __builtin_cheri_top_get(csp);
+	csp           = __builtin_cheri_address_set(csp, top - 8); // Last 8 bytes.
+	return (struct CleanupList **)csp;
+}
+
+/// Unwind the stack to the most recent `CHERIOT_HANDLER` block.  Pops that
+/// block's entry from the cleanup list, resets the switcher's handler
+/// invocation count, and jumps; at least one block must be active.
+__always_inline static inline void cleanup_unwind(void)
+{
+	struct CleanupList **__head = cleanup_list_head();
+	struct CleanupList  *__top  = *__head; // Dereferenced unconditionally below.
+	*__head                     = __top->next;
+	switcher_handler_invocation_count_reset();
+	longjmp(&__top->env, 1);
+}
+
+/**
+ * Simple error handling macros.  These are modelled on the OpenStep exception
+ * macros and are similarly built on top of `setjmp`.  Code between
+ * `CHERIOT_DURING` and `CHERIOT_HANDLER` corresponds to a `try` block.  Code
+ * between `CHERIOT_HANDLER` and `CHERIOT_END_HANDLER` corresponds to a `catch`
+ * block, though no exception value is actually thrown.
+ *
+ * Automatic-storage values accessed in both blocks must be `volatile`.  Do not
+ * `return` out of the first block: its list entry is popped only at its end.
+ */
+#define CHERIOT_DURING                                                         \
+	{                                                                          \
+		struct CleanupList   cleanupListEntry;                                 \
+		struct CleanupList **__head = cleanup_list_head();                     \
+		cleanupListEntry.next       = *__head;                                 \
+		*__head                     = &cleanupListEntry;                       \
+		if (setjmp(&cleanupListEntry.env) == 0)                                \
+		{
+/// See CHERIOT_DURING.
+#define CHERIOT_HANDLER                                                        \
+	*__head = cleanupListEntry.next;                                           \
+	}                                                                          \
+	else                                                                       \
+	{                                                                          \
+		*__head = cleanupListEntry.next;
+
+/// See CHERIOT_DURING.
+#define CHERIOT_END_HANDLER                                                    \
+	}                                                                          \
+	}
+
+#ifdef __cplusplus
+
+/**
+ * On-error helper.  Invokes `fn` and, if `cleanup_unwind` is called, invokes
+ * `err`.  Destructors in between `fn` and the frame that calls
+ * `cleanup_unwind` are not called, but this function returns normally and so
+ * destructors of objects above this on the stack will be called normally.
+ */
+static inline void on_error(auto fn, auto err)
+{
+	CHERIOT_DURING
+	fn();
+	CHERIOT_HANDLER
+	err();
+	CHERIOT_END_HANDLER
+}
+
+/**
+ * On-error helper with no error handler (returns normally from forced unwind).
+ */
+static inline void on_error(auto fn)
+{
+	on_error(fn, []() {});
+}
+#endif
diff --git a/sdk/lib/README.md b/sdk/lib/README.md
index d412525..8d272c6 100644
--- a/sdk/lib/README.md
+++ b/sdk/lib/README.md
@@ -7,11 +7,18 @@
 This collection currently includes:
 
  - [atomic](atomic/) provides atomic support functions.
+ - [compartment_helpers](compartment_helpers/) contains helpers for checking / ensuring that pointers are valid.
  - [crt](crt/) provides C runtime functions that the compiler may emit.
  - [cxxrt](cxxrt/) provides a minimal C++ runtime (no exceptions or RTTI support).
+ - [debug](debug/) contains functions to support the debug logging APIs.
+ - [event_group](event_group/) contains a FreeRTOS-like event-group API.
  - [freestanding](freestanding/) provides a minimal free-standing C implementation.
+ - [locks](locks/) contains functions for various kinds of lock.
+ - [microvium](microvium/) builds the [microvium](https://github.com/coder-mike/microvium) JavaScript VM to provide an on-device JavaScript interpreter.
+ - [queue](queue/) contains functions for message queues.
  - [stdio](stdio/) provides a *very* limited subset of `stdio.h` for debugging.
  - [string](string/) provides `string.h` functions.
  - [thread_pool](thread_pool) provides a simple thread pool that other threads can dispatch work to for asynchronous execution.
- - [microvium](microvium/) builds the [microvium](https://github.com/coder-mike/microvium) JavaScript VM to provide an on-device JavaScript interpreter.
+ - [unwind_error_handler](unwind_error_handler) provides an error handler that unwinds the stack.
+
 
diff --git a/sdk/lib/unwind_error_handler/README.md b/sdk/lib/unwind_error_handler/README.md
new file mode 100644
index 0000000..e7632d5
--- /dev/null
+++ b/sdk/lib/unwind_error_handler/README.md
@@ -0,0 +1,10 @@
+Unwind error handler library
+----------------------------
+
+This library provides an error handler that unwinds using the APIs in [`unwind.h`](../../include/unwind.h).
+
+This error handler can work even in cases of stack overflow.
+
+Note: Unlike most libraries, this is *not* built as a shared library.
+Error handlers must be part of the compartment that invokes them.
+As a result, this must be added as a dependency of each compartment that wishes to use it, rather than as a dependency of the firmware target.
diff --git a/sdk/lib/unwind_error_handler/unwind.S b/sdk/lib/unwind_error_handler/unwind.S
new file mode 100644
index 0000000..6e56a98
--- /dev/null
+++ b/sdk/lib/unwind_error_handler/unwind.S
@@ -0,0 +1,39 @@
+.section .compartment_error_handler_stackless,"aw",@progbits
+.globl compartment_error_handler_stackless
+.p2align 2
+.type compartment_error_handler_stackless,@function
+compartment_error_handler_stackless:
+// Get the head of the error list, stored 8 bytes below the top of the stack.
+	cgettop      t0, csp
+	csetaddr     csp, csp, t0
+	clc          cs0, -8(csp)
+	beqz         s0, .Lforce_unwind
+// Pop the top error from the list.
+	clc          ct0, 0(cs0)
+	csc          ct0, -8(csp)
+// Mark this error handler as having finished.  We may still trap again
+// and reenter this, but now that we've popped the top element from the
+// stack we will run some different cleanup code next time.
+.Llookup_reset:
+	auipcc          ct2, %cheriot_compartment_hi(__library_import_libcalls__Z39switcher_handler_invocation_count_resetv)
+	clc             ct2, %cheriot_compartment_lo_i(.Llookup_reset)(ct2)
+	cjalr           ct2
+// Inline longjmp to the handler; a0 still holds the reset call's return value.
+	clc          cs1, 16(cs0)
+	clc          csp, 24(cs0)
+	clc          cra, 32(cs0)
+	clc          cs0, 8(cs0)
+	cjr          cra
+.Lforce_unwind:
+	li           a0, 1 // NOTE(review): presumably ErrorRecoveryBehaviour::ForceUnwind -- confirm.
+	cret
+
+	.section	.compartment_imports,"aG",@progbits,__library_import_libcalls__Z39switcher_handler_invocation_count_resetv,comdat
+	.type	__library_import_libcalls__Z39switcher_handler_invocation_count_resetv,@object
+	.weak	__library_import_libcalls__Z39switcher_handler_invocation_count_resetv
+	.p2align	3
+__library_import_libcalls__Z39switcher_handler_invocation_count_resetv:
+	.word	__library_export_libcalls__Z39switcher_handler_invocation_count_resetv+1
+	.word	0
+	.size	__library_import_libcalls__Z39switcher_handler_invocation_count_resetv, 8
+
diff --git a/sdk/lib/unwind_error_handler/xmake.lua b/sdk/lib/unwind_error_handler/xmake.lua
new file mode 100644
index 0000000..b27593a
--- /dev/null
+++ b/sdk/lib/unwind_error_handler/xmake.lua
@@ -0,0 +1,3 @@
+target("unwind_error_handler")
+    set_kind("object")
+    add_files("unwind.S")
diff --git a/sdk/lib/xmake.lua b/sdk/lib/xmake.lua
index 66701dd..4833910 100644
--- a/sdk/lib/xmake.lua
+++ b/sdk/lib/xmake.lua
@@ -11,4 +11,5 @@
 	"queue",
 	"stdio",
 	"string",
-	"thread_pool")
+	"thread_pool",
+	"unwind_error_handler")
diff --git a/sdk/xmake.lua b/sdk/xmake.lua
index bac906e..da170b7 100644
--- a/sdk/xmake.lua
+++ b/sdk/xmake.lua
@@ -747,7 +747,7 @@
 				table.insert(objects, dep:targetfile())
 			end
 		end)
-		batchcmds:vrunv(target:tool("ld"), table.join({"--script=" .. linkerscript, "--relax", "-o", target:targetfile(), "--compartment-report=" .. target:targetfile() .. ".json" }, objects), opt)
+		batchcmds:vrunv(target:tool("ld"), table.join({"-n", "--script=" .. linkerscript, "--relax", "-o", target:targetfile(), "--compartment-report=" .. target:targetfile() .. ".json" }, objects), opt)
 		batchcmds:show_progress(opt.progress, "Creating firmware report " .. target:targetfile() .. ".json")
 		batchcmds:show_progress(opt.progress, "Creating firmware dump " .. target:targetfile() .. ".dump")
 		batchcmds:vexecv(target:tool("objdump"), {"-glxsdrS", "--demangle", target:targetfile()}, table.join(opt, {stdout = target:targetfile() .. ".dump"}))
diff --git a/tests/allocator-test.cc b/tests/allocator-test.cc
index c64eaac..97f19c9 100644
--- a/tests/allocator-test.cc
+++ b/tests/allocator-test.cc
@@ -615,7 +615,7 @@
 /**
  * Allocator test entry point.
  */
-void test_allocator()
+int test_allocator()
 {
 	GlobalConstructors::run();
 
@@ -682,4 +682,5 @@
 	TEST(quotaLeft == MALLOC_QUOTA,
 	     "After alloc and free from 0x100000-byte quota, {} bytes left",
 	     quotaLeft);
+	return 0;
 }
diff --git a/tests/check_pointer-test.cc b/tests/check_pointer-test.cc
index a678de8..c079185 100644
--- a/tests/check_pointer-test.cc
+++ b/tests/check_pointer-test.cc
@@ -128,7 +128,8 @@
 	     "with a raw capability.");
 }
 
-void test_check_pointer()
+int test_check_pointer()
 {
 	check_pointer_strict_mode(&object);
+	return 0;
 }
diff --git a/tests/compartment_calls-test.cc b/tests/compartment_calls-test.cc
index 1049560..2f0cff1 100644
--- a/tests/compartment_calls-test.cc
+++ b/tests/compartment_calls-test.cc
@@ -44,7 +44,7 @@
 	TEST(ret == 0, "compartment_call_inner returend {}", ret);
 }
 
-void test_compartment_call()
+int test_compartment_call()
 {
 	bool outTestFailed = false;
 	int  ret           = 0;
@@ -58,9 +58,7 @@
 	TEST(!trusted_stack_has_space(9),
 	     "Trusted stack should not have space for 9 more calls");
 
-	register char *cspRegister asm("csp");
-	asm("" : "=C"(cspRegister));
-	CHERI::Capability<void> csp{cspRegister};
+	CHERI::Capability<void> csp{__builtin_cheri_stack_get()};
 	CHERI::Capability<void> originalCSP{switcher_recover_stack()};
 	csp.address() = originalCSP.address();
 	TEST(csp == originalCSP,
@@ -69,10 +67,9 @@
 	     csp);
 
 	test_number_of_arguments();
-	ret = test_incorrect_export_table_with_handler(nullptr);
-	TEST(ret == -1, "Test incorrect entry point with error handler failed");
 
 	test_incorrect_export_table(nullptr, &outTestFailed);
 	TEST(outTestFailed == false,
 	     "Test incorrect entry point without error handler failed");
+	return 0;
 }
diff --git a/tests/compartment_calls.h b/tests/compartment_calls.h
index a93c81d..b740f31 100644
--- a/tests/compartment_calls.h
+++ b/tests/compartment_calls.h
@@ -43,6 +43,6 @@
   bool *outTestFailed);
 __cheri_compartment(
   "compartment_calls_inner_with_"
-  "handler") int test_incorrect_export_table_with_handler(__cheri_callback void (*fn)());
+  "handler") int test_incorrect_export_table_with_handler(__cheri_callback int (*fn)());
 __cheri_compartment("compartment_calls_outer") void compartment_call_outer();
-constexpr int ConstantValue = 0x41414141;
\ No newline at end of file
+constexpr int ConstantValue = 0x41414141;
diff --git a/tests/compartment_calls_inner_with_handler.cc b/tests/compartment_calls_inner_with_handler.cc
deleted file mode 100644
index d82ca8f..0000000
--- a/tests/compartment_calls_inner_with_handler.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright Microsoft and CHERIoT Contributors.
-// SPDX-License-Identifier: MIT
-
-#define TEST_NAME "Compartment calls (inner compartment)"
-#include "compartment_calls.h"
-#include "tests.hh"
-#include <cheri.hh>
-#include <errno.h>
-#include <tuple>
-
-using namespace CHERI;
-
-extern "C" ErrorRecoveryBehaviour
-compartment_error_handler(ErrorState *frame, size_t mcause, size_t mtval)
-{
-	return ErrorRecoveryBehaviour::ForceUnwind;
-}
-
-int test_incorrect_export_table_with_handler(__cheri_callback void (*fn)())
-{
-	/*
-	 * Trigger a cross-compartment call with an invalid export entry.
-	 */
-
-	debug_log(
-	  "test an incorrect export table entry with error handler installed");
-
-	fn();
-
-	TEST(false, "Should be unreachable");
-
-	return 0;
-}
\ No newline at end of file
diff --git a/tests/crash_recovery-test.cc b/tests/crash_recovery-test.cc
index 9048335..0254654 100644
--- a/tests/crash_recovery-test.cc
+++ b/tests/crash_recovery-test.cc
@@ -3,32 +3,40 @@
 
 #define TEST_NAME "Crash recovery (main runner)"
 #include "crash_recovery.h"
+#include <atomic>
 #include <cheri.hh>
 #include <errno.h>
 
-int crashes = 0;
+int               crashes = 0;
+std::atomic<bool> expectFault;
 
 extern "C" enum ErrorRecoveryBehaviour
 compartment_error_handler(struct ErrorState *frame, size_t mcause, size_t mtval)
 {
-	debug_log("Test saw error for PCC {}", frame->pcc);
-	debug_log("Error cause: {}, mtval: {}", mcause, mtval);
+	crashes++;
 	if (mcause == 0x2)
 	{
+		if (expectFault)
+		{
+			expectFault = false;
+			frame->pcc  = static_cast<char *>(frame->pcc) + 2;
+			return ErrorRecoveryBehaviour::InstallContext;
+		}
 		debug_log("Test hit assertion failure, unwinding");
 		return ErrorRecoveryBehaviour::ForceUnwind;
 	}
+	debug_log("Test saw error for PCC {}", frame->pcc);
+	debug_log("Error cause: {}, mtval: {}", mcause, mtval);
 	TEST((mcause == 0x1c) && (mtval == 0),
 	     "mcause should be 0x1c (CHERI), is {}, mtval should be 0 (force "
 	     "unwind), is {})",
 	     mcause,
 	     mtval);
-	crashes++;
 	debug_log("Resuming test at failure location");
 	return ErrorRecoveryBehaviour::InstallContext;
 }
 
-void test_crash_recovery()
+int test_crash_recovery()
 {
 	debug_log("Calling crashy compartment indirectly");
 	test_crash_recovery_outer(0);
@@ -38,28 +46,54 @@
 	          "nested call crashed");
 
 	debug_log("Calling crashy compartment to fault and unwind");
-	test_crash_recovery_inner(0);
+	void *ret = test_crash_recovery_inner(0);
 	check_stack();
-	debug_log("Calling crashy compartment returned (crashes: {})", crashes);
-	TEST(crashes == 1, "Failed to notice crash");
+	debug_log("Calling crashy compartment returned ({})", ret);
+	TEST(crashes == 0, "Should not have crashed");
+	TEST(ret != nullptr, "Failed to notice crash");
 
 	debug_log("Calling crashy compartment to return normally");
-	test_crash_recovery_inner(1);
+	ret = test_crash_recovery_inner(1);
 	check_stack();
 	debug_log("Calling crashy compartment returned (crashes: {})", crashes);
-	TEST(crashes == 1, "Should not have crashed");
+	TEST(crashes == 0, "Should not have crashed");
+	TEST(ret == nullptr, "Failed to notice crash");
 	debug_log("Returning normally from crash test");
 
 	debug_log("Calling crashy compartment to double fault and unwind");
-	test_crash_recovery_inner(2);
+	ret = test_crash_recovery_inner(2);
 	check_stack();
 	debug_log("Calling crashy compartment returned (crashes: {})", crashes);
-	TEST(crashes == 2, "Failed to notice crash");
+	TEST(crashes == 0, "Should not have crashed");
+	TEST(ret != nullptr, "Failed to notice crash");
 
 	debug_log(
 	  "Calling crashy compartment to corrupt CSP in stack pointer and unwind");
-	test_crash_recovery_inner(3);
+	ret = test_crash_recovery_inner(3);
 	check_stack();
 	debug_log("Calling crashy compartment returned (crashes: {})", crashes);
-	TEST(crashes == 3, "Failed to notice crash");
+	TEST(crashes == 0, "Should not have crashed");
+	TEST(ret != nullptr, "Failed to notice crash");
+
+	ptraddr_t handlerCount = switcher_handler_invocation_count_reset();
+	TEST(handlerCount == crashes * 2,
+	     "Should have called handler 3 times (3 entries, 3 exits giving a "
+	     "total of {}), was {}",
+	     crashes * 2,
+	     handlerCount);
+
+	// By default, we will be force unwound if we exceed 512 error-handler
+	// invocations.
+	constexpr int MaxCrashes = 600;
+	for (int i = 0; i < MaxCrashes; i++)
+	{
+		switcher_handler_invocation_count_reset();
+		expectFault = true;
+		// Crash with a guaranteed 16-bit instruction.  This cannot use
+		// `__builtin_trap` because the compiler knows that `__builtin_trap`
+		// does not return and so will not generate code following it.
+		asm volatile("c.unimp");
+	}
+	TEST(crashes == MaxCrashes, "Failed to notice crash");
+	return 0;
 }
diff --git a/tests/debug-test.c b/tests/debug-test.c
index 03c008c..9be9854 100644
--- a/tests/debug-test.c
+++ b/tests/debug-test.c
@@ -1,7 +1,7 @@
 #include <compartment.h>
 #include <debug.h>
 
-__cheri_compartment("debug_test") void test_debug_c()
+__cheri_compartment("debug_test") int test_debug_c()
 {
 	unsigned char x = 'c';
 	_Bool         t = true;
@@ -22,4 +22,5 @@
 	CHERIOT_INVARIANT(true, "Testing C++ invariant failure");
 	CHERIOT_INVARIANT(
 	  true, "Testing C++ invariant failure: 42:{}", 42, 1, 3, 4, "oops");
+	return 0;
 }
diff --git a/tests/debug-test.cc b/tests/debug-test.cc
index e44bb65..5828f38 100644
--- a/tests/debug-test.cc
+++ b/tests/debug-test.cc
@@ -2,7 +2,7 @@
 #include <compartment.h>
 #include <debug.h>
 
-void test_debug_cxx()
+int test_debug_cxx()
 {
 	unsigned char x = 'c';
 	CHERIOT_DEBUG_LOG("Debug messages",
@@ -19,4 +19,5 @@
 	CHERIOT_INVARIANT(true, "Testing C++ invariant failure");
 	CHERIOT_INVARIANT(
 	  true, "Testing C++ invariant failure: 42:{}", 42, 1, 3, 4, "oops");
+	return 0;
 }
diff --git a/tests/eventgroup-test.cc b/tests/eventgroup-test.cc
index aed68c1..12cebed 100644
--- a/tests/eventgroup-test.cc
+++ b/tests/eventgroup-test.cc
@@ -25,7 +25,7 @@
 	}
 } // namespace
 
-void test_eventgroup()
+int test_eventgroup()
 {
 	EventGroup *group;
 
@@ -70,4 +70,5 @@
 	ret = eventgroup_clear(&t, group, &bits, 0b100);
 	TEST(ret == 0, "Failed to clear event group bits: {}", ret);
 	TEST(bits == 0b1000, "Bits should be 0b1000, but is {}", bits);
+	return 0;
 }
diff --git a/tests/futex-test.cc b/tests/futex-test.cc
index 970253f..4abfe6c 100644
--- a/tests/futex-test.cc
+++ b/tests/futex-test.cc
@@ -21,7 +21,7 @@
                                         true);
 #endif
 
-void test_futex()
+int test_futex()
 {
 	static uint32_t futex;
 	int             ret;
@@ -185,4 +185,5 @@
 	     "PI futex with a zero thread ID returned {}, should be {}",
 	     ret,
 	     -EINVAL);
+	return 0;
 }
diff --git a/tests/list-test.cc b/tests/list-test.cc
index 7db652e..5ea7700 100644
--- a/tests/list-test.cc
+++ b/tests/list-test.cc
@@ -63,7 +63,7 @@
  */
 ds::linked_list::Sentinel<LinkedObject::ObjectRing> objects = {};
 
-void test_list()
+int test_list()
 {
 	debug_log("Testing the list implementation.");
 
@@ -238,4 +238,5 @@
 	     heapAtEnd);
 
 	debug_log("Done testing the list.");
+	return 0;
 }
diff --git a/tests/locks-test.cc b/tests/locks-test.cc
index 7b8db43..d85fea2 100644
--- a/tests/locks-test.cc
+++ b/tests/locks-test.cc
@@ -396,7 +396,7 @@
 
 } // namespace
 
-void test_locks()
+int test_locks()
 {
 	test_lock(flagLock);
 	test_lock(flagLockPriorityInherited);
@@ -412,4 +412,5 @@
 	test_ticket_lock_ordering();
 	test_ticket_lock_overflow();
 	test_recursive_mutex();
+	return 0;
 }
diff --git a/tests/misc-test.cc b/tests/misc-test.cc
index 28352b1..f537067 100644
--- a/tests/misc-test.cc
+++ b/tests/misc-test.cc
@@ -170,7 +170,7 @@
 	     permissions);
 }
 
-void test_misc()
+int test_misc()
 {
 	check_timeouts();
 	check_memchr();
@@ -200,4 +200,5 @@
 	                      void, test_word, true, false, false, false),
 	                    4,
 	                    {Permission::Global, Permission::Load});
+	return 0;
 }
diff --git a/tests/mmio-test.cc b/tests/mmio-test.cc
index 4b090e5..bfffdc1 100644
--- a/tests/mmio-test.cc
+++ b/tests/mmio-test.cc
@@ -14,7 +14,7 @@
 	     p);
 }
 
-void test_mmio()
+int test_mmio()
 {
 	check_permissions(
 	  MMIO_CAPABILITY_WITH_PERMISSIONS(Uart, uart, false, false, false, false),
@@ -74,4 +74,5 @@
 	   Permission::Store,
 	   Permission::LoadStoreCapability,
 	   Permission::LoadMutable});
+	return 0;
 }
diff --git a/tests/multiwaiter-test.cc b/tests/multiwaiter-test.cc
index b899be9..d0f380f 100644
--- a/tests/multiwaiter-test.cc
+++ b/tests/multiwaiter-test.cc
@@ -14,7 +14,7 @@
 using namespace CHERI;
 using namespace thread_pool;
 
-void test_multiwaiter()
+int test_multiwaiter()
 {
 	static uint32_t futex  = 0;
 	static uint32_t futex2 = 0;
@@ -130,4 +130,5 @@
 
 	free(queueMemory);
 	multiwaiter_delete(MALLOC_CAPABILITY, mw);
+	return 0;
 }
diff --git a/tests/queue-test.cc b/tests/queue-test.cc
index 5239f19..da18b87 100644
--- a/tests/queue-test.cc
+++ b/tests/queue-test.cc
@@ -185,10 +185,11 @@
 	debug_log("All FreeRTOS queue tests successful");
 }
 
-void test_queue()
+int test_queue()
 {
 	test_queue_unsealed();
 	test_queue_sealed();
 	test_queue_freertos();
 	debug_log("All queue tests successful");
+	return 0;
 }
diff --git a/tests/stack-test.cc b/tests/stack-test.cc
index 71ebd27..c814bf9 100644
--- a/tests/stack-test.cc
+++ b/tests/stack-test.cc
@@ -125,19 +125,19 @@
  *	- compartment stack with incorrect permissions
  *  - invalid compartment stack
  */
-void test_stack()
+int test_stack()
 {
-	int ret = test_with_small_stack(128);
-	TEST(ret == 0,
-	     "test_with_small_stack failed, returned {} with 128-byte stack",
-	     ret);
-	ret = test_with_small_stack(144);
+	int ret = test_with_small_stack(144);
 	TEST(ret == 0,
 	     "test_with_small_stack failed, returned {} with 144-byte stack",
 	     ret);
-	ret = test_with_small_stack(112);
+	ret = test_with_small_stack(160);
+	TEST(ret == 0,
+	     "test_with_small_stack failed, returned {} with 160-byte stack",
+	     ret);
+	ret = test_with_small_stack(128);
 	TEST(ret == -ENOTENOUGHSTACK,
-	     "test_with_small_stack failed, returned {} with 112-byte stack",
+	     "test_with_small_stack failed, returned {} with 128-byte stack",
 	     ret);
 	__cheri_callback void (*callback)() = cross_compartment_call;
 
@@ -178,4 +178,5 @@
 	expect_handler(false);
 
 	test_stack_invalid_on_call(callback);
+	return 0;
 }
diff --git a/tests/static_sealing-test.cc b/tests/static_sealing-test.cc
index 9730719..046d704 100644
--- a/tests/static_sealing-test.cc
+++ b/tests/static_sealing-test.cc
@@ -15,9 +15,10 @@
                                        test,
                                        42);
 
-void test_static_sealing()
+int test_static_sealing()
 {
 	// Get a pointer to it and ask for it to be unsealed.
 	Sealed<TestType> value{STATIC_SEALED_VALUE(test)};
 	test_static_sealed_object(value);
+	return 0;
 }
diff --git a/tests/stdio-test.cc b/tests/stdio-test.cc
index a23aa0b..b7f2ac2 100644
--- a/tests/stdio-test.cc
+++ b/tests/stdio-test.cc
@@ -3,7 +3,7 @@
 #include "tests.hh"
 #include <stdio.h>
 
-void test_stdio()
+int test_stdio()
 {
 	debug_log("Printing 'Hello, world!' to stdout");
 	printf("Hello, world!\n");
@@ -19,4 +19,5 @@
 	TEST(strcmp(buffer, "-42") == 0,
 	     "snprintf(\"%d\", -42) gave {}",
 	     std::string_view{buffer, BufferSize});
+	return 0;
 }
diff --git a/tests/test-runner.cc b/tests/test-runner.cc
index dfc1d10..0c85f14 100644
--- a/tests/test-runner.cc
+++ b/tests/test-runner.cc
@@ -11,6 +11,9 @@
 
 namespace
 {
+	/// Have we detected a crash in any of the compartments?
+	volatile bool crashDetected = false;
+
 	/**
 	 * Read the cycle counter.
 	 */
@@ -33,16 +36,28 @@
 	 */
 	void run_timed(const char *msg, auto &&fn)
 	{
-		int startCycles = rdcycle();
-		fn();
+		bool failed      = false;
+		int  startCycles = rdcycle();
+		// Only callables that return a value can report failure (non-zero).
+		if constexpr (std::is_same_v<std::invoke_result_t<decltype(fn)>, void>)
+		{
+			fn();
+		}
+		else
+		{
+			failed = (fn() != 0);
+		}
 		int cycles = rdcycle();
-		debug_log("{} finished in {} cycles", msg, cycles - startCycles);
+		if (!failed)
+		{
+			debug_log("{} finished in {} cycles", msg, cycles - startCycles);
+			return;
+		}
+		debug_log("{} failed", msg);
+		crashDetected = true;
 	}
 } // namespace
 
-/// Have we detected a crash in any of the compartments?
-volatile bool crashDetected = false;
-
 extern "C" enum ErrorRecoveryBehaviour
 compartment_error_handler(struct ErrorState *frame, size_t mcause, size_t mtval)
 {
@@ -111,10 +126,20 @@
 	          std::string_view{testString, 13});
 	const std::string S = "I am a walrus"s;
 	debug_log("Trying to print std::string: {}", S);
+	// Test stack pointer recovery in the root compartment.
+	CHERI::Capability<void> csp{__builtin_cheri_stack_get()};
+	CHERI::Capability<void> originalCSP{switcher_recover_stack()};
+	csp.address() = originalCSP.address();
+	TEST(csp == originalCSP,
+	     "Original stack pointer: {}\ndoes not match current stack pointer: {}",
+	     originalCSP,
+	     csp);
+
 	run_timed("All tests", []() {
 		run_timed("Debug helpers (C++)", test_debug_cxx);
 		run_timed("Debug helpers (C)", test_debug_c);
 		run_timed("MMIO", test_mmio);
+		run_timed("Unwind cleanup", test_unwind_cleanup);
 		run_timed("stdio", test_stdio);
 		run_timed("Static sealing", test_static_sealing);
 		run_timed("Crash recovery", test_crash_recovery);
diff --git a/tests/tests.hh b/tests/tests.hh
index 4193ed3..14780ed 100644
--- a/tests/tests.hh
+++ b/tests/tests.hh
@@ -5,24 +5,25 @@
 #include <debug.hh>
 #include <thread.h>
 
-__cheri_compartment("eventgroup_test") void test_eventgroup();
-__cheri_compartment("mmio_test") void test_mmio();
-__cheri_compartment("allocator_test") void test_allocator();
-__cheri_compartment("thread_pool_test") void test_thread_pool();
-__cheri_compartment("futex_test") void test_futex();
-__cheri_compartment("queue_test") void test_queue();
-__cheri_compartment("locks_test") void test_locks();
-__cheri_compartment("list_test") void test_list();
-__cheri_compartment("crash_recovery_test") void test_crash_recovery();
-__cheri_compartment("multiwaiter_test") void test_multiwaiter();
-__cheri_compartment("stack_test") void test_stack();
-__cheri_compartment("compartment_calls_test") void test_compartment_call();
-__cheri_compartment("check_pointer_test") void test_check_pointer();
-__cheri_compartment("misc_test") void test_misc();
-__cheri_compartment("static_sealing_test") void test_static_sealing();
-__cheri_compartment("stdio_test") void test_stdio();
-__cheri_compartment("debug_test") void test_debug_cxx();
-__cheri_compartment("debug_test") void test_debug_c();
+__cheri_compartment("eventgroup_test") int test_eventgroup();
+__cheri_compartment("mmio_test") int test_mmio();
+__cheri_compartment("allocator_test") int test_allocator();
+__cheri_compartment("thread_pool_test") int test_thread_pool();
+__cheri_compartment("futex_test") int test_futex();
+__cheri_compartment("queue_test") int test_queue();
+__cheri_compartment("locks_test") int test_locks();
+__cheri_compartment("list_test") int test_list();
+__cheri_compartment("crash_recovery_test") int test_crash_recovery();
+__cheri_compartment("multiwaiter_test") int test_multiwaiter();
+__cheri_compartment("stack_test") int test_stack();
+__cheri_compartment("compartment_calls_test") int test_compartment_call();
+__cheri_compartment("check_pointer_test") int test_check_pointer();
+__cheri_compartment("misc_test") int test_misc();
+__cheri_compartment("static_sealing_test") int test_static_sealing();
+__cheri_compartment("stdio_test") int test_stdio();
+__cheri_compartment("debug_test") int test_debug_cxx();
+__cheri_compartment("debug_test") int test_debug_c();
+__cheri_compartment("unwind_cleanup_test") int test_unwind_cleanup();
 
 // Simple tests don't need a separate compartment.
 void test_global_constructors();
diff --git a/tests/thread_pool-test.cc b/tests/thread_pool-test.cc
index 4499d09..52a08e0 100644
--- a/tests/thread_pool-test.cc
+++ b/tests/thread_pool-test.cc
@@ -44,7 +44,7 @@
 	return ErrorRecoveryBehaviour::InstallContext;
 }
 
-void test_thread_pool()
+int test_thread_pool()
 {
 	// We can't share stack variables, so create a heap allocation that we can
 	// capture as an explicit pointer.
@@ -137,7 +137,7 @@
 	Timeout t{3};
 	thread_sleep(&t);
 	TEST(interrupted, "Worker thread was not interrupted");
-	return;
+	return 0;
 	static cheriot::atomic<uint32_t> barrier{3};
 	auto                             barrierWait = []() {
         uint32_t value = barrier--;
@@ -156,4 +156,5 @@
 	async(barrierWait);
 	barrierWait();
 	debug_log("Thread pool quiesced");
+	return 0;
 }
diff --git a/tests/unwind_cleanup-test.cc b/tests/unwind_cleanup-test.cc
new file mode 100644
index 0000000..e368562
--- /dev/null
+++ b/tests/unwind_cleanup-test.cc
@@ -0,0 +1,181 @@
+// Copyright CHERIoT Contributors.
+// SPDX-License-Identifier: MIT
+
+#define TEST_NAME "Test unwind cleanup"
+#include "locks.hh"
+#include "tests.hh"
+#include "unwind.h"
+
+/**
+ * Error handler for this compartment.  Calls `cleanup_unwind`, which the
+ * tests below expect to transfer control to the enclosing handler block;
+ * `ForceUnwind` is returned only if that call returns.
+ */
+extern "C" ErrorRecoveryBehaviour
+compartment_error_handler(ErrorState *, size_t, size_t)
+{
+	cleanup_unwind();
+	return ErrorRecoveryBehaviour::ForceUnwind;
+}
+
+namespace
+{
+	/**
+	 * Check that `setjmp` returns 0 when called directly and the `longjmp`
+	 * value on the second return, and that a volatile local written before
+	 * the `longjmp` keeps its value.
+	 */
+	void test_setjmp()
+	{
+		jmp_buf      env;
+		volatile int x = 0;
+		if (int r = setjmp(env); r == 0)
+		{
+			TEST_EQUAL(x, 0, "setjmp should return 0 the first time");
+			x = 42;
+			longjmp(env, 1);
+		}
+		else
+		{
+			TEST_EQUAL(r, 1, "setjmp should return 1 the second time");
+			TEST_EQUAL(
+			  x, 42, "On the second return, x should have been modified");
+			x = 53;
+		}
+		TEST_EQUAL(x, 53, "After longjmp, x should have been modified");
+	}
+
+	/// Lock shared by the `on_error` tests.
+	FlagLock flagLock;
+
+	/**
+	 * Check that `on_error` runs its error callback when the body calls
+	 * `cleanup_unwind`.  The callback releases the guard, so the guard is
+	 * expected to test false afterwards.
+	 */
+	void test_on_error()
+	{
+		LockGuard g(flagLock);
+		on_error([&]() { cleanup_unwind(); }, [&]() { g.unlock(); });
+		TEST(!g, "on_error should unlock the lock");
+	}
+
+	/**
+	 * Take `flagLock` with a guard and unwind out of an `on_error` body that
+	 * has no error callback.
+	 */
+	void test_on_error_raii_inner()
+	{
+		LockGuard g(flagLock);
+		// No handler.  g's destructor runs after on_error returns.
+		on_error([&]() { cleanup_unwind(); });
+	}
+
+	/**
+	 * Check that the guard in `test_on_error_raii_inner` released `flagLock`
+	 * by the time that function returned.
+	 */
+	void test_on_error_raii()
+	{
+		test_on_error_raii_inner();
+		TEST(flagLock.try_lock(), "raii should have dropped the lock");
+		flagLock.unlock();
+	}
+
+	/**
+	 * Check the `CHERIOT_DURING` / `CHERIOT_HANDLER` macros: calling
+	 * `cleanup_unwind` in the during block must run the handler block.
+	 */
+	void test_c_macros()
+	{
+		volatile int x = 0;
+		CHERIOT_DURING
+		{
+			x = 42;
+			cleanup_unwind();
+		}
+		CHERIOT_HANDLER
+		{
+			TEST_EQUAL(x, 42, "In the handler, x should have been modified");
+			x = 53;
+		}
+		CHERIOT_END_HANDLER
+		TEST_EQUAL(x, 53, "After longjmp, object should have been modified");
+	}
+
+	/**
+	 * Place a 4096-element `int` array on the stack and log it.  Callers use
+	 * this to provoke a stack overflow.  The argument is unused.
+	 */
+	__noinline void overflow_stack(volatile int *x)
+	{
+		int hugeBuffer[4096];
+		debug_log("Overflowing stack: {}", hugeBuffer);
+	}
+
+	/**
+	 * Make sure that we can unwind out of a trap.  This will invoke the normal
+	 * error handler.
+	 */
+	void test_from_trap()
+	{
+		volatile int x = 0;
+		CHERIOT_DURING
+		{
+			x = 42;
+			__builtin_trap();
+		}
+		CHERIOT_HANDLER
+		{
+			TEST_EQUAL(x, 42, "In the handler, x should have been modified");
+			x = 53;
+		}
+		CHERIOT_END_HANDLER
+		TEST_EQUAL(
+		  x, 53, "After error handler, object should have been modified");
+	}
+
+	/**
+	 * Make sure that we can unwind out of a stack overflow.  This will invoke
+	 * the stackless error handler.
+	 */
+	void test_from_stack_overflow()
+	{
+		volatile int x = 0;
+		CHERIOT_DURING
+		{
+			x = 42;
+			overflow_stack(&x);
+		}
+		CHERIOT_HANDLER
+		{
+			debug_log("Error handler");
+			TEST_EQUAL(x, 42, "In the handler, x should have been modified");
+			x = 53;
+		}
+		CHERIOT_END_HANDLER
+		TEST_EQUAL(x, 53, "After handler, object should have been modified");
+	}
+
+} // namespace
+
+/**
+ * Entry point for the unwind-cleanup tests.  Runs each check in turn and
+ * returns 0 once all of them have completed.
+ */
+int test_unwind_cleanup()
+{
+	test_setjmp();
+	test_on_error();
+	test_on_error_raii();
+	test_c_macros();
+	// Try these in both orders to make sure that both error handlers correctly
+	// clean up.
+	test_from_trap();
+	test_from_stack_overflow();
+
+	test_from_stack_overflow();
+	test_from_trap();
+	debug_log("Test unwind_cleanup passed");
+	return 0;
+}
diff --git a/tests/xmake.lua b/tests/xmake.lua
index bbc1d78..7ecc6cb 100644
--- a/tests/xmake.lua
+++ b/tests/xmake.lua
@@ -79,8 +79,6 @@
 test("stack")
 compartment("compartment_calls_inner")
     add_files("compartment_calls_inner.cc")
-compartment("compartment_calls_inner_with_handler")
-    add_files("compartment_calls_inner_with_handler.cc")
 test("compartment_calls")
 test("check_pointer")
 -- Test various APIs that are too small to deserve their own test file
@@ -88,6 +86,8 @@
     on_load(function(target)
         target:values_set("shared_objects", { exampleK = 1024, test_word = 4 }, {expand = false})
     end)
+test("unwind_cleanup")
+    add_deps("unwind_error_handler")
 
 includes(path.join(sdkdir, "lib"))
 
@@ -117,11 +117,12 @@
     add_deps("multiwaiter_test")
     add_deps("ccompile_test")
     add_deps("stack_test", "stack_integrity_thread")
-    add_deps("compartment_calls_test", "compartment_calls_inner", "compartment_calls_inner_with_handler")
+    add_deps("compartment_calls_test", "compartment_calls_inner")
     add_deps("check_pointer_test")
     add_deps("misc_test")
     add_deps("stdio_test")
     add_deps("debug_test")
+    add_deps("unwind_cleanup_test")
     -- Set the thread entry point to the test runner.
     on_load(function(target)
         target:values_set("board", "$(board)")