| // Copyright Microsoft and CHERIoT Contributors. |
| // SPDX-License-Identifier: MIT |
| |
| #include "export-table-assembly.h" |
| #include "trusted-stack-assembly.h" |
| #include "misc-assembly.h" |
| #include <errno.h> |
| |
| .include "assembly-helpers.s" |
| |
| # Symbolic names for the stack high water mark registers until |
| # the assembler knows about them. |
| |
| /** |
| * Machine-mode stack high water mark CSR |
| */ |
| #define CSR_MSHWM 0xbc1 |
| /** |
| * Machine mode stack high water mark stack base CSR |
| */ |
| #define CSR_MSHWMB 0xbc2 |
| |
| #define MAX_FAULTS_PER_COMPARTMENT_CALL 1024 |
| |
| #define SPILL_SLOT_cs0 0 |
| #define SPILL_SLOT_cs1 8 |
| #define SPILL_SLOT_cgp 16 |
| #define SPILL_SLOT_pcc 24 |
| #define SPILL_SLOT_SIZE 32 |
| |
| /* |
| * The switcher uniformly speaks of registers using their RISC-V ELF psABI names |
| * and not their raw index, as, broadly speaking, we use registers in a similar |
| * way to C functions. However, it's probably convenient to have a mapping |
| * readily accessible, so here 'tis: |
| * |
| * # x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 |
| * psABI zero ra sp gp tp t0 t1 t2 s0 s1 a0 a1 a2 a3 a4 a5 |
| * |
| * When we use the psABI name without a 'c' prefix, we sometimes mean the |
| * address component of the capability. |
| * |
| * Despite the use of psABI names and conformance at the interface (argument |
| * registers used for arguments, return address register used for its canonical |
| * purpose, &c), one should not read too much of the psABI calling convention |
| * into the code here. Within the switcher, the machine is a raw register |
| * machine and C is a distant, high-level language. |
| * |
| * Since this is the part of the map labeled "here be dragons", we have added |
| * "Register Atlas" comments throughout. Lines within an atlas consist of a |
| * comma-whitespace-separated list of registers, a colon, and descriptive text. |
| * In general, atlases should cover all (including dead) registers. Point |
| * changes to the atlas are denoted with "Atlas update", to emphasize that |
| * registers not named are not dead but instead retain their meaning from the |
| * last full atlas. |
| * |
| * Labels associated with interesting control flow are annotated with |
| * |
| * - "FROM:", which may be repeated, once for each predecessor label or one of these: |
| * - "above": the immediately prior block |
| * - "cross-call": untrusted code making a cross-compartment call |
| * - "malice": untrusted code outside the switcher |
| * - "interrupt": an asynchronous external event |
| * - "error": a trap from within the switcher |
| * |
| * - "IFROM:", which indicates an indirect transfer of control (through cjalr |
| * or mepcc/mret, for example). |
| * |
| * - "ITO:", the other direction of "IFROM:" |
| * |
| * - "IRQ ASSUME:", "any", "deferred" or "enabled". This declares the state of |
| * the machine, either from explicit instructions or implicit aspects of the |
| * architecture. |
| * |
| * - "IRQ REQUIRE:", "any", "deferred" or "enabled". If not "any", then all |
| * paths into this label must have the indicated IRQ disposition. |
| * |
| * - "LIVE IN:", a list of live (in) registers at this point of the code and/or |
| * - "*": the entire general purpose register file (no CSRs or SCRs implied) |
| * - "callee-save": the psABI callee-save registers |
| * - "mcause" |
| * - "mtdc" |
| * - "mtval" |
| * - "mepcc" |
| * |
| * Control flow instructions may be annotated with "LIVE OUT:" labels. These |
| * capture the subset of live registers meant to be available to the target. |
| * |
| * For all annotations, optional commentary is given in parentheses and may |
| * continue onto adjacent lines. |
| * |
| */ |
| /* |
| * Multiple points in the switcher are exposed to callers via sentries (either |
| * forward-arc sentries manufactured elsewhere or backwards-arc sentries |
| * manufactured by CJALRs herein). Sentries can have their GL(obal) permission |
| * cleared by the bearer, but nothing here relies on PCC being GL(obal): we |
| * never store anything derived from our PCC to memory, much less through an |
| * authority not bearing SL permission. |
| * |
| * Similarly, the switcher communicates with the outside world by means of |
| * sealed data capabilities (to TrustedStacks and compartment export tables). |
| * These, too, can have their GL(obal) bit cleared by bearers, but again, it |
| * does not much matter for switcher correctness; see comments marked with |
| * "LOCAL SEAL" notes in the code below. |
| * |
| * We do rely on PCC having L(oad)G(lobal) permission -- which is under seal in |
| * sentries and so not mutable by the caller, even if a sentry is loaded |
| * through an authority without LG -- so that, in particular, the sealing |
| * authorities used herein are GL(obal) and so the sealed capabilities that |
| * result are also GL(obal). |
| */ |
| |
| |
| switcher_code_start: |
| |
| # Global for the sealing key. Stored in the switcher's code section. |
| .section .text, "ax", @progbits |
| .globl compartment_switcher_sealing_key |
| .p2align 3 |
| compartment_switcher_sealing_key: |
| .Lsealing_key_trusted_stacks: |
| .long 0 |
| .long 0 |
| .Lunsealing_key_import_tables: |
| .long 0 |
| .long 0 |
| # Global for the scheduler's PCC. Stored in the switcher's code section. |
| .section .text, "ax", @progbits |
| .globl switcher_scheduler_entry_pcc |
| .p2align 3 |
| switcher_scheduler_entry_pcc: |
| .long 0 |
| .long 0 |
| # Global for the scheduler's CGP. Stored in the switcher's code section. |
| .section .text, "ax", @progbits |
| .globl switcher_scheduler_entry_cgp |
| .p2align 3 |
| switcher_scheduler_entry_cgp: |
| .long 0 |
| .long 0 |
| # Global for the scheduler's CSP. Stored in the switcher's code section. |
| .section .text, "ax", @progbits |
| .globl switcher_scheduler_entry_csp |
| .p2align 3 |
| switcher_scheduler_entry_csp: |
| .long 0 |
| .long 0 |
| |
| /** |
| * Copy a register context from `src` to `dst` using `scratch` as the register |
| * to hold loaded capabilities and `counter` as the register to hold the loop |
| * counter. All four registers are clobbered by this macro. |
| */ |
| .macro copyContext dst, src, scratch, counter |
| addi \counter, zero, 15 |
| 1: |
| clc \scratch, 0(\src) |
| csc \scratch, 0(\dst) |
| addi \counter, \counter, -1 |
| cincoffset \dst, \dst, 8 |
| cincoffset \src, \src, 8 |
| bnez \counter, 1b |
| .endm |
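| /* |
| * For orientation, the macro above behaves roughly like the following C |
| * sketch (illustrative only, not part of the build; `cap_t` is a |
| * hypothetical stand-in for a capability-sized slot): |
| * |
| * for (int i = 0; i < 15; i++) { dst[i] = src[i]; } |
| * |
| * where each element copy is a tag-preserving, 8-byte capability load and |
| * store, and dst/src advance one slot per iteration. |
| */ |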
| |
| /// Spill a single register to a trusted stack pointed to by csp. |
| .macro trustedSpillOne, reg |
| csc \reg, TrustedStack_offset_\reg(csp) |
| .endm |
| |
| /** |
| * Spill all of the registers in the list (in order) to a trusted stack pointed |
| * to by csp. |
| */ |
| .macro trustedSpillRegisters reg1, regs:vararg |
| forall trustedSpillOne, \reg1, \regs |
| .endm |
| |
| /// Reload a single register from a trusted stack pointed to by csp. |
| .macro trustedReloadOne, reg |
| clc \reg, TrustedStack_offset_\reg(csp) |
| .endm |
| |
| /** |
| * Reload all of the registers in the list (in order) from a trusted stack pointed |
| * to by csp. |
| */ |
| .macro trustedReloadRegisters reg1, regs:vararg |
| forall trustedReloadOne, \reg1, \regs |
| .endm |
| |
| /** |
| * Zero the stack. The three operands are the base address, the top address, |
| * and a scratch register to use. The base must be a capability but it must |
| * be provided without the c prefix because it is used as both a capability |
| * and integer register. All three registers are clobbered and their final |
| * values should not be considered safe to expose outside the TCB. |
| */ |
| .macro zero_stack base top scratch |
| addi \scratch, \top, -32 |
| addi \top, \top, -16 |
| bgt \base, \scratch, 1f |
| // Zero the stack in 32-byte chunks |
| 0: |
| csc cnull, 0(c\base) |
| csc cnull, 8(c\base) |
| csc cnull, 16(c\base) |
| csc cnull, 24(c\base) |
| cincoffset c\base, c\base, 32 |
| ble \base, \scratch, 0b |
| 1: |
| bgt \base, \top, 2f |
| // Zero any 16-byte tail |
| csc cnull, 0(c\base) |
| csc cnull, 8(c\base) |
| 2: |
| .endm |
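| /* |
| * Roughly, assuming base and top (the values passed in) are both 16-byte |
| * aligned, the macro above zeroes the range [base, top), as in this |
| * illustrative sketch (not part of the build): |
| * |
| * while (top - base >= 32) { store 32 bytes of zeroes at base; base += 32; } |
| * if (top - base >= 16) { store 16 bytes of zeroes at base; } |
| */ |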
| |
| /** |
| * Clear the hazard pointers associated with this thread. (See |
| * include/stdlib.h:/heap_claim_fast, and its implementation in |
| * lib/compartment_helpers/claim_fast.cc for more about hazard pointers.) We |
| * don't care about leaks here (they're store-only from anywhere except the |
| * allocator), so just write a 32-bit zero over half of each one to clobber the |
| * tags. |
| */ |
| .macro clear_hazard_slots trustedStack, scratch |
| clc \scratch, TrustedStack_offset_hazardPointers(\trustedStack) |
| csw zero, 0(\scratch) |
| csw zero, 8(\scratch) |
| .endm |
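| /* |
| * The two csw instructions rely on a CHERI property: a non-capability-width |
| * store that overlaps a capability-sized slot clears that slot's tag, so a |
| * 32-bit zero written into each 8-byte hazard slot is enough to invalidate |
| * both pointers without full-width stores. |
| */ |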
| |
| .section .text, "ax", @progbits |
| .globl __Z26compartment_switcher_entryz |
| .p2align 2 |
| .type __Z26compartment_switcher_entryz,@function |
| __Z26compartment_switcher_entryz: |
| /* |
| * FROM: cross-call |
| * FROM: malice |
| * IRQ ASSUME: deferred (loader/boot.cc constructs only IRQ-deferring |
| * sentries to this function; the export entry at the end |
| * of this file is somewhat fictitious) |
| * LIVE IN: mtdc, ra, sp, gp, s0, s1, t0, t1, a0, a1, a2, a3, a4, a5 |
| * (that is, all registers except tp and t2) |
| * |
| * Atlas: |
| * mtdc: pointer to this thread's TrustedStack |
| * (may be 0 from buggy/malicious scheduler thread) |
| * ra: caller return address |
| * (at the moment, this is ensured because we enter via an |
| * IRQ-disabling forward sentry, which requires ra as the destination |
| * register of the cjalr the caller used, but we are not relying on |
| * this property, and we hope to relax the switcher's IRQ posture) |
| * sp: nominally, caller's stack pointer; will check integrity below |
| * gp: caller state, to be spilled, value unused in switcher |
| * s0, s1: caller state, to be spilled, value unused in switcher |
| * t0: possible caller argument to callee, passed or zeroed in switcher |
| * (specifically, this is the pointer to arguments beyond a0-a5 and/or |
| * variadic arguments) |
| * t1: sealed export table entry for the target callee |
| * (see LLVM's RISCVExpandPseudo::expandCompartmentCall and, more |
| * generally, the ABI chapter of the CHERIoT ISA document, |
| * https://cheriot.org/cheriot-sail/cheriot-architecture.pdf) |
| * a0, a1, a2, a3, a4, a5: possible caller arguments to callee, passed or |
| * zeroed in switcher. |
| * tp, t2: dead |
| */ |
| /* |
| * By virtue of making a call, the caller is indicating that all caller-save |
| * registers are dead. Because we are crossing a trust boundary, the |
| * switcher must spill callee-save registers. If we find ourselves unable |
| * to do so for "plausibly accidental" reasons, we'll return an error to the |
| * caller (via the exception path; see .Lhandle_error_in_switcher). |
| * Specifically, the first spill here is to the lowest address and so |
| * guaranteed to raise a bounds fault if any of the stores here would access |
| * below the base (lowest address) of the stack capability. |
| * |
| * Certain other kinds of less plausibly accidental malice (for example, an |
| * untagged or sealed or SD-permission-less capability in sp) will also be |
| * caught by this first spill. In some sense we should forcibly unwind the |
| * caller, but it's acceptable, in the sense that no would-be-callee can be |
| * harmed, to just return an error instead. |
| * |
| * Yet other kinds of less plausibly accidental malice can survive the first |
| * spill. For example, consider a MC-permission-less capability in sp and a |
| * non-capability value in s0. While the first spill will not trap, these |
| * forms of malice will certainly be detected in a few instructions, when we |
| * scrutinize sp in detail. They might (or might not) cause an intervening |
| * (specifically, spill) instruction to trap. Either way will result in us |
| * ending up in .Lcommon_force_unwind, either directly or via the exception |
| * handler. |
| * |
| * At entry, the register file is safe to expose to the caller, and so if |
| * and when we take the "just return an error" option, no changes, beyond |
| * populating the error return values in a0 and a1, are required. |
| */ |
| /* |
| * __Z26compartment_switcher_entryz is exposed to callers directly as a |
| * forward-arc interrupt-disabling sentry via the somewhat lengthy chain |
| * of events involving... |
| * - the .compartment_import_table sections defined in |
| * compartment.ldscript, |
| * - the export table defined below (.section .compartment_export_table), |
| * - firmware.ldscript.in's use of that export table to define |
| * .switcher_export_table, |
| * - the firmware image header (loader/types.h's ImgHdr), in particular |
| * ImgHdr::switcher::exportTable and, again, firmware.ldscript.in's |
| * use of .switcher_export_table to populate that field, and |
| * - loader/boot.cc:/populate_imports and its caller's computation of |
| * switcherPCC. |
| */ |
| /* |
| * TODO: We'd like to relax the interrupt posture of the switcher where |
| * possible. Specifically, unless both the caller and callee are running |
| * and to be run with interrupts deferred, we'd like the switcher, and |
| * especially its stack-zeroing, to be preemptable. |
| */ |
| .Lswitch_entry_first_spill: |
| /* |
| * FROM: above |
| * ITO: .Lswitch_just_return (via .Lhandle_error_in_switcher) |
| */ |
| csc cs0, (SPILL_SLOT_cs0-SPILL_SLOT_SIZE)(csp) |
| cincoffset csp, csp, -SPILL_SLOT_SIZE |
| csc cs1, SPILL_SLOT_cs1(csp) |
| csc cgp, SPILL_SLOT_cgp(csp) |
| csc cra, SPILL_SLOT_pcc(csp) |
| /* |
| * Atlas update: |
| * ra, gp, s0, s1: dead (presently, redundant caller values) |
| */ |
| |
| /* |
| * Before we access any privileged state, we can verify the compartment's |
| * csp is valid. If not, force unwind. Note that these checks are purely to |
| * protect the callee, not the switcher itself, which can always bail and |
| * forcibly unwind the caller. |
| * |
| * Make sure the caller's CSP has the expected permissions (including that |
| * it is a stack pointer, by virtue of being local and bearing SL) and that |
| * its top and base are at least 16-byte aligned. We have already checked |
| * that it is tagged and unsealed and at least 8-byte aligned by virtue of |
| * surviving the stores above. |
| * |
| * TODO for formal verification: it should be the case that after these |
| * tests and the size checks below, no instruction in the switcher |
| * authorized by the capability now in sp can fault. |
| */ |
| //.Lswitch_csp_check: |
| cgetperm t2, csp |
| li tp, COMPARTMENT_STACK_PERMISSIONS |
| bne tp, t2, .Lcommon_force_unwind |
| cgetbase t2, csp |
| or t2, t2, sp |
| andi t2, t2, 0xf |
| bnez t2, .Lcommon_force_unwind |
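| /* |
| * Illustratively, the checks above are (a sketch only; the cheri_*_get |
| * helpers are hypothetical names for the capability-inspection |
| * instructions used): |
| * |
| * if (cheri_perms_get(csp) != COMPARTMENT_STACK_PERMISSIONS) force_unwind(); |
| * if ((cheri_base_get(csp) | cheri_address_get(csp)) & 0xf) force_unwind(); |
| */ |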
| /* |
| * Atlas update: |
| * t2, tp: dead (again) |
| * sp: the caller's untrusted stack pointer, now validated and pointing at |
| * the callee-save register spill area we made above |
| */ |
| |
| // mtdc should always have an offset of 0. |
| cspecialr ct2, mtdc |
| // Atlas update: t2: a pointer to this thread's TrustedStack structure |
| /* |
| * This is our first access via mtdc, and so it might trap if the scheduler |
| * tries a cross-compartment call. That will be a fairly short trip to an |
| * infinite loop (see commentary in exception_entry_asm). |
| */ |
| clear_hazard_slots /* trusted stack = */ ct2, /* scratch = */ ctp |
| // Atlas update: tp: dead (again) |
| |
| //.Lswitch_trusted_stack_push: |
| /* |
| * TrustedStack::frames[] is a flexible array member at the end of the |
| * structure, and the stack of frames it represents grows *upwards* (with |
| * [0] the initial activation, [1] the first cross-compartment call, and so |
| * on). Thus, if the frame offset points "one past the end" (or futher |
| * out), we have no more frames available, so off we go to |
| * .Lswitch_trusted_stack_exhausted . |
| */ |
| clhu tp, TrustedStack_offset_frameoffset(ct2) |
| cgetlen s0, ct2 |
| /* |
| * Atlas update: |
| * s0: scalar length of the TrustedStack structure |
| * tp: scalar offset of the next available TrustedStack::frames[] element |
| */ |
| // LIVE OUT: mtdc, sp |
| bgeu tp, s0, .Lswitch_trusted_stack_exhausted |
| // Atlas update: s0: dead |
| // We are past the stack checks. |
| cincoffset ctp, ct2, tp |
| // Atlas update: tp: pointer to the next available TrustedStackFrame |
| /* |
| * Populate that stack frame by... |
| * 1. spilling the caller's stack pointer, as modified by the spills at the |
| * start of this function. |
| */ |
| csc csp, TrustedStackFrame_offset_csp(ctp) |
| /* |
| * 2. zeroing the number of error handler invocations (we have just entered |
| * this call, so no faults triggered during this call yet). |
| */ |
| csh zero, TrustedStackFrame_offset_errorHandlerCount(ctp) |
| /* |
| * 3. For now, store a null export entry. This is largely cosmetic; we will |
| * not attempt to access this value before it is set to the real export |
| * table entry below. Should we trap, the logic at |
| * .Lhandle_error_switcher_pcc will cause us to force unwind, popping |
| * this frame before any subsequent action. |
| * |
| * TODO for formal verification: prove that this store is dead and can |
| * be eliminated. |
| */ |
| csc cnull, TrustedStackFrame_offset_calleeExportTable(ctp) |
| /* |
| * Update the frame offset, using s1 to hold a scratch scalar. Any fault |
| * before this point (wrong target cap, unaligned stack, etc.) is seen as a |
| * fault in the caller. After writing the new TrustedStack::frameoffset, |
| * any fault is seen as a callee fault. |
| */ |
| clhu s1, TrustedStack_offset_frameoffset(ct2) |
| addi s1, s1, TrustedStackFrame_size |
| csh s1, TrustedStack_offset_frameoffset(ct2) |
| |
| /* |
| * Chop off the stack, using... |
| * - s0 for the current untrusted stack base address (the lowest address of |
| * the register spill we created at .Lswitch_entry_first_spill) |
| * - s1 for the length of the stack suffix to which the callee is entitled |
| */ |
| //.Lswitch_stack_chop: |
| cgetaddr s0, csp |
| cgetbase s1, csp |
| csetaddr csp, csp, s1 |
| sub s1, s0, s1 |
| csetboundsexact ct2, csp, s1 |
| csetaddr csp, ct2, s0 |
| /* |
| * Atlas: |
| * s0: address of stack boundary between caller and callee frames, that is, |
| * the lowest address of the register spill from |
| * .Lswitch_entry_first_spill |
| * sp: pointer to stack, with its limit and address set to the address in |
| * s0. The base and permissions have not been altered from sp at |
| * entry, and the tag remains set since all manipulations have been |
| * monotone non-increasing of, and within, bounds. |
| * tp: pointer to the freshly populated TrustedStackFrame (still) |
| * t1: sealed export table entry for the target callee (still) |
| * a0, a1, a2, a3, a4, a5, t0: call argument values / to be zeroed (still) |
| * t2, s1: dead (again) |
| * ra, gp: dead (still) |
| */ |
| #ifdef CONFIG_MSHWM |
| // Read the stack high water mark (which is 16-byte aligned) |
| csrr gp, CSR_MSHWM |
| // Skip zeroing if high water mark >= stack pointer |
| //.Lswitch_shwm_skip_zero: |
| bge gp, sp, .Lswitch_after_zero |
| /* |
| * Use stack high water mark as base address for zeroing. If this faults |
| * then it will trigger a force unwind. This can happen only if the caller |
| * is doing something bad. |
| */ |
| csetaddr ct2, csp, gp |
| #endif |
| zero_stack /* base = */ t2, /* top = */ s0, /* scratch = */ gp |
| .Lswitch_after_zero: |
| /* |
| * FROM: above |
| * FROM: .Lswitch_shwm_skip_zero |
| * LIVE IN: mtdc, sp, tp, t0, t1, a0, a1, a2, a3, a4, a5 |
| * |
| * Atlas: |
| * sp: pointer to stack, with bounds as t2, cursor at boundary in s0 |
| * (still) |
| * tp: pointer to the freshly populated TrustedStackFrame (still) |
| * t1: sealed export table entry for the target callee (still) |
| * a0, a1, a2, a3, a4, a5, t0: call argument values / to be zeroed (still) |
| * ra, s1: dead (still) |
| * s0, t2, gp: dead (again) |
| */ |
| |
| // Fetch the sealing key |
| LoadCapPCC cs0, .Lunsealing_key_import_tables |
| // Atlas update: s0: switcher sealing key |
| /* |
| * The caller's handle to the callee (the sealed capability to the export |
| * table entry) is in t1, which has been kept live all this time. Unseal |
| * and load the entry point offset. |
| */ |
| //.Lswitch_unseal_entry: |
| cunseal ct1, ct1, cs0 |
| /* |
| * Atlas update: |
| * t1: if tagged, an unsealed pointer with bounds encompassing callee |
| * compartment ExportTable and ExportEntry array and cursor pointing at |
| * the callee ExportEntry; if untagged, the caller is malicious or |
| * deeply confused, the next instruction will trap, and we'll |
| * .Lcommon_force_unwind via exception_entry_asm and |
| * .Lhandle_error_in_switcher. |
| */ |
| /* |
| * LOCAL SEAL: If it happened that the export table reference given to us |
| * is not GL(obal), then the result of unsealing above, now in t1, will |
| * also be not GL(obal). This reference is stored to the TrustedStack frame |
| * through a SL-bearing authority (because the TrustedStack also holds our |
| * register spill area, and so must have SL) but neither it nor any monotone |
| * progeny otherwise escape the switcher's private register file. |
| */ |
| /* |
| * Load the entry point offset. If cunseal failed then this will fault and |
| * we will force unwind; see .Lhandle_error_switcher_pcc. |
| */ |
| clhu s0, ExportEntry_offset_functionStart(ct1) |
| // Atlas update: s0: callee compartment function entrypoint offset |
| /* |
| * At this point, we know that the cunseal has succeeded (we didn't trap on |
| * the load) and so it's safe to store the unsealed value of the export |
| * table pointer. |
| * |
| * TODO for formal verification: Nothing between this point and transition |
| * to the callee should fault. |
| */ |
| csc ct1, TrustedStackFrame_offset_calleeExportTable(ctp) |
| |
| //.Lswitch_stack_check_length: |
| /* |
| * Load the minimum stack size required by the callee, clobbering tp, which |
| * holds a capability to the TrustedStackFrame, bringing us closer to a |
| * register file that is not holding values kept secret from the callee. |
| */ |
| clbu tp, ExportEntry_offset_minimumStackSize(ct1) |
| // Atlas update: tp: minimum stack size, in units of 8 bytes. |
| slli tp, tp, 3 |
| // Atlas update: tp: minimum stack size, in bytes. |
| /* |
| * Check that the stack is large enough for the callee. |
| * At this point, we have already truncated the stack and so the length of |
| * the stack is the length that the callee can use. |
| */ |
| cgetlen t2, csp |
| // Atlas update: t2: length of available stack |
| /* |
| * Include the space we reserved for the unwind state. |
| * |
| * tp holds the number of required stack bytes, a value between 0 and 0x7F8 |
| * (the result of an unsigned byte load left shifted by 3). Given this |
| * extremely limited range, adding STACK_ENTRY_RESERVED_SPACE will not cause |
| * overflow (while instead subtracting it from the available length, in t2, |
| * might underflow). |
| * |
| * TODO for formal verification: prove the above. |
| */ |
| addi tp, tp, STACK_ENTRY_RESERVED_SPACE |
| // LIVE OUT: mtdc |
| bgtu tp, t2, .Lswitch_stack_too_small |
| |
| /* |
| * Reserve space for unwind state and so on; this cannot take sp out of |
| * bounds, in light of the check we just performed. |
| */ |
| cincoffset csp, csp, -STACK_ENTRY_RESERVED_SPACE |
| #ifdef CONFIG_MSHWM |
| // store new stack top as stack high water mark |
| csrw CSR_MSHWM, sp |
| #endif |
| |
| // Get the flags field into tp |
| clbu tp, ExportEntry_offset_flags(ct1) |
| // Atlas update: tp: callee entry flags field |
| |
| // All ExportEntry state has been consulted; move to ExportTable header |
| cgetbase s1, ct1 |
| csetaddr ct1, ct1, s1 |
| /* |
| * Atlas update: |
| * t1: pointer to the callee compartment ExportTable structure. Bounds |
| * still inclusive of ExportEntry array, but that will not be accessed. |
| */ |
| //.Lswitch_callee_load: |
| // At this point we begin loading callee compartment state. |
| clc cgp, ExportTable_offset_cgp(ct1) |
| // Atlas update: gp: target compartment CGP |
| clc cra, ExportTable_offset_pcc(ct1) |
| cincoffset cra, cra, s0 |
| // Atlas update: ra: target function entrypoint (pcc base + offset from s0) |
| |
| // Zero any unused argument registers |
| /* |
| * The low 3 bits of the flags field (tp) contain the number of argument |
| * registers to pass. We create a small sled that zeroes them in the order |
| * they are used as argument registers, and we jump into the middle of it at |
| * an offset defined by that value; the skipped prefix of argument registers |
| * thus keeps its values. |
| */ |
| .Lswitch_load_zero_arguments_start: |
| // FROM: above |
| auipcc cs0, %cheriot_compartment_hi(.Lswitch_zero_arguments_start) |
| cincoffset cs0, cs0, %cheriot_compartment_lo_i(.Lswitch_load_zero_arguments_start) |
| // Atlas update: s0: .Lswitch_zero_arguments_start |
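| /* |
| * As with the standard %pcrel_hi/%pcrel_lo pairing, the lo_i operand above |
| * names the label of the auipcc (.Lswitch_load_zero_arguments_start) rather |
| * than the final target; together the pair materialises the address of |
| * .Lswitch_zero_arguments_start in s0. |
| */ |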
| andi t2, tp, 0x7 // loader/types.h's ExportEntry::flags |
| /* |
| * Change from the number of registers to pass into the number of 2-byte |
| * instructions to skip. |
| */ |
| slli t2, t2, 1 |
| // Offset the jump target by the number of instructions to skip |
| cincoffset cs0, cs0, t2 |
| // Jump into the sled. |
| cjr cs0 |
| .Lswitch_zero_arguments_start: |
| // IFROM: above |
| zeroRegisters a0, a1, a2, a3, a4, a5, t0 |
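| /* |
| * Note: the scaling by 2 above assumes that each register-zeroing element of |
| * this sled expands to exactly one compressed (2-byte) instruction. |
| */ |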
| |
| /* |
| * Enable interrupts if the interrupt-disable bit is not set in flags. See |
| * loader/types.h's InterruptStatus and ExportEntry::InterruptStatusMask. |
| * InterruptStatus::Inherited is prohibited on export entries, so we need |
| * look only at one bit. |
| */ |
| andi t1, tp, ExportEntryInterruptStatusSwitcherMask |
| bnez t1, .Lswitch_skip_interrupt_enable |
| csrsi mstatus, 0x8 |
| .Lswitch_skip_interrupt_enable: |
| /* |
| * FROM: above |
| * IRQ REQUIRE: any (have adopted callee disposition) |
| * |
| * Atlas: |
| * ra: (still) target function entrypoint |
| * sp: (still) pointer to stack, below compartment invocation local storage |
| * gp: (still) target compartment CGP |
| * a0, a1, a2, a3, a4, a5, t0: arguments or zeroed, as above |
| * tp, t1, t2, s0, s1: dead |
| */ |
| /* |
| * There is an interesting narrow race to consider here. We're preemptable |
| * and in the switcher. That means someone could call |
| * __Z25switcher_interrupt_threadPv on us, and when we came back on core, |
| * we'd jump ahead to switcher_after_compartment_call, via... |
| * |
| * - .Lexception_scheduler_return_installed |
| * - .Lhandle_injected_error |
| * - .Lhandle_error |
| * - .Lhandle_error_switcher_pcc |
| * - .Lhandle_error_in_switcher |
| * - .Lcommon_force_unwind |
| * |
| * That is, rather than invoking the callee's compartment's error handler, |
| * and letting it service the MCAUSE_THREAD_INTERRUPT, we'll return to the |
| * caller with -ECOMPARTMENTFAIL. |
| * |
| * TODO: https://github.com/CHERIoT-Platform/cheriot-rtos/issues/372 |
| */ |
| /* |
| * Up to 10 registers are carrying state for the callee or are properly |
| * zeroed. Clear the remaining 5 now. |
| */ |
| //.Lswitch_caller_dead_zeros: |
| zeroRegisters tp, t1, t2, s0, s1 |
| //.Lswitch_callee_call: |
| /* |
| * "cjalr cra" simultaneously moves the live-in ra value into the *next* |
| * program counter and the program counter (of the instruction itself) into |
| * ra (while sealing it to be a backwards-arc sentry). That is, the value |
| * we have so carefully been keeping in ra is clobbered, but only after it |
| * becomes the next program counter. |
| */ |
| // LIVE OUT: * |
| cjalr cra |
| |
| .globl switcher_after_compartment_call |
| switcher_after_compartment_call: |
| /* |
| * FROM: malice |
| * IFROM: above |
| * FROM: .Lswitch_stack_too_small |
| * FROM: .Lcommon_force_unwind |
| * IRQ ASSUME: any (both IRQ-deferring and IRQ-enabling sentries are |
| * provided to the callees and can escape for malice's use, and |
| * the TrustedStack spill frame is not precious, and nothing |
| * that would happen were we preempted would shift our |
| * TrustedStack::frameoffset or the contents of ::frames) |
| * LIVE IN: mtdc, a0, a1 |
| * |
| * Atlas: |
| * mtdc: pointer to this thread's TrustedStack |
| * a0, a1: return value(s). The callee function must ensure that it clears |
| * these as appropriate if it is returning 0 or 1 values and not 2. |
| * ra, sp, gp: dead or callee state (to be replaced by caller state) |
| * tp, s0, s1, t0, t1, t2, a2, a3, a4, a5: dead or callee state (to be |
| * zeroed before return to caller) |
| */ |
| /* |
| * Pop a frame from the trusted stack, leaving all registers in the state |
| * expected by the caller of a cross-compartment call. The callee is |
| * responsible for zeroing unused return registers; the switcher will zero |
| * other non-return argument and temporary registers. |
| * |
| * This unwind path is common to both ordinary return (from above), benign |
| * errors after we'd set up the trusted frame (.Lswitch_stack_too_small), |
| * and forced unwinds (.Lcommon_force_unwind). |
| * |
| * TODO for formal verification: the below should not fault before returning |
| * back to the caller. If a fault occurs there must be a serious bug |
| * elsewhere. |
| */ |
| /* |
| * As just before the call, we are preemptable and in the switcher. If we |
| * are signaled via MCAUSE_THREAD_INTERRUPT at this point, we will come |
| * back here (with a0 holding -ECOMPARTMENTFAIL and a1 holding 0). This |
| * block is _idempotent_ until the update of mtdc's |
| * TrustedStack::frameoffset, so until then we will effectively just |
| * clobber the return values. After that, though, we'd forcibly unwind out |
| * of the caller. |
| * |
| * TODO: https://github.com/CHERIoT-Platform/cheriot-rtos/issues/372 |
| */ |
| /* |
| * The return sentry given to the callee as part of that cjalr could be |
| * captured by the callee or passed between compartments arbitrarily for |
| * later use. That is, in some sense, we cannot assume that any use of this |
| * sentry corresponds to the most recent derivation of it by this thread. |
| * Phrased differently, the sentry created by the "cjalr" above is not tied |
| * to the topmost TrustedStackFrame at the time of its creation. Invoking |
| * this sentry, regardless of how one comes to hold it, and even if |
| * invocation is not matched to the call that constructed any given instance |
| * of it, will always result in popping the topmost trusted stack frame (at |
| * the time of invocation) and returning to its caller. Thus, the |
| * possibility of more than one of these sentries in scope at any moment is |
| * not concerning. |
| * |
| * Additionally, threads are given a manufactured, interrupt-deferring |
| * sentry to here as part of their initial activation frame (so that |
| * returning acts as an orderly unwind). See |
| * loader/boot.cc:/boot_threads_create . |
| * |
| * Being robust to malicious or "unusual" entry here is facilitated by the |
| * requirements of the next block of code being minimal: mtdc must be a |
| * TrustedStack pointer. The contents of a0 and a1 will be exposed to the |
| * compartment above the one currently executing, or the thread will be |
| * terminated if there is no such. |
| */ |
| |
| cspecialr ctp, mtdc |
| // Atlas update: tp: pointer to TrustedStack |
| |
| clear_hazard_slots ctp, ct2 |
| |
| /* |
| * Make sure there is a frame left in the trusted stack by... |
| * |
| * 1. Loading TrustedStack::frameoffset and offsetof(TrustedStack, frames) |
| */ |
| clhu t2, TrustedStack_offset_frameoffset(ctp) |
| li t0, TrustedStack_offset_frames |
| /* |
| * 2. Decreasing frameoffset by one frame. This will go below |
| * offsetof(TrustedStack, frames) if there are no active frames. |
| */ |
| addi t2, t2, -TrustedStackFrame_size |
| /* |
| * 3. Comparing. If this is the first trusted stack frame, then the csp |
| * that we would be loading is the csp on entry, which does not have a |
| * spilled area. In this case, the load below would fault (because the |
| * stack cursor is at its limit) and the thread would exit via the error |
| * path; we should instead exit the thread gracefully here. |
| */ |
| bgeu t0, t2, .Lcommon_defer_irqs_and_thread_exit |
| cincoffset ct1, ctp, t2 |
| /* |
| * Atlas update: |
| * t0: dead (again) |
| * t1: pointer to the TrustedStackFrame to bring on core |
| * t2: the TrustedStack::frameoffset associated with t1 |
| */ |
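| /* |
| * Illustratively, the check above is (a C sketch only; C-level names are |
| * approximate): |
| * |
| * uint16_t newOffset = tstack->frameoffset - sizeof(TrustedStackFrame); |
| * if (newOffset <= offsetof(TrustedStack, frames)) exit_thread(); |
| * TrustedStackFrame *frame = (TrustedStackFrame *)((char *)tstack + newOffset); |
| */ |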
| |
| /* |
| * Restore the untrusted stack pointer from the trusted stack. This points |
| * at the spill frame, created by .Lswitch_entry_first_spill and following |
| * instructions, holding caller register values. |
| */ |
| clc csp, TrustedStackFrame_offset_csp(ct1) |
| /* |
| * Atlas update: |
| * sp: pointer to untrusted stack (the spill frame created by |
| * .Lswitch_entry_first_spill) |
| */ |
| // Update the current frame offset in the TrustedStack |
| csh t2, TrustedStack_offset_frameoffset(ctp) |
| /* |
| * Do the loads *after* moving the trusted stack pointer. In theory, the |
| * checks after `.Lswitch_entry_first_spill` make it impossible for this to |
| * fault, but if we do fault here and hadn't moved the frame offset, then |
| * we'd end up in an infinite loop trying repeatedly to pop the same |
| * trusted stack frame. This would be bad. Instead, we move the trusted |
| * stack pointer *first* and so, if the accesses to the untrusted stack |
| * fault, we will detect a fault in the switcher, enter the force-unwind |
| * path, and pop the frame for the compartment that gave us a malicious |
| * csp. |
| */ |
| clc cs0, SPILL_SLOT_cs0(csp) |
| clc cs1, SPILL_SLOT_cs1(csp) |
| clc cra, SPILL_SLOT_pcc(csp) |
| clc cgp, SPILL_SLOT_cgp(csp) |
| cincoffset csp, csp, SPILL_SLOT_SIZE |
| #ifdef CONFIG_MSHWM |
| /* |
| * Read the stack high water mark, which is 16-byte aligned. We will use |
| * this as base address for stack clearing. Note that it cannot be greater |
| * than stack top as we set it to stack top when we pushed to the trusted |
| * stack frame, and it is a monotonically non-increasing value. |
| */ |
| csrr tp, CSR_MSHWM |
| #else |
| cgetbase tp, csp |
| #endif |
| cgetaddr t1, csp |
| csetaddr ct2, csp, tp |
| zero_stack t2, t1, tp |
| #ifdef CONFIG_MSHWM |
| csrw CSR_MSHWM, sp |
| #endif |
| |
| // Zero all registers not holding state intended for caller; see atlas below |
| .Lswitch_callee_dead_zeros: |
| /* |
| * FROM: above |
| * FROM: .Lswitch_trusted_stack_exhausted |
| * LIVE IN: mtdc, ra, sp, gp, s0, s1, a0, a1 |
| * |
| * Atlas: |
| * mtdc: pointer to this thread's TrustedStack |
| * a0, a1: return value(s) |
| * ra, sp, gp, s0, s1: caller state |
| * tp, t0, t1, t2, a2, a3, a4, a5: dead (to be zeroed here) |
| */ |
| zeroAllRegistersExcept ra, sp, gp, s0, s1, a0, a1 |
| .Lswitch_just_return: |
| /* |
| * FROM: above |
| * IFROM: .Lswitch_entry_first_spill (via .Lhandle_error_in_switcher) |
| * LIVE IN: mtdc, ra, sp, gp, s0, s1, a0, a1 |
| * |
| * Atlas: |
| * mtdc: pointer to this thread's TrustedStack |
| * a0, a1: return value(s) (still) |
| * ra, sp, gp, s0, s1: caller state |
| * tp, t0, t1, t2, a2, a3, a4, a5: zero (if from above) or caller state (if |
| * from .Lhandle_error_in_switcher via |
| * .Lhandle_return_context_install) |
| */ |
| cret |
| |
| /* |
| * If the stack is too small, we don't do the call, but to avoid leaking |
| * any other state we still go through the same return path as normal. We |
| * set the return registers to -ENOTENOUGHSTACK and 0, so users can see |
| * that this is the failure reason. |
| */ |
| .Lswitch_stack_too_small: |
| /* |
| * FROM: .Lswitch_stack_check_length |
| * IRQ REQUIRE: any (TrustedStack spill frame is not precious) |
| * LIVE IN: mtdc |
| * |
| * Atlas: |
| * mtdc: thread trusted stack pointer |
| */ |
| li a0, -ENOTENOUGHSTACK |
| li a1, 0 |
| // Atlas update: a0, a1: error return values |
| // LIVE OUT: mtdc, a0, a1 |
| j switcher_after_compartment_call |
| |
| /* |
| * If we have run out of trusted stack, then just restore the caller's state |
| * (mostly, the callee-save registers from the spills we did at the top of |
| * __Z26compartment_switcher_entryz) and return an error value. |
| */ |
| .Lswitch_trusted_stack_exhausted: |
| /* |
| * FROM: .Lswitch_trusted_stack_push |
| * IRQ REQUIRE: any (all state is in registers, TrustedStack spill frame is |
| * not precious) |
| * LIVE IN: mtdc, sp |
| * |
| * Atlas: |
| * mtdc: TrustedStack pointer |
| * sp: Caller stack pointer, pointing at switcher spill frame, after |
| * validation |
| */ |
| /* |
| * Restore the spilled values. Because csp has survived being stored through |
| * and has passed the permission validations, these loads will not fault. |
| */ |
| clc cs0, SPILL_SLOT_cs0(csp) |
| clc cs1, SPILL_SLOT_cs1(csp) |
| clc cra, SPILL_SLOT_pcc(csp) |
| clc cgp, SPILL_SLOT_cgp(csp) |
| cincoffset csp, csp, SPILL_SLOT_SIZE |
| // Set the first return register (a0) and zero the other (a1) |
| li a0, -ENOTENOUGHTRUSTEDSTACK |
| zeroOne a1 |
| j .Lswitch_callee_dead_zeros |
| |
| .size __Z26compartment_switcher_entryz, . - __Z26compartment_switcher_entryz |
| |
| .global exception_entry_asm |
| .p2align 2 |
| /** |
| * The entry point of all exceptions and interrupts |
| * |
| * For now, the entire routine is run with interrupts disabled. |
| */ |
| exception_entry_asm: |
| /* |
| * FROM: malice |
| * FROM: interrupt |
| * FROM: error |
| * IRQ ASSUME: deferred (sole entry is via architectural exception path, |
| * which unconditionally, atomically defers IRQs) |
| * LIVE IN: mcause, mtval, mtdc, * |
| * |
| * Atlas: |
| * mtdc: either pointer to TrustedStack or zero |
| * mcause, mtval: architecture-specified exception information. These are |
| * assumed correct -- for example, that it is impossible for |
| * untrusted code to enter the exception path with |
| * arbitrarily chosen values. |
| * *: The GPRs at the time of exception. |
| */ |
| /* |
| * We do not trust the interruptee's context. We cannot use its stack in any |
| * way. The save register frame we can use is fetched from the |
| * TrustedStack. In general, mtdc holds the trusted stack register. We are |
| * here with interrupts off and precious few registers available to us, so |
| * swap it with the csp (we'll put it back, later). |
| */ |
| cspecialrw csp, mtdc, csp |
| |
| /* |
| * If we read out zero, we've reentered the exception and are about to trap |
| * (in trustedSpillRegisters, which uses sp as its authority). |
| * |
| * Failure to guard here would mean that the trap in trustedSpillRegisters below |
| * would re-enter the trap-handler with an unknown value (the first trap's |
| * sp) in mtdc, which the rest of this code would take to be a valid |
| * TrustedStack. Exactly what would happen then is hard to say; we'd try |
| * spilling registers to a potentially attacker-controlled pointer, at the |
| * very least, and that's something to avoid. |
| */ |
| beqz sp, .Lexception_reentered |
| |
| /* |
| * The guest sp/csp (x2/c2) is now in mtdc. It will be spilled later, but we |
| * spill all the other 14 registers now. |
| */ |
| trustedSpillRegisters cra, cgp, ctp, ct0, ct1, ct2, cs0, cs1, ca0, ca1, ca2, ca3, ca4, ca5 |
| |
| /* |
| * The control flow of an exiting thread rejoins us here; that is, it joins |
| * the path taken by running threads which have taken an exception (be that |
| * a trap or an interrupt), as if the exiting thread had itself trapped. We |
| * even use the mcause register to signal the exit "exception"; see |
| * .Lcommon_thread_exit. |
| */ |
| .Lexception_exiting_threads_rejoin: |
| /* |
| * FROM: above |
| * FROM: .Lcommon_thread_exit |
| * IRQ REQUIRE: deferred (about to set MTDC to nullptr) |
| * LIVE IN: mcause, mtval, mtdc, sp |
| * |
| * Atlas: |
| * mtdc: the interrupted context's sp (or zero, if coming from |
| * .Lcommon_thread_exit) |
| * sp: TrustedStack pointer (and in particular a spill frame we can use) |
| */ |
| |
| /* |
| * mtdc got swapped with the thread's csp, store it and clobber mtdc with |
| * zero (using t1 as a scratch register, because using source register index |
| * 0 with cspecialrw means "don't write" rather than "write zero"). The |
| * trusted stack pointer is solely in csp, now; if we take another trap |
| * before a new one is installed, or if the scheduler enables interrupts and |
| * we take one, we'll pull this zero out of mtdc, above. |
| */ |
| zeroOne t1 |
| cspecialrw ct1, mtdc, ct1 |
| csc ct1, TrustedStack_offset_csp(csp) |
| /* |
| * Atlas update: |
| * mtdc: zero |
| * sp: (still) TrustedStack pointer |
| */ |
| |
| // Store the rest of the special registers |
| cspecialr ct0, mepcc |
| csc ct0, TrustedStack_offset_mepcc(csp) |
| csrr t1, mstatus |
| csw t1, TrustedStack_offset_mstatus(csp) |
| #ifdef CONFIG_MSHWM |
| csrr t1, CSR_MSHWM |
| csw t1, TrustedStack_offset_mshwm(csp) |
| csrr t1, CSR_MSHWMB |
| csw t1, TrustedStack_offset_mshwmb(csp) |
| #endif |
| csrr t1, mcause |
| csw t1, TrustedStack_offset_mcause(csp) |
| // Atlas update: t1: copy of mcause |
| |
| /* |
| * If we hit one of the exception conditions that we should let compartments |
| * handle then maybe deliver it to the compartment (if it has a handler that |
| * we have the resources to invoke). |
| */ |
| //.Lexception_might_handle: |
| li a0, MCAUSE_CHERI |
| // LIVE OUT: sp |
| beq a0, t1, .Lhandle_error |
| /* |
| * A single test suffices to catch all of... |
| * - MCAUSE_INST_MISALIGNED (0), |
| * - MCAUSE_INST_ACCESS_FAULT (1), |
| * - MCAUSE_ILLEGAL_INSTRUCTION (2), |
| * - MCAUSE_BREAKPOINT (3), |
| * - MCAUSE_LOAD_MISALIGNED (4), |
| * - MCAUSE_LOAD_ACCESS_FAULT (5), |
| * - MCAUSE_STORE_MISALIGNED (6), |
| * - MCAUSE_STORE_ACCESS_FAULT (7) |
| */ |
| li a0, 0x8 |
| // LIVE OUT: sp |
| bltu t1, a0, .Lhandle_error |
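| /* |
| * Asynchronous interrupts have the top bit of mcause set, so they compare |
| * as large unsigned values and take neither branch above; an environment |
| * call from M-mode (cause 11) likewise falls through. Both continue to the |
| * scheduler call below. |
| */ |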
| |
| //.Lexception_scheduler_call: |
| // TODO: On an ecall, we don't need to save any caller-save registers |
| |
| /* |
| * At this point, thread state is completely saved. Now prepare the |
| * scheduler context. |
| * Function signature of the scheduler entry point: |
| * TrustedStack *exception_entry(TrustedStack *sealedTStack, |
| * size_t mcause, size_t mepc, size_t mtval) |
| */ |
| LoadCapPCC ca0, .Lsealing_key_trusted_stacks |
| cseal ca0, csp, ca0 // sealed trusted stack |
| mv a1, t1 // mcause |
| cgetaddr a2, ct0 // mepcc address |
| csrr a3, mtval |
| // Fetch the stack, cgp and the trusted stack for the scheduler. |
| LoadCapPCC csp, switcher_scheduler_entry_csp |
| LoadCapPCC cgp, switcher_scheduler_entry_cgp |
| LoadCapPCC cra, switcher_scheduler_entry_pcc |
| /* |
| * Atlas: |
| * ra, gp: scheduler compartment context |
| * sp: scheduler thread context |
| * a0: sealed trusted stack pointer (opaque thread handle) |
| * a1: copy of mcause |
| * a2: copy of mepc |
| * a3: copy of mtval |
| * tp, t0, t1, t2, s0, s1, a4, a5: dead |
| */ |
| |
| // Zero everything apart from things explicitly passed to scheduler. |
| zeroAllRegistersExcept ra, sp, gp, a0, a1, a2, a3 |
| |
| // Call the scheduler. This returns the new thread in ca0. |
| cjalr cra |
| |
| //.Lexception_scheduler_return: |
| /* |
| * IFROM: above |
| * IRQ ASSUME: deferred (reachable only by IRQ-deferring reverse sentry) |
| * IRQ REQUIRE: deferred (mtdc is zero) |
| * LIVE IN: a0 |
| * |
| * Atlas: |
| * mtdc: (still) zero |
| * a0: sealed trusted stack pointer to bring onto core |
| */ |
| /* |
| * The interrupts-disabling return sentry handed to the scheduler as part of |
| * that cjalr may be captured on its stack, but as the scheduler is the |
| * topmost and only compartment in its thread (as it cannot make |
| * cross-compartment calls without faulting, due to the null presently in |
| * mtdc), there is very little that can go wrong as a result of that |
| * capture. |
| */ |
| /* |
| * The scheduler may change interrupt posture or may trap (and infinite loop |
| * if it does so; see the top of exception_entry_asm and recall that mtdc is |
| * 0 at this point), but if it returns to us (that is, we reach here), the |
| * use of the sentry created by cjalr will have restored us to deferring |
| * interrupts, and we will remain in that posture until the mret in |
| * .Lcommon_context_install. |
| */ |
| |
| // Switch onto the new thread's trusted stack |
| LoadCapPCC csp, .Lsealing_key_trusted_stacks |
| cunseal csp, ca0, csp |
| // Atlas update: sp: unsealed target thread trusted stack pointer |
| /* |
| * LOCAL SEAL: if the scheduler has shed GL(obal) of the reference it gave |
| * us in a0, then sp will also lack GL(obal) after unsealing. This |
| * reference is not stored in memory (in the switcher, anyway), just mtdc. |
| * However, when this reference is extracted and sealed for the next |
| * context switch (in .Lexception_scheduler_call), the result will lack |
| * GL(obal), which will likely prove challenging for the scheduler. That |
| * is, this is an elaborate way for the scheduler to crash itself. |
| */ |
| |
| clw t0, TrustedStack_offset_mcause(csp) |
| // Atlas update: t0: stored mcause for the target thread |
| |
| /* |
| * Only now that we have done something that actually requires the tag of |
| * csp be set, put it into mtdc. If the scheduler has returned something |
| * untagged or something with the wrong otype, the cunseal will have left |
| * csp untagged and clw will trap with mtdc still 0. If we made it here, |
| * though, csp is tagged, so the value the scheduler returned was tagged |
| * and correctly typed, and so it is safe to install it to mtdc. We won't |
| * cause traps between here and |
| * mret, so reentrancy is no longer a concern. |
| */ |
| cspecialw mtdc, csp |
| //.Lexception_scheduler_return_installed: |
| /* |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * Atlas update: mtdc: TrustedStack pointer |
| */ |
| |
| /* |
| * If mcause is MCAUSE_THREAD_INTERRUPT, then we will jump into the error |
| * handler: another thread has signalled that this thread should be |
| * interrupted. MCAUSE_THREAD_INTERRUPT is a reserved exception number that |
| * we repurpose to indicate explicit interruption. |
| */ |
| li t1, MCAUSE_THREAD_INTERRUPT |
| // LIVE OUT: mtdc, sp |
| beq t0, t1, .Lhandle_injected_error |
| |
| /* |
| * Environment call from M-mode is exception code 11. |
| * We need to skip the ecall instruction to avoid an infinite loop. |
| */ |
| li t1, 11 |
| clc ct2, TrustedStack_offset_mepcc(csp) |
| // Atlas update: t2: interrupted program counter to resume |
| // LIVE OUT: mtdc, sp, t2 |
| bne t0, t1, .Lcommon_context_install |
| cincoffset ct2, ct2, 4 |
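| // (ECALL has no compressed encoding, so the trapping instruction here is always 4 bytes.) |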
| // Fall through to install context |
| |
| .Lcommon_context_install: |
| /* |
| * FROM: above |
| * FROM: .Lhandle_error_install_context |
| * FROM: .Lhandle_return_context_install |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: mtdc, sp, t2 |
| * |
| * Atlas: |
| * mtdc, sp: TrustedStack pointer |
| * t2: target pcc to resume |
| * ra, gp, tp, t0, t1, s0, s1, a0, a1, a2, a3, a4, a5: dead |
| */ |
| /* |
| * All registers other than sp and t2 are in unspecified states and will be |
| * overwritten when we install the context. |
| */ |
| clw ra, TrustedStack_offset_mstatus(csp) |
| csrw mstatus, ra |
| #ifdef CONFIG_MSHWM |
| clw ra, TrustedStack_offset_mshwm(csp) |
| csrw CSR_MSHWM, ra |
| clw ra, TrustedStack_offset_mshwmb(csp) |
| csrw CSR_MSHWMB, ra |
| #endif |
| cspecialw mepcc, ct2 |
| |
| /* |
| * trustedReloadRegisters restores registers in the order given, and we ensure that |
| * sp/csp (x2/c2) will be loaded last and will overwrite the trusted stack |
| * pointer with the thread's stack pointer. |
| */ |
| trustedReloadRegisters cra, cgp, ctp, ct0, ct1, ct2, cs0, cs1, ca0, ca1, ca2, ca3, ca4, ca5, csp |
| mret |
| |
| /** |
| * We are starting a forced unwind. This is reached either when we are unable |
| * to run an error handler, or when we do run an error handler and it instructs |
| * us to return. This treats all register values as undefined on entry. |
| */ |
| .Lcommon_force_unwind: |
| /* |
| * FROM: .Lhandle_error_handler_return_irqs |
| * FROM: .Lhandle_error_in_switcher |
| * FROM: .Lhandle_error_test_double_fault |
| * FROM: .Lhandle_error_test_too_many |
| * FROM: .Lhandle_error_try_stackless |
| * FROM: .Lswitch_csp_check |
| * IRQ REQUIRE: any |
| * LIVE IN: mtdc |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack |
| */ |
| li a0, -ECOMPARTMENTFAIL |
| li a1, 0 |
| j switcher_after_compartment_call |
| |
| /** |
| * If we have a possibly recoverable error, see if we have a useful error |
| * handler. At this point, the register state will have been saved in the |
| * register-save area and so we just need to set up the environment. |
| * The handler will have this type signature: |
| * |
| * enum ErrorRecoveryBehaviour |
| * compartment_error_handler(struct ErrorState *frame, |
| * size_t mcause, |
| * size_t mtval); |
| */ |
| .Lhandle_error: |
| /* |
| * FROM: .Lexception_might_handle |
| * FROM: .Lhandle_injected_error |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: sp |
| * |
| * Atlas: |
| * sp: pointer to TrustedStack |
| */ |
| /* |
| * We're now out of the exception path, so make sure that mtdc contains |
| * the trusted stack pointer. |
| */ |
| cspecialw mtdc, csp |
| /* |
| * Atlas update: |
| * mtdc: pointer to TrustedStack |
| * sp: (still) pointer to TrustedStack |
| */ |
| |
| //.Lhandle_error_switcher_pcc: |
| /* |
| * We want to make sure we can't leak any switcher state into error |
| * handlers, so if we're faulting in the switcher then we should force |
| * unwind. We never change the base of PCC in the switcher, so we can |
| * check for this case by ensuring that the spilled mepcc and our current |
| * pcc have the same base. |
| */ |
| auipcc ct0, 0 |
| clc ct1, TrustedStack_offset_mepcc(csp) |
| cgetbase t0, ct0 |
| cgetbase tp, ct1 |
| beq t0, tp, .Lhandle_error_in_switcher |
| // Atlas update: t1: a copy of mepcc |
| |
| //.Lhandle_error_not_switcher: |
| // Load the interrupted thread's stack pointer into ct0 |
| clc ct0, TrustedStack_offset_csp(csp) |
| // Atlas update: t0: interrupted thread's stack pointer |
| |
| /* |
| * If we have already unwound so far that the TrustedStack::frameoffset is |
| * pointing at TrustedStack::frames[0] -- that is, if the stack has no |
| * active frames on it -- then just go back to the context we came from, |
| * effectively parking this thread in a (slow) infinite loop. |
| */ |
| clhu tp, TrustedStack_offset_frameoffset(csp) |
| li t1, TrustedStack_offset_frames |
| // LIVE OUT: sp |
| beq tp, t1, .Lcommon_thread_exit |
| |
| addi tp, tp, -TrustedStackFrame_size |
| cincoffset ctp, csp, tp |
| // Atlas update: tp: pointer to current TrustedStackFrame |
| |
| // a0 indicates whether we're calling a stackless error handler (0: stack, |
| // 1: stackless) |
| li a0, 0 |
| // Atlas update: a0: stackful (0) or stackless (1) indicator, currently 0 |
| |
| // Allocate space for the register save frame on the stack. |
| cincoffset ct0, ct0, -(16*8) |
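| /* |
| * 16 eight-byte slots: the spilled PCC plus the 15 GPRs copied in |
| * .Lhandle_error_stack_setup below, matching the ErrorState layout. |
| */ |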
| |
| //.Lhandle_error_stack_oob: |
| /* |
| * WARNING: ENCODING SPECIFIC. |
| * |
| * The following depends on the fact that before-the-start values are not |
| * representable in the CHERIoT encoding and so will clear the tag. If |
| * this property changes then this will need to be replaced by a check |
| * against the base of the stack. Note that this check can't be a simple |
| * cgetbase on ct0, because moving the address below the base sufficiently |
| * far that it's out of *representable* bounds will move the reported base |
| * value (base is a displacement from the address). |
| */ |
| cgettag t1, ct0 |
| |
| /* |
| * A value of 0xffff indicates no error handler. Both of our conditional |
| * paths want this value, but we can load it once, now. |
| */ |
| li s1, 0xffff |
| // Atlas update: s1: 0xffff |
| |
| /* |
| * If there isn't enough space on the stack, see if there's a stackless |
| * handler. |
| */ |
| // LIVE OUT: sp, tp, t0 |
| beqz t1, .Lhandle_error_try_stackless |
| |
| clc ct1, TrustedStackFrame_offset_calleeExportTable(ctp) |
| // Atlas: t1: pointer to callee's invoked export table entry |
| /* |
| * Set the export table pointer to point to the *start* of the export |
| * table. It will currently point to the entry point that was raised. |
| * |
| * TODO: We might want to pass this to the error handler; it might be |
| * useful for providing per-entry-point error results. |
| */ |
| cgetbase s0, ct1 |
| csetaddr ct1, ct1, s0 |
| clhu s0, ExportTable_offset_errorHandler(ct1) |
| |
| //.Lhandle_error_try_stackful: |
| /* |
| * A value of 0xffff indicates no error handler. If we found one, use it, |
| * otherwise fall through and try to find a stackless handler. |
| */ |
| // LIVE OUT: sp, tp, t0, t1, s0, a0 |
| bne s0, s1, .Lhandle_error_found |
| |
| .Lhandle_error_try_stackless: |
| /* |
| * FROM: above |
| * FROM: .Lhandle_error_stack_oob |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: sp, tp, s1, t0 |
| * Atlas: |
| * sp: pointer to TrustedStack |
| * tp: pointer to current TrustedStackFrame |
| * t0: interrupted thread's stack pointer |
| * s1: 0xffff |
| */ |
| |
| clc ct1, TrustedStackFrame_offset_calleeExportTable(ctp) |
| /* |
| * Set the export table pointer to point to the *start* of the export |
| * table. It will currently point to the entry point that was raised. |
| */ |
| cgetbase s0, ct1 |
| csetaddr ct1, ct1, s0 |
| // Atlas: t1: pointer to callee's export table |
| clhu s0, ExportTable_offset_errorHandlerStackless(ct1) |
| /* |
| * A value of 0xffff indicates no error handler. Give up if there is no |
| * error handler for this compartment, having already tried any stackful |
| * handler. |
| */ |
| // LIVE OUT: mtdc |
| beq s0, s1, .Lcommon_force_unwind |
| |
| /* |
| * The stack may have had its tag cleared at this point, so for stackless |
| * handlers we need to restore the on-entry stack. |
| */ |
| clc ct0, TrustedStackFrame_offset_csp(ctp) |
| // Atlas: t0: target invocation's stack pointer, as of invocation start |
| |
| /* |
| * If this is the top (initial) stack frame, then the csp field is the value |
| * on entry and it is safe to use directly. Otherwise, we reconstruct the |
| * stack as it would have been on compartment invocation. |
| */ |
| cincoffset cs1, csp, TrustedStack_offset_frames |
| beq s1, tp, .Lhandle_stack_recovered |
| |
| //.Lhandle_stack_rebound: |
| /* |
| * The address of the stack pointer will point to the bottom of the |
| * caller's save area created by .Lswitch_entry_first_spill and following |
| * instructions, so we set the bounds to be the base up to the current |
| * address, giving the handler access to the entirety of this invocation's |
| * activation frame (except the caller save registers we spilled). |
| */ |
| cgetaddr a1, ct0 |
| cgetbase a2, ct0 |
| sub a1, a1, a2 |
| csetaddr ct0, ct0, a2 |
| // The code that installs the context expects the target stack to be in ct0 |
| csetboundsexact ct0, ct0, a1 |
| .Lhandle_stack_recovered: |
| /* |
| * FROM: above |
| * FROM: .Lhandle_error_try_stackless |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: sp, tp, t0, t1, s0 |
| * |
| * Atlas: |
| * sp: pointer to TrustedStack |
| * tp: pointer to current TrustedStackFrame |
| * t0: pointer to the untrusted stack to use on invocation. Either below |
| * all activations, in the stackful handler case, or the entire |
| * invocation's stack (below the spill frame created by |
| * .Lswitch_entry_first_spill and following instructions). |
| * t1: pointer to callee's export table |
| * s0: offset from compartment PCC base to handler |
| */ |
| li a0, 1 |
| |
| .Lhandle_error_found: |
| /* |
| * FROM: above |
| * FROM: .Lhandle_error_try_stackful |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: sp, tp, t0, t1, s0, a0 |
| * |
| * Atlas: |
| * sp: pointer to TrustedStack |
| * tp: pointer to current TrustedStackFrame |
| * t0: pointer to the untrusted stack to use on invocation. Either below |
| * all activations, in the stackful handler case, or the entire |
| * invocation's stack (below the spill frame created by |
| * .Lswitch_entry_first_spill and following instructions). |
| * t1: pointer to callee's export table |
| * s0: offset from compartment PCC base to handler |
| * a0: stackful (0) or stackless (1) indicator |
| */ |
| |
| // Increment the handler invocation count. |
| clhu s1, TrustedStackFrame_offset_errorHandlerCount(ctp) |
| addi s1, s1, 1 |
| csh s1, TrustedStackFrame_offset_errorHandlerCount(ctp) |
| |
| /* |
| * The low bit should be 1 while we are handling a fault. If we are in a |
| * double fault (that is, the value we just wrote back has its low bit 0), |
| * unwind now. |
| */ |
| //.Lhandle_error_test_double_fault: |
| andi ra, s1, 1 |
| // LIVE OUT: mtdc |
| beqz ra, .Lcommon_force_unwind |
| |
| /* |
| * If we have reached some arbitrary limit on the number of faults in a |
| * single compartment call, give up now. |
| * |
| * TODO: Make this a number based on something sensible, possibly something |
| * set per entry point. Some compartments (especially top-level ones) |
| * should be allowed to fault an unbounded number of times. |
| */ |
| //.Lhandle_error_test_too_many: |
| li ra, MAX_FAULTS_PER_COMPARTMENT_CALL |
| // LIVE OUT: mtdc |
| bgtu s1, ra, .Lcommon_force_unwind |
| |
| // Load the pristine pcc and cgp for the invoked compartment. |
| clc cra, ExportTable_offset_pcc(ct1) |
| clc cgp, ExportTable_offset_cgp(ct1) |
| /* |
| * Set the jump target to the error handler entry point. This may result in |
| * something out-of-bounds if the compartment has a malicious value for |
| * their error handler (hopefully caught at link or load time), but if it |
| * does then we will fault when attempting the cjalr below and force unwind |
| * (either because the cjalr itself will raise a fault, because ra is |
| * untagged, or because the resulting PCC is out of bounds and instruction |
| * fetch fails; either case results in a forced unwind, albeit by slightly |
| * different paths, with .Lhandle_error_switcher_pcc relevant for the former |
| * and .Lhandle_error_test_double_fault for the latter. |
| */ |
| cgetbase s1, cra |
| csetaddr cra, cra, s1 |
| cincoffset cra, cra, s0 |
| |
| /* |
| * If we're in an error handler with a stack, set up the stack, otherwise |
| * we just need to set up argument registers. |
| */ |
| //.Lhandle_error_test_stackful: |
| beqz a0, .Lhandle_error_stack_setup |
| |
| //.Lhandle_error_stackless_setup: |
| clw a0, TrustedStack_offset_mcause(csp) |
| csrr a1, mtval |
| li a2, 0 |
| cmove csp, ct0 |
| // Atlas: sp: target compartment invocation stack pointer |
| j .Lhandle_error_handler_invoke |
| |
| .Lhandle_error_stack_setup: |
| /* |
| * FROM: .Lhandle_error_test_stackful |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: ra, sp, gp, t0 |
| * |
| * Atlas: |
| * ra: handler entrypoint (with bounds of compartment's .text) |
| * sp: pointer to TrustedStack |
| * gp: target compartment cgp |
| * t0: pointer to the untrusted stack to use on invocation. This is |
| * presently sufficiently below all activations to provide space for an |
| * ErrorState structure. |
| */ |
| /* |
| * Set up the on-stack context, a compartment.h:/struct ErrorState value, |
| * which has the same layout as a TrustedStack spill frame. |
| * |
| * These begin with a PCC. To ensure that handlers do not have access to |
| * values (especially, capabilities) reachable through the trapping PCC, |
| * we clear the tag. Handlers of course retain access to values reachable |
| * through their own PCC and CGP. |
| */ |
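| /* |
| * For orientation: the spill area written below is what a stackful handler |
| * sees as compartment.h's struct ErrorState. Approximately (field names |
| * here are a sketch; compartment.h is authoritative): |
| * |
| *   struct ErrorState |
| *   { |
| *     void *pcc;           // trapping PCC, with its tag cleared below |
| *     void *registers[15]; // cra onwards, in spill-frame order |
| *   }; |
| */ |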
| clc cs1, TrustedStack_offset_mepcc(csp) |
| ccleartag cs1, cs1 |
| csc cs1, TrustedStack_offset_mepcc(ct0) |
| /* |
| * Now copy the 15 GPRs from the trusted stack (sp). We use a2 as the |
| * source of the copy and a3 as the destination, preserving sp (TrustedStack |
| * pointer) and t0 (untrusted stack pointer to the base of the spill area). |
| */ |
| cincoffset ca2, csp, TrustedStack_offset_cra |
| cincoffset ca3, ct0, TrustedStack_offset_cra |
| copyContext /* dst = */ ca3, /* src = */ ca2, /* scratch = */ cs1, /* counter = */ a4 |
| |
| // Set up the arguments for the call |
| cmove ca0, ct0 |
| clw a1, TrustedStack_offset_mcause(csp) |
| csrr a2, mtval |
| cmove csp, ca0 |
| |
| .Lhandle_error_handler_invoke: |
| /* |
| * FROM: above |
| * FROM: .Lhandle_error_stackless_setup |
| * IRQ REQUIRE: any (see below) |
| * LIVE IN: mtdc, ra, sp, gp, a0, a1, a2 |
| * |
| * Atlas: |
| * mtdc: TrustedStack pointer |
| * ra: handler entrypoint (with bounds of compartment's .text) |
| * gp: target compartment cgp |
| * sp: target compartment invocation stack pointer |
| * a0, a1, a2: arguments to handler (see below) |
| * tp, t0, t1, t2, s0, s1, a3, a4, a5: dead (to be zeroed) |
| */ |
| /* |
| * At this point, the TrustedStack spill frame is no longer precious: either |
| * we have copied it down to the untrusted stack for the stackful handler's |
| * use or we have abandoned it in deciding to use the stackless handler. |
| * Thus, our "IRQ REQUIRE: any" above: it's safe to be preemptive here, |
| * though all paths to us in fact run with IRQs deferred. |
| * |
| * Since we are not using a sentry, but rather a capability constructed from |
| * the compartment's PCC (and handler offset value) to enter the |
| * compartment, enable interrupts now. |
| */ |
| /* |
| * For a stackful handler, the arguments are: |
| * - a0: equal to the invocation stack (sp), with a register spill frame |
| * here and above (the stack grows down!) |
| * - a1: mcause |
| * - a2: mtval |
| * |
| * While for stackless, the arguments are: |
| * - a0: mcause |
| * - a1: mtval |
| * - a2: zero |
| */ |
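| /* |
| * Purely for orientation, a sketch of the C-level prototypes implied by |
| * these conventions (function names and exact types here are illustrative; |
| * compartment.h is authoritative): |
| * |
| *   // Stackful: a0 = frame, a1 = mcause, a2 = mtval. |
| *   enum ErrorRecoveryBehaviour |
| *   handle_error(struct ErrorState *frame, size_t mcause, size_t mtval); |
| * |
| *   // Stackless: a0 = mcause, a1 = mtval, a2 = 0. |
| *   enum ErrorRecoveryBehaviour |
| *   handle_error_stackless(size_t mcause, size_t mtval); |
| */ |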
| csrsi mstatus, 0x8 |
| //.Lhandle_error_handler_invoke_irqs: |
| // IRQ ASSUME: enabled |
| |
| // Clear all other registers and invoke the handler |
| zeroAllRegistersExcept ra, sp, gp, a0, a1, a2 |
| cjalr cra |
| //.Lhandle_error_handler_return: |
| /* |
| * IFROM: above |
| * FROM: malice |
| * IRQ ASSUME: enabled (only IRQ-enabling reverse sentries given out) |
| * LIVE IN: mtdc, a0, sp |
| * |
| * Atlas: |
| * mtdc: pointer to this thread's TrustedStack |
| * a0: handler return value |
| * sp: target compartment invocation stack pointer |
| * gp, tp, t0, t1, t2, s0, s1, a1, a2, a3, a4, a5: dead (to be clobbered |
| * by replacement context |
| * or .Lcommon_force_unwind) |
| */ |
| /* |
| * The return sentry given to the handler as part of that cjalr could be |
| * captured in that compartment or any of its callers (recall similar |
| * commentary in switcher_after_compartment_call). Invoking this sentry, |
| * regardless of how one comes to hold it, and even if invocation is not |
| * matched to the call that constructed any given instance of it, will |
| * always result in popping the topmost trusted stack frame (at the time of |
| * invocation) and returning to its caller. |
| * |
| * Being robust to malicious entry here is facilitated by the requirements |
| * of the next block of code being minimal: mtdc must be a TrustedStack |
| * pointer, and we may try to dereference the provided sp, but we are |
| * prepared for that to trap (and induce forced-unwinding). |
| */ |
| |
| /* |
| * Now that we're back, defer interrupts again before we do anything that |
| * manipulates the TrustedStack. |
| * |
| * TODO: Eventually we'd like to move this down onto the paths where it |
| * actually matters and let most of this code run with IRQs enabled. |
| */ |
| csrci mstatus, 0x8 |
| //.Lhandle_error_handler_return_irqs: |
| // IRQ ASSUME: deferred |
| |
| /* |
| * Return values are compartment.h's enum ErrorRecoveryBehaviour : |
| * - InstallContext (0) |
| * - ForceUnwind (1) |
| * Other values are invalid and so we should do a forced unwind anyway. |
| */ |
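| /* |
| * As a minimal C-level sketch of the check below (values as listed above; |
| * compartment.h is authoritative): |
| * |
| *   enum ErrorRecoveryBehaviour |
| *   { |
| *     InstallContext = 0, |
| *     ForceUnwind    = 1, |
| *   }; |
| * |
| *   if (returnValue != InstallContext) // any non-zero value |
| *     goto common_force_unwind;        // .Lcommon_force_unwind |
| */ |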
| // LIVE OUT: mtdc |
| bnez a0, .Lcommon_force_unwind |
| |
| //.Lhandle_error_install_context: |
| // IRQ REQUIRE: deferred (TrustedStack spill frame precious, once populated) |
| /* |
| * We have been asked to install the new register context and resume. We do |
| * this by copying the register frame over the save area and entering the |
| * exception resume path. This may fault, but if it does then we will |
| * detect it as a double fault and forcibly unwind. |
| * |
| * The state of the target stack (sp) is expected to be common across both |
| * stackful and stackless handlers in the case of an InstallContext return. |
| * Above, in .Lhandle_error_stack_setup, we arranged for sp to point to a |
| * register spill frame (also passed in a0 for convenience from C). |
| * Stackless handlers are expected to arrange for sp to point to a register |
| * spill area before returning; compartments that use stackless handlers |
| * must also arrange to reserve space for one. |
| */ |
| |
| cspecialr ct1, mtdc |
| // Atlas update: t1: pointer to TrustedStack |
| #ifdef CONFIG_MSHWM |
| /* |
| * Update the spilled copy of the stack high water mark to ensure that we |
| * will clear all of the stack used by the error handler and the spilled |
| * context. |
| */ |
| csrr t0, CSR_MSHWM |
| csw t0, TrustedStack_offset_mshwm(ct1) |
| #endif |
| clhu tp, TrustedStack_offset_frameoffset(ct1) |
| addi tp, tp, -TrustedStackFrame_size |
| cincoffset ctp, ct1, tp |
| // Atlas update: tp: pointer to the current TrustedStackFrame |
| |
| /* |
| * The PCC the handler has given to us is not particularly trusted and might |
| * be an attempt to escape from the compartment. Confine it to being |
| * derived from the compartment's (static) PCC. This is a multi-step |
| * process, in which we... |
| * |
| * 1. Load the (tagged) PCC for the compartment, which is the 0th word in |
| * the ExportTable. |
| */ |
| clc ct0, TrustedStackFrame_offset_calleeExportTable(ctp) |
| cgetbase s0, ct0 |
| csetaddr ct0, ct0, s0 |
| clc ct0, ExportTable_offset_pcc(ct0) |
| // Atlas update: t0: compartment .text / PCC |
| |
| // 2. Load the untrusted PCC from the handler's returned spill area (sp). |
| clc cra, TrustedStack_offset_mepcc(csp) |
| |
| /* |
| * 3. Copy the address from the returned PCC into the compartment's PCC, |
| * which will result in an out-of-bounds capability if the handler was |
| * trying anything fishy. |
| */ |
| cgetaddr ra, cra |
| csetaddr ct2, ct0, ra |
| // Atlas update: t2: program counter to resume |
| |
| /* |
| * Now copy everything else from the stack up into the trusted saved |
| * context, using a2 as the source and a3 as the destination, preserving sp |
| * (the untrusted stack pointer) and t1 (TrustedStack pointer). |
| */ |
| cincoffset ca2, csp, TrustedStack_offset_cra |
| cincoffset ca3, ct1, TrustedStack_offset_cra |
| copyContext /* dst = */ ca3, /* src = */ ca2, /* scratch = */ cs1, /* counter = */ a4 |
| |
| /* |
| * Increment the handler invocation count. We have now returned and |
| * finished touching any data from the error handler that might cause a |
| * fault. Any subsequent fault is not treated as a double fault. It might |
| * be a fault loop, but that will be caught by the fault limit check. |
| */ |
| clh s1, TrustedStackFrame_offset_errorHandlerCount(ctp) |
| addi s1, s1, 1 |
| csh s1, TrustedStackFrame_offset_errorHandlerCount(ctp) |
| |
| /* |
| * Now that the context is set up, let the exception handler code deal with |
| * it. It expects the context to be in csp, so move the context pointer |
| * there. |
| */ |
| cmove csp, ct1 |
| // LIVE OUT: mtdc, sp, t2 |
| j .Lcommon_context_install |
| |
| .Lhandle_injected_error: |
| /* |
| * FROM: .Lexception_scheduler_return_installed |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: mtdc, sp |
| * |
| * Atlas: |
| * mtdc: TrustedStack pointer |
| * sp: TrustedStack pointer (a copy of mtdc) |
| */ |
| #ifdef CONFIG_MSHWM |
| clw ra, TrustedStack_offset_mshwm(csp) |
| csrw CSR_MSHWM, ra |
| clw ra, TrustedStack_offset_mshwmb(csp) |
| csrw CSR_MSHWMB, ra |
| #endif |
| j .Lhandle_error |
| |
| .Lcommon_defer_irqs_and_thread_exit: |
| /* |
| * FROM: switcher_after_compartment_call |
| * IRQ REQUIRE: any |
| */ |
| csrci mstatus, 0x8 |
| //.Lcommon_deferred_irqs_and_thread_exit: |
| // IRQ ASSUME: deferred |
| |
| /** |
| * Signal to the scheduler that the current thread is finished |
| */ |
| .Lcommon_thread_exit: |
| /* |
| * FROM: above |
| * FROM: .Lhandle_error_not_switcher |
| * IRQ REQUIRE: deferred (about to zero out MTDC and join exception path) |
| * LIVE IN: mtdc |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack |
| */ |
| csrw mcause, MCAUSE_THREAD_EXIT |
| /* |
| * mtval may have been updated by the action of other threads in the system |
| * and holds the last value latched during an exception. From the |
| * scheduler's perspective, thread exits are a kind of exception, and |
| * exceptions get to see mtval. Write a constant value to mtval to act more |
| * like an architectural fault and to close a small information leak to the |
| * scheduler's event handler. |
| */ |
| csrw mtval, MCAUSE_THREAD_EXIT |
| /* |
| * The thread exit code expects the TrustedStack pointer to be in csp and |
| * the thread's stack pointer to be in mtdc. After thread exit, we don't |
| * need the stack pointer so just put zero there. |
| */ |
| zeroOne sp |
| cspecialrw csp, mtdc, csp |
| // LIVE OUT: mtdc, sp |
| j .Lexception_exiting_threads_rejoin |
| |
| /* |
| * Some switcher instructions' traps are handled specially, by looking at |
| * the address in mepcc. Otherwise, we're off to a force unwind. |
| */ |
| .Lhandle_error_in_switcher: |
| /* |
| * FROM: .Lhandle_error_switcher_pcc |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: mtdc, t1 |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack |
| * t1: A copy of mepcc, the faulting program counter |
| */ |
| auipcc ctp, %cheriot_compartment_hi(.Lswitch_entry_first_spill) |
| cincoffset ctp, ctp, %cheriot_compartment_lo_i(.Lhandle_error_in_switcher) |
| bne t1, tp, .Lcommon_force_unwind |
| li a0, -ENOTENOUGHSTACK |
| li a1, 0 |
| |
| /* |
| * Cause the interrupted thread to resume as if a return had just executed. |
| * We do this by vectoring to a `cjalr ra` (`cret`) instruction through |
| * `mepcc`; whee! Overwrites the stored context a0 and a1 with the current |
| * values of those registers, effectively passing them through |
| * .Lcommon_context_install. |
| */ |
| .Lhandle_return_context_install: |
| /* |
| * FROM: above |
| * IRQ REQUIRE: deferred (TrustedStack spill frame is precious) |
| * LIVE IN: sp, a0, a1 |
| * |
| * Atlas: |
| * sp: pointer to TrustedStack |
| * a0, a1: return values to the caller |
| */ |
| auipcc ct2, %cheriot_compartment_hi(.Lswitch_just_return) |
| cincoffset ct2, ct2, %cheriot_compartment_lo_i(.Lhandle_return_context_install) |
| csc ca0, TrustedStack_offset_ca0(csp) |
| csc ca1, TrustedStack_offset_ca1(csp) |
| // LIVE OUT: sp, t2 |
| j .Lcommon_context_install |
| |
| .Lexception_reentered: |
| /* |
| * FROM: exception_entry_asm |
| * FROM: .Lexception_reentered |
| * IRQ REQUIRE: deferred (an IRQ before we reprogram MTCC could escape |
| * looping) |
| */ |
| /* |
| * We've reentered our exception handler, a "double fault" of sorts. Make |
| * sure that we end up in an architectural trap loop: clobber mtcc, so that |
| * the next trap attempts to vector to an untagged PCC, thereby causing |
| * another trap, which immediately traps again, and so on. |
| * |
| * We could instead zero mtdc, ensuring that we spin through several |
| * instructions (taking a trap then running enough of exception_entry_asm |
| * until we again trapped), but this is less architecturally visible. |
| */ |
| /* |
| * Writing cnull to mtcc takes two instructions because cspecialw is an |
| * alias for cspecialrw with a zero source, which means "don't write". So, |
| * put nullptr in a register with non-zero index, and then put that in mtcc. |
| */ |
| zeroOne sp |
| cspecialw mtcc, csp |
| // Take a trap and wedge the machine on that null MTCC |
| clc csp, 0(csp) |
| j .Lexception_reentered |
| |
| .size exception_entry_asm, . - exception_entry_asm |
| |
| /******************************************************************************* |
| * Switcher-exported library functions. |
| * |
| * These all provide some reflection on the switcher's state. |
| * |
| * At the moment, all of these avoid touching any registers except the argument |
| * registers, which means that we can define an alternative calling convention |
| * for them in the future to allow the compiler to preserve values in the |
| * temporary registers across calls. |
| * |
| * These are all part of the switcher's PCC and so will be covered by the same |
| * defence that the switcher has against being made to trap at unexpected |
| * times: any trap in the switcher will force unwind the caller's trusted stack |
| * frame. As such, no trap here can leak data. |
| * |
| * These functions must not use the stack and must ensure that they clobber |
| * all registers that hold sensitive state on the way out. |
| ******************************************************************************/ |
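| /* |
| * For orientation, the mangled names below correspond to roughly the |
| * following declarations (parameter types come from the mangling; return |
| * types are inferred from the code and are therefore approximate; the |
| * public headers are authoritative): |
| * |
| *   bool      trusted_stack_has_space(int requiredFrames); |
| *   void     *switcher_recover_stack(void); |
| *   bool      switcher_interrupt_thread(void *thread); |
| *   void     *switcher_current_thread(void); |
| *   void    **switcher_thread_hazard_slots(void); |
| *   uint16_t  thread_id_get(void); |
| *   size_t    stack_lowest_used_address(void); |
| *   uint16_t  switcher_handler_invocation_count_reset(void); |
| */ |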
| |
| // Returns whether the trusted stack has space for N more calls. |
| .section .text, "ax", @progbits |
| .p2align 2 |
| .type __Z23trusted_stack_has_spacei,@function |
| __Z23trusted_stack_has_spacei: |
| /* |
| * FROM: malice |
| * IRQ ASSUME: deferred |
| * LIVE IN: mtdc, callee-save, ra, a0 |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack (or nullptr if from buggy scheduler) |
| * ra: return pointer (guaranteed because this symbol is reachable only |
| * through an interrupt-disabling forward-arc sentry) |
| * a0: requested number of trusted stack frames |
| */ |
| li a2, TrustedStackFrame_size |
| mul a2, a0, a2 |
| // Atlas update: a2: requested number of trusted stack frames, in bytes |
| /* |
| * Load the trusted stack into the return register, so that we clobber it on |
| * the way out. Nothing here should trap, but if it does we'll forcibly |
| * unwind (see .Lhandle_error_in_switcher) and also clobber this pointer. |
| */ |
| cspecialr ca0, mtdc |
| /* |
| * TrustedStack::frames[] is a FAM at the end of the structure, and |
| * ::frameoffset codes for our current position therein (by counting bytes |
| * relative to the start of the TrustedStack). We have sufficiently many |
| * frames if the TrustedStack length minus ::frameoffset is greater than |
| * the requested number of bytes. |
| */ |
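| /* |
| * A C-level sketch of this function's check (names illustrative; the |
| * builtin is CHERI Clang's length accessor): |
| * |
| *   size_t requestedBytes = requiredFrames * sizeof(TrustedStackFrame); |
| *   size_t remaining = __builtin_cheri_length_get(trustedStack) |
| *                    - trustedStack->frameoffset; |
| *   return requestedBytes < remaining; |
| */ |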
| clhu a1, TrustedStack_offset_frameoffset(ca0) |
| // Atlas update: a1: this thread's TrustedStack::frameoffset |
| cgetlen a0, ca0 |
| // Atlas update: a0: length of this thread's TrustedStack |
| sub a0, a0, a1 |
| sltu a0, a2, a0 |
| // LIVE OUT: mtdc, a0 |
| cret |
| |
| // Reveal the stack pointer given to this compartment invocation |
| .section .text, "ax", @progbits |
| .p2align 2 |
| .type __Z22switcher_recover_stackv,@function |
| __Z22switcher_recover_stackv: |
| /* |
| * FROM: malice |
| * IRQ ASSUME: deferred |
| * LIVE IN: mtdc, callee-save, ra |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack (or nullptr if buggy scheduler) |
| * ra: return pointer (guaranteed because this symbol is reachable only |
| * through an interrupt-disabling forward-arc sentry) |
| */ |
| /* |
| * Load the trusted stack pointer into a register that we will clobber after |
| * two instructions. |
| */ |
| cspecialr ca0, mtdc |
| // Atlas update: a0: pointer to TrustedStack |
| clhu a1, TrustedStack_offset_frameoffset(ca0) |
| // Atlas update: a1: TrustedStack::frameoffset |
| addi a1, a1, -TrustedStackFrame_size |
| // Atlas update: a1: offset of current TrustedStackFrame |
| cincoffset ca0, ca0, a1 |
| // Atlas update: a0: pointer to current TrustedStackFrame |
| clc ca0, TrustedStackFrame_offset_csp(ca0) |
| // Atlas update: a0: saved stack pointer at time of frame creation |
| /* |
| * If this is the first frame, then the recovered stack will be the stack |
| * on entry, and can be returned directly. |
| */ |
| li a2, TrustedStack_offset_frames |
| // Atlas update: a2: dead but exposed: TrustedStack_offset_frames |
| beq a1, a2, 0f |
| |
| /* |
| * Otherwise, this is not the first frame, and the TrustedStackFrame::csp |
| * value is pointing to the spills done at .Lswitch_entry_first_spill. Redo |
| * the stack chopping done at .Lswitch_stack_chop to recompute the bounds |
| * we would have given to the callee. |
| */ |
| cgetaddr a1, ca0 |
| cgetbase a2, ca0 |
| sub a1, a1, a2 |
| csetaddr ca0, ca0, a2 |
| csetboundsexact ca0, ca0, a1 |
| /* |
| * Atlas update: |
| * a1: dead but exposed: the length of the stack |
| * a2: dead but exposed: base address of the stack |
| */ |
| 0: |
| // LIVE OUT: mtdc, a0 |
| cret |
| |
| .section .text, "ax", @progbits |
| .p2align 2 |
| .type __Z25switcher_interrupt_threadPv,@function |
| __Z25switcher_interrupt_threadPv: |
| /* |
| * FROM: malice |
| * IRQ ASSUME: deferred |
| * LIVE IN: mtdc, callee-save, ra, a0 |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack (or nullptr if buggy scheduler) |
| * a0: sealed pointer to target thread TrustedStack |
| * ra: return pointer (guaranteed because this symbol is reachable only |
| * through an interrupt-disabling forward-arc sentry) |
| */ |
| /* |
| * Because this function involves looking across two threads' states, it |
| * needs to run with preemption prohibited, and that means IRQs deferred. |
| */ |
| |
| // Load the unsealing key |
| LoadCapPCC ca1, .Lsealing_key_trusted_stacks |
| /* |
| * The target capability is in ca0. Unseal, clobbering our authority; |
| * check tag; and load the entry point offset. |
| */ |
| cunseal ca1, ca0, ca1 |
| // Atlas update: a1: unsealed pointer to target thread TrustedStack |
| /* |
| * LOCAL SEAL: Nothing herein depends on a1 being GL(obal). |
| */ |
| cgettag a0, ca1 |
| // a0 (the return register) now contains the tag. We return false on |
| // failure, so we can branch straight to the place where we zero the |
| // non-return registers; a0 already holds false if the unseal failed. |
| beqz a0, .Lswitcher_interrupt_thread_return |
| |
| // A thread can't interrupt itself; return failure if it tries. |
| cspecialr ca2, mtdc |
| li a0, 0 |
| beq a2, a1, .Lswitcher_interrupt_thread_return |
| // Atlas update: a2: unsealed pointer to current thread TrustedStack |
| |
| /* |
| * We allow the target thread to be interrupted if (and only if) the caller |
| * is in the same compartment as the interrupted thread. We will determine |
| * this by checking if the base of the two export table entries from the |
| * top of the trusted stack frames match. |
| */ |
| |
| // Helper macro that loads the export table from the register containing the |
| // trusted stack. The two arguments must be different registers. |
| .macro LoadExportTable result, trustedStack |
| clhu \result, TrustedStack_offset_frameoffset(\trustedStack) |
| addi \result, \result, -TrustedStackFrame_size |
| cincoffset c\result, \trustedStack, \result |
| clc c\result, TrustedStackFrame_offset_calleeExportTable(c\result) |
| cgetbase \result, c\result |
| .endm |
| |
| LoadExportTable a3, ca1 |
| cspecialr ca0, mtdc |
| LoadExportTable a2, ca0 |
| |
| // ca1 now contains the unsealed capability for the target thread, a3 |
| // contains the base of the export table entry for that thread, a2 the base |
| // of the export table for our thread. |
| li a0, 0 |
| |
| // If the two export table entries differ, return. |
| bne a2, a3, .Lswitcher_interrupt_thread_return |
| // Atlas update: a1, a2, a3: dead (to be zeroed) |
| |
| /* |
| * Mark the thread as interrupted. Store a magic value in mcause. This |
| * value will not be overwritten by a trap before the scheduler sees the |
| * target thread, since we are on core and it isn't. |
| */ |
| li a2, MCAUSE_THREAD_INTERRUPT |
| csw a2, TrustedStack_offset_mcause(ca1) |
| // Return success |
| li a0, 1 |
| .Lswitcher_interrupt_thread_return: |
| zeroRegisters a1, a2, a3 |
| cret |
| |
| // Get a sealed pointer to the current thread's TrustedStack |
| .section .text, "ax", @progbits |
| .p2align 2 |
| .type __Z23switcher_current_threadv,@function |
| __Z23switcher_current_threadv: |
| /* |
| * FROM: malice |
| * IRQ ASSUME: deferred |
| * LIVE IN: mtdc, callee-save, ra |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack (or nullptr if buggy scheduler) |
| * ra: return pointer (guaranteed because this symbol is reachable only |
| * through an interrupt-disabling forward-arc sentry) |
| */ |
| |
| LoadCapPCC ca0, .Lsealing_key_trusted_stacks |
| // Atlas update: a0: sealing authority for trusted stacks |
| cspecialr ca1, mtdc |
| // Atlas update: a1: copy of mtdc |
| cseal ca0, ca1, ca0 |
| li a1, 0 |
| /* |
| * Atlas update: |
| * a0: sealed copy of mtdc, this thread's TrustedStack |
| * a1: zero |
| */ |
| cret |
| |
| // Get a pointer to this thread's hazard pointers array |
| .section .text, "ax", @progbits |
| .p2align 2 |
| .type __Z28switcher_thread_hazard_slotsv,@function |
| __Z28switcher_thread_hazard_slotsv: |
| /* |
| * FROM: malice |
| * IRQ ASSUME: deferred |
| * LIVE IN: mtdc, callee-save, ra |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack (or nullptr if buggy scheduler) |
| * ra: return pointer (guaranteed because this symbol is reachable only |
| * through an interrupt-disabling forward-arc sentry) |
| */ |
| |
| cspecialr ca0, mtdc |
| |
| // If this traps (from null mtdc, say), we'll forcibly unwind. |
| clc ca0, TrustedStack_offset_hazardPointers(ca0) |
| // Atlas update: a0: pointer to hazard pointers |
| |
| cret |
| |
| // Get the current thread's integer ID |
| .section .text, "ax", @progbits |
| .p2align 2 |
| .type __Z13thread_id_getv,@function |
| __Z13thread_id_getv: |
| /* |
| * FROM: malice |
| * IRQ ASSUME: deferred |
| * LIVE IN: mtdc, callee-save, ra |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack (or nullptr if buggy scheduler) |
| * ra: return pointer (guaranteed because this symbol is reachable only |
| * through an interrupt-disabling forward-arc sentry) |
| */ |
| |
| cspecialr ca0, mtdc |
| /* |
| * If this is a null pointer, don't try to dereference it and report that |
| * we are thread 0. This permits the debug code to work even from things |
| * that are not real threads. |
| */ |
| cgettag a1, ca0 |
| // Atlas update: a1: tag of a0/mtdc |
| beqz a1, 0f |
| clh a0, TrustedStack_offset_threadID(ca0) |
| // Atlas update: a0: integer ID of current thread |
| 0: |
| cret |
| |
| |
| // Return the stack high-water mark |
| .section .text, "ax", @progbits |
| .p2align 2 |
| .type __Z25stack_lowest_used_addressv,@function |
| __Z25stack_lowest_used_addressv: |
| csrr a0, CSR_MSHWM |
| cret |
| |
| // Reset the count of error handler invocations in this compartment invocation |
| .section .text, "ax", @progbits |
| .p2align 2 |
| .type __Z39switcher_handler_invocation_count_resetv,@function |
| __Z39switcher_handler_invocation_count_resetv: |
| /* |
| * FROM: malice |
| * IRQ ASSUME: deferred |
| * LIVE IN: mtdc, callee-save, ra |
| * |
| * Atlas: |
| * mtdc: pointer to TrustedStack (or nullptr if buggy scheduler) |
| * ra: return pointer (guaranteed because this symbol is reachable only |
| * through an interrupt-disabling forward-arc sentry) |
| */ |
| |
| cspecialr ca1, mtdc |
| // Atlas update: a1: copy of mtdc |
| clhu a0, TrustedStack_offset_frameoffset(ca1) |
| addi a0, a0, -TrustedStackFrame_size |
| // Atlas update: a0: offset of the current trusted stack frame |
| cincoffset ca1, ca1, a0 |
| /* |
| * Atlas update: |
| * a0: dead |
| * a1: pointer to current TrustedStack::frame |
| */ |
| clh a0, TrustedStackFrame_offset_errorHandlerCount(ca1) |
| // Atlas update: a0: current invocation count (for return) |
| // Reset invocation count |
| csh zero, TrustedStackFrame_offset_errorHandlerCount(ca1) |
| // Atlas update: a1: dead (to be zeroed) |
| li a1, 0 |
| cret |
| |
| // The linker expects export tables to start with space for cgp and pcc, then |
| // the compartment error handler. We should eventually remove that requirement |
| // for library export tables, but since they don't consume RAM after loading |
| // it's not urgent. |
| .section .compartment_export_table,"a",@progbits |
| export_table_start: |
| .space 20, 0 |
| |
| /** |
| * Helper that exports a switcher function as a library call. |
| */ |
| .macro export function, prefix=__library_export_libcalls |
| .type \prefix\function,@object |
| .globl \prefix\function |
| .p2align 2 |
| \prefix\function: |
| .half \function-switcher_code_start |
| // Number of registers to clear (ignored for library exports) |
| .byte 0 |
| // Interrupts disabled. |
| .byte 16 |
| .size \prefix\function, 4 |
| .endm |
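| /* |
| * For example, `export __Z13thread_id_getv` expands (omitting the .type, |
| * .globl, and .size directives) to: |
| * |
| *   __library_export_libcalls__Z13thread_id_getv: |
| *     .half __Z13thread_id_getv-switcher_code_start |
| *     .byte 0  // number of registers to clear (ignored for library exports) |
| *     .byte 16 // interrupts disabled |
| */ |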
| |
| // Switcher entry point must be first. |
| /* |
| * We mangle the switcher export as if it were a compartment call, but see |
| * loader/boot.cc's special handling of this entry. |
| */ |
| export __Z26compartment_switcher_entryz, __export_switcher |
| export __Z23trusted_stack_has_spacei |
| export __Z22switcher_recover_stackv |
| export __Z25switcher_interrupt_threadPv |
| export __Z23switcher_current_threadv |
| export __Z28switcher_thread_hazard_slotsv |
| export __Z13thread_id_getv |
| export __Z25stack_lowest_used_addressv |
| export __Z39switcher_handler_invocation_count_resetv |