Add the RISC-V entry point assembly to libtock_runtime, and a workflow to verify it is included correctly.
diff --git a/.github/workflows/assembly.yml b/.github/workflows/assembly.yml
new file mode 100644
index 0000000..3ac8ceb
--- /dev/null
+++ b/.github/workflows/assembly.yml
@@ -0,0 +1,51 @@
+# Builds the libtock_runtime entry point assembly, making sure the source code
+# and compiled library match. Outputs the library file as an artifact in case
+# users have a different toolchain version that produces a slightly different
+# library.
+
+name: assembly
+
+# We want to run this on all pull requests. Additionally, Bors needs workflows
+# to run on the `staging` and `trying` branches to block merges on them.
+on:
+  pull_request:
+  push:
+    branches:
+      - staging
+      - trying
+
+jobs:
+  assembly:
+    # Using ubuntu-latest can cause breakage when ubuntu-latest is updated to
+    # point at a new Ubuntu version. Instead, explicitly specify the version, so
+    # we can update when we need to. This *could* break if we don't update it
+    # until support for 20.04 is dropped, but it is likely we'll have a reason
+    # to update to a newer Ubuntu before then anyway.
+    runs-on: ubuntu-20.04
+
+    steps:
+      # Clones a single commit from the libtock-rs repository. The commit cloned
+      # is a merge commit between the PR's target branch and the PR's source.
+      # No other commits are needed: the steps below only rebuild the library
+      # and compare the result against this checkout.
+      - name: Clone repository
+        uses: actions/checkout@v2.3.0
+
+      # Build the entry point assembly. apt-get is run non-interactively (-y).
+      - name: Build assembly
+        run: |
+          sudo apt-get update && sudo apt-get install -y binutils-riscv64-linux-gnu
+          core/runtime/asm/assemble.sh
+
+      # Publish the built assembly library.
+      - name: Upload assembly library
+        uses: actions/upload-artifact@v2
+        with:
+          name: assembly
+          path: core/runtime/asm/*.a
+
+      # The main goal of this is to verify the built library matches the PR's
+      # library. `git status --porcelain` also lists untracked files, so this
+      # additionally catches intermediate files left behind by assemble.sh
+      - name: Check assembly library diff
+        run: git status --porcelain && test -z "$(git status --porcelain)"
diff --git a/core/runtime/asm/asm_riscv32.S b/core/runtime/asm/asm_riscv32.S
new file mode 100644
index 0000000..c3f42c9
--- /dev/null
+++ b/core/runtime/asm/asm_riscv32.S
@@ -0,0 +1,57 @@
+.section .start, "ax"
+.globl start
+start:
+	/* First, verify the process binary was loaded at the correct address. The
+	 * check is performed by comparing the program counter at the start to the
+	 * address of `start`, which is stored in rt_header. */
+	auipc s0, 0            /* s0 = pc */
+	mv a5, a0              /* Save rt_header so syscalls don't overwrite it */
+	lw s1, 0(a5)           /* s1 = rt_header.start */
+	beq s0, s1, .Lset_brk  /* Skip error handling code if pc is correct */
+	/* If the beq on the previous line did not jump, then the binary is not at
+	 * the correct location. Report the error via LowLevelDebug then exit. */
+	li a0, 8  /* LowLevelDebug driver number */
+	li a1, 1  /* Command: Print alert code */
+	li a2, 2  /* Alert code 2 (incorrect location) */
+	li a4, 2  /* `command` class */
+	ecall
+	li a0, 0  /* exit-terminate */
+	/* TODO: Set a completion code, once completion codes are decided */
+	li a4, 6  /* `exit` class */
+	ecall
+
+	.Lset_brk:
+	/* memop(): set brk to rt_header's initial break value */
+	li a0, 0      /* operation: set break */
+	lw a1, 4(a5)  /* rt_header's initial process break */
+	li a4, 5      /* `memop` class */
+	ecall
+
+	/* Set the stack pointer */
+	lw sp, 8(a5)  /* sp = rt_header._stack_top */
+
+	/* Copy .data into place. */
+	lw a0, 12(a5)              /* remaining = rt_header.data_size */
+	beqz a0, .Lzero_bss        /* Jump to zero_bss if remaining is zero */
+	lw a1, 16(a5)              /* src = rt_header.data_flash_start */
+	lw a2, 20(a5)              /* dest = rt_header.data_ram_start */
+	.Ldata_loop_body:
+	lw a3, 0(a1)               /* a3 = *src */
+	sw a3, 0(a2)               /* *dest = a3 */
+	c.addi a0, -4              /* remaining -= 4 */
+	c.addi a1, 4               /* src += 4 */
+	c.addi a2, 4               /* dest += 4 */
+	bnez a0, .Ldata_loop_body  /* Iterate again if remaining != 0 */
+
+	.Lzero_bss:
+	lw a0, 24(a5)               /* remaining = rt_header.bss_size */
+	beqz a0, .Lcall_rust_start  /* Jump to call_rust_start if remaining is zero */
+	lw a1, 28(a5)               /* dest = rt_header.bss_start */
+	.Lbss_loop_body:
+	sb zero, 0(a1)              /* *dest = zero */
+	c.addi a0, -1               /* remaining -= 1 */
+	c.addi a1, 1                /* dest += 1 */
+	bnez a0, .Lbss_loop_body    /* Iterate again if remaining != 0 */
+
+	.Lcall_rust_start:
+	jal rust_start  /* Call rust_start, which is defined outside this file */
diff --git a/core/runtime/asm/assemble.sh b/core/runtime/asm/assemble.sh
new file mode 100755
index 0000000..d07e2ab
--- /dev/null
+++ b/core/runtime/asm/assemble.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -e
+
+# Switch into the directory this script is in, in case it was run from another
+# location.
+cd "$(dirname "$0")"
+
+# Our CI runs in GitHub Actions' Ubuntu 20.04 image. The only RISC-V toolchain
+# in Ubuntu 20.04's repositories is for riscv64-linux-gnu. Fortunately, this
+# toolchain can output 32-bit RISC-V assembly using the -march= option, and the
+# fact it is targeted at GNU/Linux doesn't matter for our short handwritten
+# assembly segment.
+#
+# Although we also support rv32imac targets, we do not need to separately
+# assemble for it, as asm_riscv32.S does not use atomic instructions.
+riscv64-linux-gnu-as -march=rv32imc asm_riscv32.S -o riscv32.o
+# For some reason, riscv64-linux-gnu-as includes local symbols in its output.
+# This pollutes the output of `objdump`, making debugging more difficult. This
+# strips the extra symbols to keep the disassembly readable.
+riscv64-linux-gnu-strip -K start -K rust_start riscv32.o
+
+# Remove the archive file in case there is something unexpected in it (so that
+# issues cannot persist across calls to this script).
+rm -f libriscv32.a
+# c == do not complain if archive needs to be created, r == insert or replace
+# file in archive, D == deterministic mode (zeroed timestamps, uids and gids)
+riscv64-linux-gnu-ar crD libriscv32.a riscv32.o
+# Remove riscv32.o as it is an intermediate build artifact.
+rm riscv32.o
diff --git a/core/runtime/asm/libriscv32.a b/core/runtime/asm/libriscv32.a
new file mode 100644
index 0000000..f6139a7
--- /dev/null
+++ b/core/runtime/asm/libriscv32.a
Binary files differ
diff --git a/core/runtime/layout_generic.ld b/core/runtime/layout_generic.ld
index 0bbef87..38624a0 100644
--- a/core/runtime/layout_generic.ld
+++ b/core/runtime/layout_generic.ld
@@ -1,171 +1,124 @@
-/* Userland Generic Layout
+/* Layout file for Tock process binaries that use libtock-rs. This currently
+ * implements static linking, because we do not have a working
+ * position-independent relocation solution. This layout works for all
+ * platforms libtock-rs supports (ARM and RISC-V).
  *
- * Currently, due to incomplete ROPI-RWPI support in rustc (see
- * https://github.com/tock/libtock-rs/issues/28), this layout implements static
- * linking. An application init script must define the FLASH and SRAM address
- * ranges as well as MPU_MIN_ALIGN before including this layout file.
- *
- * Here is a an example application linker script to get started:
+ * This layout should be included by a script that defines the FLASH and RAM
+ * regions for the board as well as TBF_HEADER_SIZE. Here is an example
+ * process binary linker script to get started:
  *     MEMORY {
- *         /* FLASH memory region must start immediately *after* the Tock
- *          * Binary Format headers, which means you need to offset the
- *          * beginning of FLASH memory region relative to where the
- *          * application is loaded.
- *         FLASH (rx) : ORIGIN = 0x10030, LENGTH = 0x0FFD0
- *         SRAM (RWX) : ORIGIN = 0x20000, LENGTH = 0x10000
+ *         FLASH (X) : ORIGIN = 0x10000, LENGTH = 0x10000
+ *         RAM   (W) : ORIGIN = 0x20000, LENGTH = 0x10000
  *     }
- *     MPU_MIN_ALIGN = 8K;
+ *     TBF_HEADER_SIZE = 0x40;
  *     INCLUDE ../libtock-rs/layout.ld
+ *
+ * FLASH refers to the area the process binary occupies in flash, including TBF
+ * headers. RAM refers to the area the process will have access to in memory.
+ * STACK_SIZE is the size of the process' stack (this layout file may round the
+ * stack size up for alignment purposes). TBF_HEADER_SIZE must correspond to the
+ * --protected-region-size flag passed to elf2tab.
+ *
+ * This places the flash sections in the following order:
+ *     1. .rt_header -- Constants used by runtime initialization.
+ *     2. .text      -- Executable code.
+ *     3. .rodata    -- Read-only global data (e.g. most string constants).
+ *     4. .data      -- Read-write data, copied to RAM at runtime.
+ *
+ * This places the RAM sections in the following order:
+ *     1. .stack -- The stack grows downward. Putting it first gives us
+ *                  MPU-based overflow detection.
+ *     2. .data  -- Read-write data, initialized by copying from flash.
+ *     3. .bss   -- Zero-initialized read-write global data.
+ *     4. Heap   -- The heap (optional) comes after .bss and grows upwards to
+ *                  the process break.
  */
 
-ENTRY(_start)
+/* GNU LD looks for `start` as an entry point by default, while LLVM's LLD looks
+ * for `_start`. To be compatible with both, we manually specify an entry point.
+ */
+ENTRY(start)
 
 SECTIONS {
-    /* Section for just the app crt0 header.
-     * This must be first so that the app can find it.
+    /* Sections located in FLASH at runtime.
      */
-    .crt0_header :
-    {
-        _beginning = .; /* Start of the app in flash. */
-        /**
-         * Populate the header expected by `crt0`:
-         *
-         *  struct hdr {
-         *    uint32_t got_sym_start;
-         *    uint32_t got_start;
-         *    uint32_t got_size;
-         *    uint32_t data_sym_start;
-         *    uint32_t data_start;
-         *    uint32_t data_size;
-         *    uint32_t bss_start;
-         *    uint32_t bss_size;
-         *    uint32_t reldata_start;
-         *    uint32_t stack_size;
-         *  };
-         */
-        /* Offset of GOT symbols in flash */
-        LONG(LOADADDR(.got) - _beginning);
-        /* Offset of GOT section in memory */
-        LONG(_got);
-        /* Size of GOT section */
-        LONG(SIZEOF(.got));
-        /* Offset of data symbols in flash */
-        LONG(LOADADDR(.data) - _beginning);
-        /* Offset of data section in memory */
-        LONG(_data);
-        /* Size of data section */
+
+    /* Add a section where elf2tab will place the TBF headers, so that the rest
+     * of the FLASH sections are in the right locations. */
+    .tbf_header (NOLOAD) : {
+        . = . + TBF_HEADER_SIZE;
+    } > FLASH
+
+    /* Runtime header. Contains values the linker knows that the runtime needs
+     * to look up.
+     */
+    .rt_header : {
+        rt_header = .;
+        LONG(start);
+        LONG(ADDR(.bss) + SIZEOF(.bss)); /* Initial process break */
+        LONG(_stack_top);
         LONG(SIZEOF(.data));
-        /* Offset of BSS section in memory */
-        LONG(_bss);
-        /* Size of BSS section */
+        LONG(LOADADDR(.data));
+        LONG(ADDR(.data));
         LONG(SIZEOF(.bss));
-        /* First address offset after program flash, where elf2tab places
-         * .rel.data section */
-        LONG(LOADADDR(.endflash) - _beginning);
-        /* The size of the stack requested by this application */
-        LONG(_stack_top_aligned - _sstack);
-        /* Pad the header out to a multiple of 32 bytes so there is not a gap
-         * between the header and subsequent .data section. It's unclear why,
-         * but LLD is aligning sections to a multiple of 32 bytes. */
-        . = ALIGN(32);
-    } > FLASH =0xFF
+        LONG(ADDR(.bss));
+    } > FLASH
 
-    /* Text section, Code! */
-    .text :
-    {
-        . = ALIGN(4);
-        _text = .;
-        KEEP (*(.start))
-        *(.text*)
-        *(.rodata*)
-        KEEP (*(.syscalls))
-        *(.ARM.extab*)
-        . = ALIGN(4); /* Make sure we're word-aligned here */
-        _etext = .;
-    } > FLASH =0xFF
+    /* Text section -- the application's code, incl. per-function sections. */
+    .text ALIGN(4) : {
+        *(.start)
+        *(.text .text.*)
+    } > FLASH
 
-    /* Application stack */
-    .stack (NOLOAD) :
-    {
-        /* elf2tab requires that the `_sram_origin` symbol be present to
-         * mark the first address in the SRAM memory. Since ELF files do
-         * not really need to specify this address as they only care about
-         * loading into flash, we need to manually mark this address for
-         * elf2tab. elf2tab will use it to add a fixed address header in the
-         * TBF header if needed.
+    /* Read-only data section. Contains strings and other global constants. */
+    .rodata ALIGN(4) : {
+        *(.rodata .rodata.*)
+        /* .data is placed after .rodata in flash, word-aligned so the startup
+         * copy loop can use lw. _data_flash_start is .data's AT() address.
          */
-        _sram_origin = .;
-        _sstack = .;
+        . = ALIGN(4); _data_flash_start = .;
+    } > FLASH
+
+    /* Sections located in RAM at runtime.
+     */
+
+    /* Reserve space for the stack, padding its end to a multiple of 16 bytes
+     * for the RISC-V calling convention so _stack_top stays inside .stack:
+     * https://riscv.org/wp-content/uploads/2015/01/riscv-calling.pdf
+     */
+    .stack (NOLOAD) : {
         KEEP(*(.stack_buffer))
-        _stack_top_unaligned = .;
-        . = ALIGN(8);
-        _stack_top_aligned = .;
-    } > SRAM
+        . = ALIGN(16); _stack_top = .;  /* Used in rt_header */
+    } > RAM
 
-    /* Data section, static initialized variables
-     *  Note: This is placed in Flash after the text section, but needs to be
-     *  moved to SRAM at runtime
+    /* Read-write data section. This is deployed as part of FLASH but is copied
+     * into RAM at runtime.
      */
-    .data : AT (_etext)
-    {
-        . = ALIGN(4); /* Make sure we're word-aligned here */
-        _data = .;
-        KEEP(*(.data*))
-        *(.sdata*) /* RISC-V small-pointer data section */
-        . = ALIGN(4); /* Make sure we're word-aligned at the end of flash */
-    } > SRAM
-
-    /* Global Offset Table */
-    .got :
-    {
-        . = ALIGN(4); /* Make sure we're word-aligned here */
-        _got = .;
-        *(.got*)
-        *(.got.plt*)
+    .data ALIGN(4) : AT(_data_flash_start) {
+        data_ram_start = .;
+        /* .sdata is RISC-V small data; the .* forms match per-symbol sections */
+        *(.sdata .sdata.* .data .data.*)
+        /* Pad to word alignment so the relocation loop can use word-sized
+         * copies.
+         */
         . = ALIGN(4);
-    } > SRAM
+    } > RAM
 
-    /* BSS section, static uninitialized variables */
-    .bss :
-    {
-        . = ALIGN(4); /* Make sure we're word-aligned here */
-        _bss = .;
-        KEEP(*(.bss* .sbss*))
-        *(COMMON)
-        . = ALIGN(4);
-    } > SRAM
-
-    /* End of flash. */
-    .endflash :
-    {
-    } > FLASH
-
-    /* ARM Exception support
-     *
-     * This contains compiler-generated support for unwinding the stack,
-     * consisting of key-value pairs of function addresses and information on
-     * how to unwind stack frames.
-     * https://wiki.linaro.org/KenWerner/Sandbox/libunwind?action=AttachFile&do=get&target=libunwind-LDS.pdf
-     *
-     * .ARM.exidx is sorted, so has to go in its own output section.
-     *
-     * __NOTE__: It's at the end because we currently don't actually serialize
-     * it to the binary in elf2tbf. If it was before the RAM sections, it would
-     * through off our calculations of the header.
+    /* BSS section. These are zero-initialized static variables. This section is
+     * not copied from FLASH into RAM but rather directly initialized, and is
+     * mainly put in this linker script so that we get an error if it overflows
+     * the RAM region.
      */
-    PROVIDE_HIDDEN (__exidx_start = .);
-    .ARM.exidx :
-    {
-      /* (C++) Index entries for section unwinding */
-      *(.ARM.exidx* .gnu.linkonce.armexidx.*)
-    } > FLASH
-    PROVIDE_HIDDEN (__exidx_end = .);
+    .bss ALIGN(4) (NOLOAD) : {
+        /* .sbss is RISC-V small data; the .* forms match per-symbol sections */
+        *(.sbss .sbss.* .bss .bss.*)
+    } > RAM
 
+    _heap_start = ADDR(.bss) + SIZEOF(.bss);  /* Same value as rt_header's initial process break */
+
+    /* Sections we do not need. */
     /DISCARD/ :
     {
-      *(.eh_frame)
+      *(.ARM.exidx .eh_frame)
     }
 }
-
-ASSERT((_stack_top_aligned - _stack_top_unaligned) == 0, "
-STACK_SIZE must be 8 byte multiple")