More asm comments, riscv32-unknown-elf support, action cleanup, stack align fix.
diff --git a/.github/workflows/mac_os.yml b/.github/workflows/mac_os.yml
index 3124b35..79bb1f2 100644
--- a/.github/workflows/mac_os.yml
+++ b/.github/workflows/mac_os.yml
@@ -18,17 +18,10 @@
     steps:
       # Clones a single commit from the libtock-rs repository. The commit cloned
       # is a merge commit between the PR's target branch and the PR's source.
-      # Note that we checkout submodules so that we can invoke Tock's CI setup
-      # scripts, but we do not recursively checkout submodules as we need Tock's
-      # makefile to set up the qemu submodule itself.
       - name: Clone repository
         uses: actions/checkout@v2.3.0
-        with:
-          submodules: true
 
-      # Install the toolchains we need, the run the Makefile's test action. We
-      # let the makefile do most of the work because the makefile can be tested
-      # locally. Using -j2 because the Actions VMs have 2 cores.
+      # Install the toolchains we need, then run `cargo build`.
       - name: Build and Test
         run: |
           brew tap riscv/riscv
diff --git a/core/runtime/asm/asm_riscv32.S b/core/runtime/asm/asm_riscv32.S
index d95ca12..db1036e 100644
--- a/core/runtime/asm/asm_riscv32.S
+++ b/core/runtime/asm/asm_riscv32.S
@@ -1,3 +1,31 @@
+/* rt_header is defined by the general linker script (libtock_layout.ld). It has
+ * the following layout:
+ *
+ *     Field                       | Offset
+ *     ------------------------------------
+ *     Address of the start symbol |      0
+ *     Initial process break       |      4
+ *     Top of the stack            |      8
+ *     Size of .data               |     12
+ *     Start of .data in flash     |     16
+ *     Start of .data in ram       |     20
+ *     Size of .bss                |     24
+ *     Start of .bss in ram        |     28
+ */
+
+/* start is the entry point -- the first process code the kernel executes. The
+ * kernel passes arguments through 4 registers:
+ *
+ *     a0  Pointer to beginning of the process binary's code. The linker script
+ *         locates rt_header at this address.
+ *
+ *     a1  Address of the beginning of the process's usable memory region.
+ *     a2  Size of the process's allocated memory region (including grant region)
+ *     a3  Process break provided by the kernel.
+ *
+ * We currently only use the value in a0. It is copied into a5 early on because
+ * a0-a4 are needed to invoke system calls.
+ */
 .section .start, "ax"
 .globl start
 start:
diff --git a/core/runtime/extern_asm.rs b/core/runtime/extern_asm.rs
index 1f9419a..d0488f8 100644
--- a/core/runtime/extern_asm.rs
+++ b/core/runtime/extern_asm.rs
@@ -12,14 +12,21 @@
     let build_configs = match arch.as_str() {
         "riscv32" => &[
             // First try riscv64-unknown-elf, as it is the toolchain used by
-            // libtock-c.
+            // libtock-c and the toolchain used in the CI environment.
             AsmBuildConfig {
                 triple: "riscv64-unknown-elf",
                 as_extra_args: &["-march=rv32imc"],
                 strip: true,
             },
-            // Second try riscv64-linux-gnu, as riscv64-unknown-elf is
-            // unavailable on Debian 10.
+            // Second try riscv32-unknown-elf. This is the best match for Tock's
+            // RISC-V targets, but is not as widely available (and has not been
+            // tested with libtock-rs yet).
+            AsmBuildConfig {
+                triple: "riscv32-unknown-elf",
+                as_extra_args: &[],
+                strip: false, // Untested, may need to change.
+            },
+            // Last try riscv64-linux-gnu, as it is the only option on Debian 10.
             AsmBuildConfig {
                 triple: "riscv64-linux-gnu",
                 as_extra_args: &["-march=rv32imc"],
diff --git a/core/runtime/libtock_layout.ld b/core/runtime/libtock_layout.ld
index 38624a0..fcff09a 100644
--- a/core/runtime/libtock_layout.ld
+++ b/core/runtime/libtock_layout.ld
@@ -34,6 +34,10 @@
  *                  the process break.
  */
 
+/* TODO: Should TBF_HEADER_SIZE be configured via a similar mechanism to the
+ * stack size? We should see if that is possible.
+ */
+
 /* GNU LD looks for `start` as an entry point by default, while LLVM's LLD looks
  * for `_start`. To be compatible with both, we manually specify an entry point.
  */
@@ -88,7 +92,8 @@
      */
     .stack (NOLOAD) : {
         KEEP(*(.stack_buffer))
-        _stack_top = ALIGN(16);  /* Used in rt_header */
+        . = ALIGN(16);
+        _stack_top = .;  /* Used in rt_header */
     } > RAM
 
     /* Read-write data section. This is deployed as part of FLASH but is copied