[otbn] Split instruction groups into their own YAML files No functional change: this is just trying to tame the enormous insns.yml (whose data will grow further as we add more documentation or other instruction information). Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>

commit: 124c94faac8c98a86208f400540ecb94c6f0375d [log] [tgz]
author: Rupert Swarbrick <rswarbrick@lowrisc.org> Thu Aug 20 12:49:19 2020 +0100
committer: Rupert Swarbrick <rswarbrick@gmail.com> Thu Aug 20 13:52:10 2020 +0100
tree: b6016c7603a72c4037fedf4e18d9431592bc6d10
parent: 9d04bdadbb15d6db7a2542cdba34ae9f411adacd [diff]
diff --git a/hw/ip/otbn/data/base-insns.yml b/hw/ip/otbn/data/base-insns.yml
new file mode 100644
index 0000000..92afbba
--- /dev/null
+++ b/hw/ip/otbn/data/base-insns.yml

@@ -0,0 +1,468 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+# Definitions for the base group of instructions. See insns.yml for
+# the detailed format.
+
+- mnemonic: add
+  rv32i: true
+  synopsis: Add
+  operands: [grd, grs1, grs2]
+  encoding:
+    scheme: R
+    mapping:
+      funct7: b0000000
+      rs2: grs2
+      rs1: grs1
+      funct3: b000
+      rd: grd
+      opcode: b01100
+
+- mnemonic: addi
+  rv32i: true
+  synopsis: Add Immediate
+  operands: [grd, grs1, imm]
+  encoding:
+    scheme: I
+    mapping:
+      imm: imm
+      rs1: grs1
+      funct3: b000
+      rd: grd
+      opcode: b00100
+
+- mnemonic: lui
+  rv32i: true
+  synopsis: Load Upper Immediate
+  operands:
+    - grd
+    - name: imm
+      type: uimm
+  encoding:
+    scheme: U
+    mapping:
+      imm: imm
+      rd: grd
+      opcode: b01101
+
+- mnemonic: sub
+  rv32i: true
+  synopsis: Subtract
+  operands: [grd, grs1, grs2]
+  encoding:
+    scheme: R
+    mapping:
+      funct7: b0100000
+      rs2: grs2
+      rs1: grs1
+      funct3: b000
+      rd: grd
+      opcode: b01100
+
+- mnemonic: sll
+  rv32i: true
+  synopsis: Logical left shift
+  operands: [grd, grs1, grs2]
+  encoding:
+    scheme: R
+    mapping:
+      funct7: b0000000
+      rs2: grs2
+      rs1: grs1
+      funct3: b001
+      rd: grd
+      opcode: b01100
+
+- mnemonic: slli
+  rv32i: true
+  synopsis: Logical left shift with Immediate
+  operands:
+    - grd
+    - grs1
+    - &shamt-operand
+      name: shamt
+      type: uimm
+  encoding:
+    scheme: Is
+    mapping:
+      arithmetic: b0
+      shamt: shamt
+      rs1: grs1
+      funct3: b001
+      rd: grd
+      opcode: b00100
+
+- mnemonic: srl
+  rv32i: true
+  synopsis: Logical right shift
+  operands: [grd, grs1, grs2]
+  encoding:
+    scheme: R
+    mapping:
+      funct7: b0000000
+      rs2: grs2
+      rs1: grs1
+      funct3: b101
+      rd: grd
+      opcode: b01100
+
+- mnemonic: srli
+  rv32i: true
+  synopsis: Logical right shift with Immediate
+  operands:
+    - grd
+    - grs1
+    - *shamt-operand
+  encoding:
+    scheme: Is
+    mapping:
+      arithmetic: b0
+      shamt: shamt
+      rs1: grs1
+      funct3: b101
+      rd: grd
+      opcode: b00100
+
+- mnemonic: sra
+  rv32i: true
+  synopsis: Arithmetic right shift
+  operands: [grd, grs1, grs2]
+  encoding:
+    scheme: R
+    mapping:
+      funct7: b0100000
+      rs2: grs2
+      rs1: grs1
+      funct3: b101
+      rd: grd
+      opcode: b01100
+
+- mnemonic: srai
+  rv32i: true
+  synopsis: Arithmetic right shift with Immediate
+  operands:
+    - grd
+    - grs1
+    - *shamt-operand
+  encoding:
+    scheme: Is
+    mapping:
+      arithmetic: b1
+      shamt: shamt
+      rs1: grs1
+      funct3: b101
+      rd: grd
+      opcode: b00100
+
+- mnemonic: and
+  rv32i: true
+  synopsis: Bitwise AND
+  operands: [grd, grs1, grs2]
+  encoding:
+    scheme: R
+    mapping:
+      funct7: b0000000
+      rs2: grs2
+      rs1: grs1
+      funct3: b111
+      rd: grd
+      opcode: b01100
+
+- mnemonic: andi
+  rv32i: true
+  synopsis: Bitwise AND with Immediate
+  operands: [grd, grs1, imm]
+  encoding:
+    scheme: I
+    mapping:
+      imm: imm
+      rs1: grs1
+      funct3: b111
+      rd: grd
+      opcode: b00100
+
+- mnemonic: or
+  rv32i: true
+  synopsis: Bitwise OR
+  operands: [grd, grs1, grs2]
+  encoding:
+    scheme: R
+    mapping:
+      funct7: b0000000
+      rs2: grs2
+      rs1: grs1
+      funct3: b110
+      rd: grd
+      opcode: b01100
+
+- mnemonic: ori
+  rv32i: true
+  synopsis: Bitwise OR with Immediate
+  operands: [grd, grs1, imm]
+  encoding:
+    scheme: I
+    mapping:
+      imm: imm
+      rs1: grs1
+      funct3: b110
+      rd: grd
+      opcode: b00100
+
+- mnemonic: xor
+  rv32i: true
+  synopsis: Bitwise XOR
+  operands: [grd, grs1, grs2]
+  encoding:
+    scheme: R
+    mapping:
+      funct7: b0000000
+      rs2: grs2
+      rs1: grs1
+      funct3: b100
+      rd: grd
+      opcode: b01100
+
+- mnemonic: xori
+  rv32i: true
+  synopsis: Bitwise XOR with Immediate
+  operands: [grd, grs, imm]
+  encoding:
+    scheme: I
+    mapping:
+      imm: imm
+      rs1: grs
+      funct3: b100
+      rd: grd
+      opcode: b00100
+
+- mnemonic: lw
+  rv32i: true
+  synopsis: Load Word
+  operands: [grd, offset, grs1]
+  syntax: <grd>, <offset>(<grs1>)
+  encoding:
+    scheme: I
+    mapping:
+      imm: offset
+      rs1: grs1
+      funct3: b010
+      rd: grd
+      opcode: b00000
+  doc: |
+    Load a 32b word from address `<offset> + <grs1>` in data memory, writing the result to `<grd>`.
+    Unaligned loads are not supported.
+    Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
+  lsu:
+    type: mem-load
+    target: [offset, grs1]
+    bytes: 4
+
+- mnemonic: sw
+  rv32i: true
+  synopsis: Store Word
+  operands: [grs2, offset, grs1]
+  syntax: <grs2>, <offset>(<grs1>)
+  encoding:
+    scheme: S
+    mapping:
+      imm: offset
+      rs2: grs2
+      rs1: grs1
+      funct3: b010
+      opcode: b01000
+  doc: |
+    Store a 32b word in `<grs2>` to address `<offset> + <grs1>` in data memory.
+    Unaligned stores are not supported.
+    Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
+  lsu:
+    type: mem-store
+    target: [offset, grs1]
+    bytes: 4
+
+- mnemonic: beq
+  rv32i: true
+  synopsis: Branch Equal
+  operands: [grs1, grs2, offset]
+  straight-line: false
+  encoding:
+    scheme: B
+    mapping:
+      imm: offset
+      rs2: grs2
+      rs1: grs1
+      funct3: b000
+      opcode: b11000
+
+- mnemonic: bne
+  rv32i: true
+  synopsis: Branch Not Equal
+  operands: [grs1, grs2, offset]
+  straight-line: false
+  encoding:
+    scheme: B
+    mapping:
+      imm: offset
+      rs2: grs2
+      rs1: grs1
+      funct3: b001
+      opcode: b11000
+
+- mnemonic: jal
+  rv32i: true
+  synopsis: Jump And Link
+  operands: [grd, offset]
+  straight-line: false
+  trailing-doc: |
+    The JAL instruction has the same behavior as in RV32I, jumping by the given offset and writing `PC+4` as a link address to the destination register.
+    OTBN has a hardware managed call stack, accessed through `x1`, which should be used when calling subroutines.
+    Do so by using `x1` as the link register: `jal x1, <offset>`.
+  encoding:
+    scheme: J
+    mapping:
+      imm: offset
+      rd: grd
+      opcode: b11011
+
+- mnemonic: jalr
+  rv32i: true
+  synopsis: Jump And Link Register
+  operands: [grd, grs1, offset]
+  straight-line: false
+  trailing-doc: |
+    The JALR instruction has the same behavior as in RV32I, jumping to the address `<grs1> + <offset>` and writing `PC+4` as a link address to the destination register.
+    OTBN has a hardware managed call stack, accessed through `x1`, which should be used when calling and returning from subroutines.
+    To return from a subroutine, use `jalr x0, x1, 0`.
+    This pops a link address from the call stack and branches to it.
+    To call a subroutine through a function pointer, use `jalr x1, <grs1>, 0`.
+    This jumps to the address in `<grs1>` and pushes the link address onto the call stack.
+  encoding:
+    scheme: I
+    mapping:
+      imm: offset
+      rs1: grs1
+      funct3: b000
+      rd: grd
+      opcode: b11001
+
+- mnemonic: csrrs
+  rv32i: true
+  synopsis: Atomic Read and Set bits in CSR
+  operands: [grd, csr, grs]
+  encoding:
+    scheme: I
+    mapping:
+      imm: csr
+      rs1: grs
+      funct3: b010
+      rd: grd
+      opcode: b11100
+  lsu:
+    type: csr
+    target: [csr]
+
+- mnemonic: csrrw
+  rv32i: true
+  synopsis: Atomic Read/Write CSR
+  operands: [grd, csr, grs]
+  encoding:
+    scheme: I
+    mapping:
+      imm: csr
+      rs1: grs
+      funct3: b001
+      rd: grd
+      opcode: b11100
+  lsu:
+    type: csr
+    target: [csr]
+
+- mnemonic: ecall
+  rv32i: true
+  synopsis: Environment Call
+  operands: []
+  straight-line: false
+  doc: |
+    Triggers the `done` interrupt to indicate the completion of the
+    operation.
+  encoding:
+    scheme: I
+    mapping:
+      imm: b000000000000
+      rs1: b00000
+      funct3: b000
+      rd: b00000
+      opcode: b11100
+
+- mnemonic: loop
+  synopsis: Loop (indirect)
+  operands:
+    - name: grs
+      doc: Name of the GPR containing the number of iterations
+    - &bodysize-operand
+      name: bodysize
+      type: uimm
+      doc: Number of instructions in the loop body
+  straight-line: false
+  note: &loop-note |
+    The LOOP and LOOPI instructions are under-specified, and improvements
+    to them are being discussed. See
+    https://github.com/lowRISC/opentitan/issues/2496 for up-to-date
+    information.
+  doc: |
+    Repeat a sequence of code multiple times. The number of iterations is
+    read from `<grs>`, treated as an unsigned value. The number of
+    instructions in the loop is given in the `<bodysize>` immediate.
+  encoding:
+    scheme: loop
+    mapping:
+      bodysize: bodysize
+      grs: grs
+
+- mnemonic: loopi
+  synopsis: Loop Immediate
+  operands:
+    - name: iterations
+      type: uimm
+      doc: Number of iterations
+    - *bodysize-operand
+  straight-line: false
+  note: *loop-note
+  doc: |
+    Repeat a sequence of code multiple times. The `<iterations>`
+    unsigned immediate operand gives the number of iterations and
+    the `<bodysize>` unsigned immediate operand gives the number of
+    instructions in the body.
+  encoding:
+    scheme: loopi
+    mapping:
+      bodysize: bodysize
+      iterations: iterations
+
+- mnemonic: nop
+  synopsis: No Operation
+  rv32i: true
+  operands: []
+  doc: A pseudo-operation that has no effect.
+  literal-pseudo-op:
+    - addi x0, x0, 0
+
+- mnemonic: li
+  synopsis: Load Immediate
+  rv32i: true
+  operands: [grd, imm]
+  doc: |
+    Load a 32b signed immediate value into a GPR. This uses ADDI and LUI,
+    expanding to one or two instructions, depending on the immediate (small
+    non-negative immediates or immediates with all lower bits zero can be
+    loaded with just ADDI or LUI, respectively; general immediates need a LUI
+    followed by an ADDI).
+  python-pseudo-op: true
+
+- mnemonic: ret
+  synopsis: Return from subroutine
+  rv32i: true
+  operands: []
+  straight-line: false
+  literal-pseudo-op:
+    - JALR x0, x1, 0

diff --git a/hw/ip/otbn/data/bignum-insns.yml b/hw/ip/otbn/data/bignum-insns.yml
new file mode 100644
index 0000000..a4c3ef7
--- /dev/null
+++ b/hw/ip/otbn/data/bignum-insns.yml

@@ -0,0 +1,959 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+# Definitions for the big number group of instructions. See insns.yml
+# for the detailed format.
+
+- mnemonic: bn.add
+  synopsis: Add
+  operands: &bn-add-operands
+    - name: wrd
+      doc: Name of the destination WDR
+    - name: wrs1
+      doc: Name of the first source WDR
+    - name: wrs2
+      doc: Name of the second source WDR
+    - &bn-shift-type-operand
+      name: shift_type
+      type: enum(<<, >>)
+      doc: |
+        The direction of an optional shift applied to `<wrs2>`.
+    - &bn-shift-bytes-operand
+      name: shift_bytes
+      type: uimm5
+      doc: |
+        Number of bytes by which to shift `<wrs2>`. Defaults to 0.
+    - &bn-flag-group-operand
+      name: flag_group
+      type: uimm1
+      doc: Flag group to use. Defaults to 0.
+  syntax: &bn-add-syntax |
+    <wrd>, <wrs1>, <wrs2>[<shift_type> <shift_bytes>B][, FG<flag_group>]
+  doc: |
+    Adds two WDR values, writes the result to the destination WDR and updates
+    flags. The content of the second source WDR can be shifted by an unsigned
+    immediate before it is consumed by the operation.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+
+    fg = DecodeFlagGroup(flag_group)
+    sb = UInt(shift_bytes)
+    st = DecodeShiftType(shift_type)
+  operation: |
+    b_shifted = ShiftReg(b, st, sb)
+    (result, flags_out) = AddWithCarry(a, b_shifted, "0")
+
+    WDR[d] = result
+    FLAGS[flag_group] = flags_out
+  encoding:
+    scheme: bnaf
+    mapping:
+      fg: flag_group
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b000
+      wrd: wrd
+
+- mnemonic: bn.addc
+  synopsis: Add with Carry
+  operands: *bn-add-operands
+  syntax: *bn-add-syntax
+  doc: |
+    Adds two WDR values and the Carry flag value, writes the result to the
+    destination WDR, and updates the flags. The content of the second source
+    WDR can be shifted by an unsigned immediate before it is consumed by the
+    operation.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+
+    fg = DecodeFlagGroup(flag_group)
+    sb = UInt(shift_bytes)
+    st = DecodeShiftType(shift_type)
+  operation: |
+    b_shifted = ShiftReg(b, st, sb)
+    (result, flags_out) = AddWithCarry(a, b_shifted, FLAGS[flag_group].C)
+
+    WDR[d] = result
+    FLAGS[flag_group] = flags_out
+  encoding:
+    scheme: bnaf
+    mapping:
+      fg: flag_group
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b010
+      wrd: wrd
+
+- mnemonic: bn.addi
+  synopsis: Add Immediate
+  operands:
+    - name: wrd
+      doc: Name of the destination WDR
+    - name: wrs
+      doc: Name of the source WDR
+    - name: imm
+      type: uimm
+      doc: Immediate value
+    - *bn-flag-group-operand
+  syntax: |
+    <wrd>, <wrs>, <imm> [, FG<flag_group>]
+  doc: |
+    Adds a zero-extended unsigned immediate to the value of a WDR, writes the
+    result to the destination WDR, and updates the flags.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs)
+
+    fg = DecodeFlagGroup(flag_group)
+    i = ZeroExtend(imm, WLEN)
+  operation: |
+    (result, flags_out) = AddWithCarry(a, i, "0")
+
+    WDR[d] = result
+    FLAGS[flag_group] = flags_out
+  encoding:
+    scheme: bnai
+    mapping:
+      fg: flag_group
+      sub: b0
+      imm: imm
+      wrs: wrs
+      funct3: b100
+      wrd: wrd
+
+- mnemonic: bn.addm
+  synopsis: Pseudo-Modulo Add
+  operands: [wrd, wrs1, wrs2]
+  doc: |
+    Adds two WDR values, subtracts the value of the MOD WSR once if
+    the result is equal to or larger than MOD, and writes the result to
+    the destination WDR. This operation is a modulo addition if the
+    sum of the two input registers is smaller than twice the value
+    of the MOD WSR. Flags are not used or saved.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+  operation: |
+    (result, ) = AddWithCarry(a, b, "0")
+
+    if result >= MOD:
+      result = result - MOD
+
+    WDR[d] = result
+  encoding:
+    scheme: bnam
+    mapping:
+      sub: b0
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b101
+      wrd: wrd
+
+- mnemonic: bn.mulqacc
+  synopsis: Quarter-word Multiply and Accumulate
+  operands:
+    - &mulqacc-zero-acc
+      name: zero_acc
+      type: option(.Z)
+      doc: Zero the accumulator before accumulating the multiply result.
+    - &mulqacc-wrs1
+      name: wrs1
+      doc: First source WDR
+    - &mulqacc-wrs1-qwsel
+      name: wrs1_qwsel
+      type: uimm2
+      doc: |
+        Quarter-word select for `<wrs1>`.
+
+        Valid values:
+        - `0`: Select `wrs1[WLEN/4-1:0]` (least significant quarter-word)
+        - `1`: Select `wrs1[WLEN/2-1:WLEN/4]`
+        - `2`: Select `wrs1[WLEN/4*3-1:WLEN/2]`
+        - `3`: Select `wrs1[WLEN-1:WLEN/4*3]` (most significant quarter-word)
+    - &mulqacc-wrs2
+      name: wrs2
+      doc: Second source WDR
+    - &mulqacc-wrs2-qwsel
+      name: wrs2_qwsel
+      type: uimm2
+      doc: |
+        Quarter-word select for `<wrs2>`.
+
+        Valid values:
+        - `0`: Select `wrs2[WLEN/4-1:0]` (least significant quarter-word)
+        - `1`: Select `wrs2[WLEN/2-1:WLEN/4]`
+        - `2`: Select `wrs2[WLEN/4*3-1:WLEN/2]`
+        - `3`: Select `wrs2[WLEN-1:WLEN/4*3]` (most significant quarter-word)
+    - &mulqacc-acc-shift-imm
+      name: acc_shift_imm
+      type: uimm2
+      doc: |
+        The number of quarter-words (`WLEN/4` bits) to shift the `WLEN/2`-bit
+        multiply result before accumulating.
+  syntax: |
+    [<zero_acc>] <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
+  glued-ops: true
+  doc: |
+    Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
+
+    For versions of the instruction with writeback, see `BN.MULQACC.WO` and `BN.MULQACC.SO`.
+  decode: |
+    writeback_variant = None
+    zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
+
+    d = None
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+
+    d_hwsel = None
+    a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
+    b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
+  operation: &mulqacc-operation |
+    a_qw = GetQuarterWord(a, a_qwsel)
+    b_qw = GetQuarterWord(b, b_qwsel)
+
+    mul_res = a_qw * b_qw
+
+    if zero_accumulator:
+      ACC = 0
+
+    ACC = ACC + (mul_res << (acc_shift_imm * WLEN / 4))
+
+    if writeback_variant == 'shiftout':
+      if d_hwsel == 'L':
+        WDR[d][WLEN/2-1:0] = ACC[WLEN/2-1:0]
+      elif d_hwsel == 'U':
+        WDR[d][WLEN-1:WLEN/2] = ACC[WLEN/2-1:0]
+      ACC = ACC >> (WLEN/2)
+
+    elif writeback_variant == 'writeout':
+      WDR[d] = ACC
+  encoding:
+    scheme: bnaq
+    mapping:
+      wb: b00
+      dh: bx
+      qs2: wrs2_qwsel
+      qs1: wrs1_qwsel
+      wrs2: wrs2
+      wrs1: wrs1
+      acc: acc_shift_imm
+      z: zero_acc
+      wrd: bxxxxx
+
+- mnemonic: bn.mulqacc.wo
+  synopsis: Quarter-word Multiply and Accumulate with full-word writeback
+  operands:
+    - *mulqacc-zero-acc
+    - &mulqacc-wrd
+      name: wrd
+      doc: Destination WDR.
+    - *mulqacc-wrs1
+    - *mulqacc-wrs1-qwsel
+    - *mulqacc-wrs2
+    - *mulqacc-wrs2-qwsel
+    - *mulqacc-acc-shift-imm
+  syntax: |
+    [<zero_acc>] <wrd>, <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
+  glued-ops: true
+  doc: |
+    Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
+    Writes the resulting accumulator to `<wrd>`.
+  decode: |
+    writeback_variant = 'writeout'
+    zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
+
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+
+    d_hwsel = None
+    a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
+    b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
+  operation: *mulqacc-operation
+  encoding:
+    scheme: bnaq
+    mapping:
+      wb: b01
+      dh: bx
+      qs2: wrs2_qwsel
+      qs1: wrs1_qwsel
+      wrs2: wrs2
+      wrs1: wrs1
+      acc: acc_shift_imm
+      z: zero_acc
+      wrd: wrd
+
+- mnemonic: bn.mulqacc.so
+  synopsis: Quarter-word Multiply and Accumulate with half-word writeback
+  operands:
+    - *mulqacc-zero-acc
+    - *mulqacc-wrd
+    - name: wrd_hwsel
+      type: enum(L,U)
+      doc: |
+        Half-word select for `<wrd>`.
+        A value of `L` means the less significant half-word; `U` means the more significant half-word.
+    - *mulqacc-wrs1
+    - *mulqacc-wrs1-qwsel
+    - *mulqacc-wrs2
+    - *mulqacc-wrs2-qwsel
+    - *mulqacc-acc-shift-imm
+  syntax: |
+    [<zero_acc>] <wrd>.<wrd_hwsel>,
+    <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
+  glued-ops: true
+  doc: |
+    Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
+    Next, shifts the resulting accumulator right by half a word.
+    The bits that are shifted out are written to a half-word of `<wrd>`, selected with `<wrd_hwsel>`.
+
+  decode: |
+    writeback_variant = 'shiftout'
+    zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
+
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+
+    d_hwsel = DecodeHalfWordSelect(wrd_hwsel)
+    a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
+    b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
+  operation: *mulqacc-operation
+  encoding:
+    scheme: bnaq
+    mapping:
+      wb: b1x
+      dh: wrd_hwsel
+      qs2: wrs2_qwsel
+      qs1: wrs1_qwsel
+      wrs2: wrs2
+      wrs1: wrs1
+      acc: acc_shift_imm
+      z: zero_acc
+      wrd: wrd
+
+- mnemonic: bn.sub
+  synopsis: Subtraction
+  operands: &bn-sub-operands
+    - name: wrd
+      doc: Name of the destination WDR
+    - name: wrs1
+      doc: Name of the first source WDR
+    - name: wrs2
+      doc: Name of the second source WDR
+    - *bn-shift-type-operand
+    - *bn-shift-bytes-operand
+    - *bn-flag-group-operand
+  syntax: *bn-add-syntax
+  doc: |
+    Subtracts the second WDR value from the first one, writes the result to the destination WDR and updates flags.
+    The content of the second source WDR can be shifted by an unsigned immediate before it is consumed by the operation.
+  decode: &bn-sub-decode |
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+
+    fg = DecodeFlagGroup(flag_group)
+    sb = UInt(shift_bytes)
+    st = DecodeShiftType(shift_type)
+  operation: |
+    b_shifted = ShiftReg(b, st, sb)
+    (result, flags_out) = AddWithCarry(a, -b_shifted, "0")
+
+    WDR[d] = result
+    FLAGS[flag_group] = flags_out
+  encoding:
+    scheme: bnaf
+    mapping:
+      fg: flag_group
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b001
+      wrd: wrd
+
+- mnemonic: bn.subb
+  synopsis: Subtract with borrow
+  operands: *bn-sub-operands
+  syntax: *bn-add-syntax
+  doc: |
+    Subtracts the second WDR value and the Carry from the first one, writes the result to the destination WDR, and updates the flags.
+    The content of the second source WDR can be shifted by an unsigned immediate before it is consumed by the operation.
+  decode: *bn-sub-decode
+  operation: |
+    b_shifted = ShiftReg(b, st, sb)
+    (result, flags_out) = AddWithCarry(a, -b_shifted, ~FLAGS[flag_group].C)
+
+    WDR[d] = result
+    FLAGS[flag_group] = flags_out
+  encoding:
+    scheme: bnaf
+    mapping:
+      fg: flag_group
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b011
+      wrd: wrd
+
+- mnemonic: bn.subi
+  synopsis: Subtract Immediate
+  operands:
+    - name: wrd
+      doc: Name of the destination WDR
+    - name: wrs
+      doc: Name of the source WDR
+    - name: imm
+      type: uimm
+      doc: Immediate value
+    - *bn-flag-group-operand
+  syntax: <wrd>, <wrs>, <imm> [, FG<flag_group>]
+  doc: |
+    Subtracts a zero-extended unsigned immediate from the value of a WDR,
+    writes the result to the destination WDR, and updates the flags.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs)
+
+    fg = DecodeFlagGroup(flag_group)
+    i = ZeroExtend(imm, WLEN)
+  operation: |
+    (result, flags_out) = AddWithCarry(a, -i, "0")
+
+    WDR[d] = result
+    FLAGS[flag_group] = flags_out
+  encoding:
+    scheme: bnai
+    mapping:
+      fg: flag_group
+      sub: b1
+      imm: imm
+      wrs: wrs
+      funct3: b100
+      wrd: wrd
+
+- mnemonic: bn.subm
+  synopsis: Pseudo-modulo subtraction
+  operands: [wrd, wrs1, wrs2]
+  doc: |
+    Subtracts the second WDR value from the first WDR value, performs a modulo operation with the MOD WSR, and writes the result to the destination WDR.
+    This operation is equivalent to a modulo subtraction as long as `wrs1 - wrs2 >= -MOD` holds.
+    This constraint is not checked in hardware.
+    Flags are not used or saved.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+  operation: |
+    (result, ) = AddWithCarry(a, -b, "0")
+
+    if result >= MOD:
+      result = result - MOD
+
+    WDR[d] = result
+  encoding:
+    scheme: bnam
+    mapping:
+      sub: b1
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b101
+      wrd: wrd
+
+- mnemonic: bn.and
+  synopsis: Bitwise AND
+  operands: &bn-and-operands
+    - name: wrd
+      doc: Name of the destination WDR
+    - name: wrs1
+      doc: Name of the first source WDR
+    - name: wrs2
+      doc: Name of the second source WDR
+    - *bn-shift-type-operand
+    - *bn-shift-bytes-operand
+  syntax: &bn-and-syntax |
+    <wrd>, <wrs1>, <wrs2> [, <shift_type> <shift_bytes>B]
+  doc: |
+    Performs a bitwise and operation.
+    Takes the values stored in registers referenced by `wrs1` and `wrs2` and stores the result in the register referenced by `wrd`.
+    The content of the second source register can be shifted by an immediate before it is consumed by the operation.
+  decode: &bn-and-decode |
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+
+    sb = UInt(shift_bytes)
+    st = DecodeShiftType(shift_type)
+  operation: |
+    b_shifted = ShiftReg(b, st, sb)
+    result = a & b_shifted
+
+    WDR[d] = result
+  encoding:
+    scheme: bna
+    mapping:
+      funct31: b0
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b110
+      wrd: wrd
+
+- mnemonic: bn.or
+  synopsis: Bitwise OR
+  operands: *bn-and-operands
+  syntax: *bn-and-syntax
+  doc: |
+    Performs a bitwise or operation.
+    Takes the values stored in WDRs referenced by `wrs1` and `wrs2` and stores the result in the WDR referenced by `wrd`.
+    The content of the second source WDR can be shifted by an immediate before it is consumed by the operation.
+  decode: *bn-and-decode
+  operation: |
+    b_shifted = ShiftReg(b, st, sb)
+    result = a | b_shifted
+
+    WDR[d] = result
+  encoding:
+    scheme: bna
+    mapping:
+      funct31: b1
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b110
+      wrd: wrd
+
+- mnemonic: bn.not
+  synopsis: Bitwise NOT
+  operands:
+    - name: wrd
+      doc: Name of the destination WDR
+    - name: wrs
+      doc: Name of the source WDR
+    - *bn-shift-type-operand
+    - *bn-shift-bytes-operand
+  syntax: |
+    <wrd>, <wrs> [, <shift_type> <shift_bytes>B]
+  doc: |
+    Negates the value in `<wrs>`, storing the result into `<wrd>`.
+    The source value can be shifted by an immediate before it is consumed by the operation.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs)
+
+    sb = UInt(shift_bytes)
+    st = DecodeShiftType(shift_type)
+  operation: |
+    a_shifted = ShiftReg(a, st, sb)
+    result = ~a_shifted
+
+    WDR[d] = result
+  encoding:
+    scheme: bna
+    mapping:
+      funct31: b0
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs
+      wrs1: bxxxxx
+      funct3: b111
+      wrd: wrd
+
+- mnemonic: bn.xor
+  synopsis: Bitwise XOR
+  operands: *bn-and-operands
+  syntax: *bn-and-syntax
+  doc: |
+    Performs a bitwise xor operation.
+    Takes the values stored in WDRs referenced by `wrs1` and `wrs2` and stores the result in the WDR referenced by `wrd`.
+    The content of the second source WDR can be shifted by an immediate before it is consumed by the operation.
+  decode: *bn-and-decode
+  operation: |
+    b_shifted = ShiftReg(b, st, sb)
+    result = a ^ b_shifted
+
+    WDR[d] = result
+  encoding:
+    scheme: bnaf
+    mapping:
+      fg: b1
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b111
+      wrd: wrd
+
+- mnemonic: bn.rshi
+  synopsis: Concatenate and right shift immediate
+  operands:
+    - name: wrd
+      doc: Name of the destination WDR
+    - name: wrs1
+      doc: Name of the first source WDR
+    - name: wrs2
+      doc: Name of the second source WDR
+    - name: imm
+      type: uimm
+      doc: |
+        Number of bits to shift the second source register by. Valid range: 0..(WLEN-1).
+  syntax: |
+    <wrd>, <wrs1>, <wrs2> >> <imm>
+  doc: |
+    The concatenation of the content from the WDRs referenced by `wrs1` and `wrs2` (`wrs1` forms the upper part) is right shifted by an immediate value and truncated to WLEN bits.
+    The result is stored in the WDR referenced by `wrd`.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+    shift_bit = UInt(imm)
+  operation: |
+    WDR[d] = (((a << WLEN) | b) >> shift_bit)[WLEN-1:0]
+  encoding:
+    scheme: bnr
+    mapping:
+      imm: imm
+      wrs2: wrs2
+      wrs1: wrs1
+      funct2: b11
+      wrd: wrd
+
+- mnemonic: bn.sel
+  synopsis: Flag Select
+  operands:
+    - name: wrd
+      doc: Name of the destination WDR
+    - name: wrs1
+      doc: Name of the first source WDR
+    - name: wrs2
+      doc: Name of the second source WDR
+    - *bn-flag-group-operand
+    - name: flag
+      type: enum(C, M, L, Z)
+      doc: |
+        Flag to check. Valid values:
+        - C: Carry flag
+        - M: MSB flag
+        - L: LSB flag
+        - Z: Zero flag
+  syntax: |
+    <wrd>, <wrs1>, <wrs2>, [FG<flag_group>.]<flag>
+  doc: |
+    Returns in the destination WDR the value of the first source WDR if the flag in the chosen flag group is set, otherwise returns the value of the second source WDR.
+  decode: |
+    d = UInt(wrd)
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+    fg = DecodeFlagGroup(flag_group)
+    flag = DecodeFlag(flag)
+  operation: |
+    flag_is_set = FLAGS[fg].get(flag)
+
+    WDR[d] = a if flag_is_set else b
+  encoding:
+    scheme: bns
+    mapping:
+      fg: flag_group
+      flag: flag
+      wrs2: wrs2
+      wrs1: wrs1
+      wrd: wrd
+
+- mnemonic: bn.cmp
+  synopsis: Compare
+  operands: &bn-cmp-operands
+    - name: wrs1
+      doc: Name of the first source WDR
+    - name: wrs2
+      doc: Name of the second source WDR
+    - *bn-shift-type-operand
+    - *bn-shift-bytes-operand
+    - *bn-flag-group-operand
+  syntax: &bn-cmp-syntax |
+    <wrs1>, <wrs2>[, <shift_type> <shift_bytes>B][, FG<flag_group>]
+  doc: |
+    Subtracts the second WDR value from the first one and updates flags.
+    This instruction is identical to BN.SUB, except that no result register is written.
+  decode: &bn-cmp-decode |
+    a = UInt(wrs1)
+    b = UInt(wrs2)
+
+    fg = DecodeFlagGroup(flag_group)
+    sb = UInt(shift_bytes)
+    st = DecodeShiftType(shift_type)
+  operation: |
+    b_shifted = ShiftReg(b, st, sb)
+    (, flags_out) = AddWithCarry(a, -b_shifted, "0")
+
+    FLAGS[flag_group] = flags_out
+  encoding:
+    scheme: bnc
+    mapping:
+      fg: flag_group
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b001
+
+- mnemonic: bn.cmpb
+  synopsis: Compare with Borrow
+  operands: *bn-cmp-operands
+  syntax: *bn-cmp-syntax
+  doc: |
+    Subtracts the second WDR value and the Carry from the first one and updates flags.
+    This instruction is identical to BN.SUBB, except that no result register is written.
+  decode: *bn-cmp-decode
+  operation: |
+    (, flags_out) = AddWithCarry(a, -b, ~FLAGS[flag_group].C)
+
+    FLAGS[flag_group] = flags_out
+  encoding:
+    scheme: bnc
+    mapping:
+      fg: flag_group
+      shift_type: shift_type
+      shift_bytes: shift_bytes
+      wrs2: wrs2
+      wrs1: wrs1
+      funct3: b011
+
+# BN.LID: load one WLEN-bit word from data memory into a WDR, with both the
+# memory address (grs1 + offset) and the destination WDR index (value of grd)
+# taken from GPRs. At most one of the two GPRs may be post-incremented.
+#
+# Units: mem_addr is a byte address, so every size it is compared or combined
+# with is expressed as WLEN / 8 bytes (matching `bytes: 32` in the lsu block).
+# NOTE(review): the bounds check assumes DMEM_SIZE is a byte count, like
+# mem_addr -- confirm against the definition of DMEM_SIZE.
+- mnemonic: bn.lid
+  synopsis: Load Word (indirect source, indirect destination)
+  operands:
+    - name: grd
+      doc: Name of the GPR referencing the destination WDR
+    - name: grs1
+      doc: |
+        Name of the GPR containing the memory byte address.
+        The value contained in the referenced GPR must be WLEN-aligned.
+    - name: offset
+      doc: |
+        Offset value.
+        Must be WLEN-aligned.
+    - name: grs1_inc
+      type: option(++)
+      doc: |
+        Increment the value in `<grs1>` by WLEN/8 (one word).
+        Cannot be specified together with `grd_inc`.
+    - name: grd_inc
+      type: option(++)
+      doc: |
+        Increment the value in `<grd>` by one.
+        Cannot be specified together with `grs1_inc`.
+  syntax: |
+    <grd>[<grd_inc>], <offset>(<grs1>[<grs1_inc>])
+  doc: |
+    Calculates a byte memory address by adding the offset to the value in the GPR `grs1`.
+    The value from this memory address is then copied into the WDR pointed to by the value in GPR `grd`.
+
+    After the operation, either the value in the GPR `grs1`, or the value in `grd` can be optionally incremented.
+
+    - If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
+    - If `grd_inc` is set, the value in `grd` is incremented by the value 1.
+
+    The memory address must be aligned to WLEN bits (WLEN/8 bytes).
+    Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
+  decode: |
+    rd = UInt(grd)
+    rs1 = UInt(grs1)
+    offset = UInt(offset)
+  operation: |
+    mem_addr = GPR[rs1] + offset
+    wdr_dest = GPR[rd]
+
+    assert not (grs1_inc and grd_inc)  # prevented in encoding
+    if mem_addr % (WLEN / 8) or mem_addr + (WLEN / 8) > DMEM_SIZE:
+        raise BadDataAddr()
+
+    mem_index = mem_addr // (WLEN / 8)
+
+    WDR[wdr_dest] = LoadWlenWordFromMemory(mem_index)
+
+    if grs1_inc:
+        GPR[rs1] = GPR[rs1] + (WLEN / 8)
+    if grd_inc:
+        GPR[rd] = GPR[rd] + 1
+  lsu:
+    type: mem-load
+    target: [offset, grs1]
+    bytes: 32
+  encoding:
+    scheme: bnxid
+    mapping:
+      imm: offset
+      spp: grs1_inc
+      dpp: grd_inc
+      rs: grs1
+      funct3: b100  # bn.sid shares scheme bnxid and uses funct3 b101
+      rd: grd
+# BN.SID: store one WLEN-bit word from a WDR to data memory, with both the
+# memory address (grs1 + offset) and the source WDR index (value of grs2)
+# taken from GPRs. At most one of the two GPRs may be post-incremented.
+#
+# The mutual-exclusion assert checks this instruction's own increment
+# operands, grs1_inc and grs2_inc (bn.sid has no grd_inc operand).
+#
+# Units: mem_addr is a byte address, so every size it is compared or combined
+# with is expressed as WLEN / 8 bytes (matching `bytes: 32` in the lsu block).
+# NOTE(review): the bounds check assumes DMEM_SIZE is a byte count, like
+# mem_addr -- confirm against the definition of DMEM_SIZE.
+#
+# NOTE(review): the syntax string places grs2 in the `<offset>(...)` position,
+# while the operation and the lsu target form the address from offset + grs1.
+# The syntax is left unchanged here because it defines the assembler-visible
+# operand order; confirm whether it should instead read
+# `<grs2>[<grs2_inc>], <offset>(<grs1>[<grs1_inc>])`.
+- mnemonic: bn.sid
+  synopsis: Store Word (indirect source, indirect destination)
+  operands:
+    - name: grs1
+      doc: |
+        Name of the GPR containing the memory byte address.
+        The value contained in the referenced GPR must be WLEN-aligned.
+    - name: grs2
+      doc: Name of the GPR referencing the source WDR.
+    - name: offset
+      doc: |
+        Offset value.
+        Must be WLEN-aligned.
+    - name: grs1_inc
+      type: option(++)
+      doc: |
+        Increment the value in `<grs1>` by WLEN/8 (one word).
+        Cannot be specified together with `grs2_inc`.
+    - name: grs2_inc
+      type: option(++)
+      doc: |
+        Increment the value in `<grs2>` by one.
+        Cannot be specified together with `grs1_inc`.
+  syntax: |
+    <grs1>[<grs1_inc>], <offset>(<grs2>[<grs2_inc>])
+  doc: |
+    Calculates a byte memory address by adding the offset to the value in the GPR `grs1`.
+    The value from the WDR pointed to by `grs2` is then copied into the memory.
+
+    After the operation, either the value in the GPR `grs1`, or the value in `grs2` can be optionally incremented.
+
+    - If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
+    - If `grs2_inc` is set, the value in `grs2` is incremented by the value 1.
+
+    The memory address must be aligned to WLEN bits (WLEN/8 bytes).
+    Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
+  decode: |
+    rs1 = UInt(grs1)
+    rs2 = UInt(grs2)
+    offset = UInt(offset)
+  operation: |
+    mem_addr = GPR[rs1] + offset
+    wdr_src = GPR[rs2]
+
+    assert not (grs1_inc and grs2_inc)  # prevented in encoding
+    if mem_addr % (WLEN / 8) or mem_addr + (WLEN / 8) > DMEM_SIZE:
+        raise BadDataAddr()
+
+    mem_index = mem_addr // (WLEN / 8)
+
+    StoreWlenWordToMemory(mem_index, WDR[wdr_src])
+
+    if grs1_inc:
+        GPR[rs1] = GPR[rs1] + (WLEN / 8)
+    if grs2_inc:
+        GPR[rs2] = GPR[rs2] + 1
+  lsu:
+    type: mem-store
+    target: [offset, grs1]
+    bytes: 32
+  encoding:
+    scheme: bnxid
+    mapping:
+      imm: offset
+      spp: grs1_inc
+      dpp: grs2_inc
+      rs: grs1
+      funct3: b101  # bn.lid shares scheme bnxid and uses funct3 b100
+      rd: grs2
+
+# BN.MOV: copy one WDR to another, with both registers named directly in the
+# instruction (operation: WDR[d] = WDR[s]). Shares the bnmov encoding scheme
+# with bn.movr; the `indirect` field tells the two apart.
+- mnemonic: bn.mov
+  synopsis: Copy content between WDRs (direct addressing)
+  operands: [wrd, wrs]
+  decode: |
+    s = UInt(wrs)
+    d = UInt(wrd)
+  operation: WDR[d] = WDR[s]
+  encoding:
+    scheme: bnmov
+    mapping:
+      indirect: b0  # bn.movr sets this field to b1
+      # bn.mov has no increment operands; `bx` is presumably a don't-care
+      # bit pattern -- confirm against the encoding format notes in insns.yml.
+      spp: bx
+      dpp: bx
+      src: wrs
+      dst: wrd
+
+# BN.MOVR: copy one WDR to another where both WDR indices are read from GPRs
+# (operation: WDR[GPR[d]] = WDR[GPR[s]]). Either GPR may optionally be
+# post-incremented by one; the operand docs state the two increments cannot
+# be combined. Shares the bnmov encoding scheme with bn.mov; the `indirect`
+# field tells the two apart.
+- mnemonic: bn.movr
+  synopsis: Copy content between WDRs (register-indirect addressing)
+  operands:
+    - name: grd
+      doc: Name of the GPR referencing the destination WDR.
+    - name: grs
+      doc: Name of the GPR referencing the source WDR.
+    - name: grd_inc
+      type: option(++)
+      doc: |
+        Increment the value in `<grd>` by one.
+        Cannot be specified together with `grs_inc`.
+    - name: grs_inc
+      type: option(++)
+      doc: |
+        Increment the value in `<grs>` by one.
+        Cannot be specified together with `grd_inc`.
+  syntax: |
+    <grd>[<grd_inc>], <grs>[<grs_inc>]
+  doc: |
+    Copy WDR contents between registers with indirect addressing.
+    Optionally, either the source or the destination register address can be incremented by 1.
+  decode: |
+    s = UInt(grs)
+    d = UInt(grd)
+  operation: |
+    WDR[GPR[d]] = WDR[GPR[s]]
+
+    if grs_inc:
+      GPR[s] = GPR[s] + 1
+    if grd_inc:
+      GPR[d] = GPR[d] + 1
+  encoding:
+    scheme: bnmov
+    mapping:
+      indirect: b1  # bn.mov sets this field to b0
+      spp: grs_inc
+      dpp: grd_inc
+      src: grs
+      dst: grd
+
+# BN.WSRRS: read a WSR and set bits in it (see synopsis). Uses the wcsr
+# encoding scheme with the `write` field at b0; bn.wsrrw is the sibling entry
+# with `write` at b1. The lsu block marks the access as type wsr, targeting
+# the `wsr` operand.
+# NOTE(review): this entry carries no doc/decode/operation fields, so the
+# exact roles of wrd and wrs are not spelled out here.
+- mnemonic: bn.wsrrs
+  synopsis: Atomic Read and Set Bits in WSR
+  operands: [wrd, wsr, wrs]
+  encoding:
+    scheme: wcsr
+    mapping:
+      write: b0
+      wcsr: wsr
+      wrs: wrs
+      wrd: wrd
+  lsu:
+    type: wsr
+    target: [wsr]
+
+# BN.WSRRW: read and write a WSR (see synopsis). Uses the wcsr encoding scheme
+# with the `write` field at b1; bn.wsrrs is the sibling entry with `write` at
+# b0. The lsu block marks the access as type wsr, targeting the `wsr` operand.
+# NOTE(review): this entry carries no doc/decode/operation fields, so the
+# exact roles of wrd and wrs are not spelled out here.
+- mnemonic: bn.wsrrw
+  synopsis: Atomic Read/Write WSR
+  operands: [wrd, wsr, wrs]
+  encoding:
+    scheme: wcsr
+    mapping:
+      write: b1
+      wcsr: wsr
+      wrs: wrs
+      wrd: wrd
+  lsu:
+    type: wsr
+    target: [wsr]

diff --git a/hw/ip/otbn/data/insns.yml b/hw/ip/otbn/data/insns.yml
index 959076e..7d9d897 100644
--- a/hw/ip/otbn/data/insns.yml
+++ b/hw/ip/otbn/data/insns.yml

@@ -7,34 +7,23 @@
 # This is used for generating documentation, but also for random test
 # and decoder generation.
 
-# The instruction groups (valid values for the "group" field in
-# instruction entries). Used for documentation where groups appear in
-# the order of this list and instructions are listed by group. This
-# list must be nonempty.
-insn-groups:
-  - key: base
-    title: Base Instruction Subset
-    doc: |
-      The base instruction set of OTBN is a limited 32b instruction set.
-      It is used together with the 32b wide General Purpose Register file.
-      The primary use of the base instruction set is the control flow in applications.
-
-      The base instruction set is an extended subset of [RISC-V's RV32I_Zcsr](https://riscv.org/specifications/isa-spec-pdf/).
-      Refer to the [RISC-V Unprivileged Specification](https://riscv.org/specifications/isa-spec-pdf/) for a detailed instruction specification.
-      Not all RV32 instructions are implemented.
-      The implemented subset is shown below.
-      Many instructions in the base instruction set have an equivalent in the big number instruction subset, enabling processor logic to be shared between the instruction subsets.
-  - key: bignum
-    title: Big Number Instruction Subset
-    doc: |
-      All Big Number (BN) instructions operate on the Wide Data Registers (WDRs).
-
 # The relative path to a YAML file defining the different instruction
 # encoding schemes
 encoding-schemes: enc-schemes.yml
 
-# The instructions. Instructions are listed in the given order within
-# each instruction group. There are the following fields:
+# Instructions are divided into a list of groups (under insn-groups). Each
+# group has the following fields, all of which are required.
+#
+#  key:     An internal name
+#  title:   A title used for documentation
+#  doc:     Markdown-format documentation for the group
+#  insns:   The relative path to the instructions in this group
+#
+# Groups appear in the documentation in list order. The insns key points at
+# another YAML file. That file should contain a list of instruction objects.
+# Again, the instructions appear in the documentation in list order.
+#
+# Each instruction object has the following fields:
 #
 #  mnemonic:  Instruction mnemonic (required)
 #
@@ -160,1443 +149,23 @@
 #  bytes:   An integer giving the width of the operation in bytes. This is
 #           required if type is mem-* and cannot be used otherwise.
 
-insns:
-  - mnemonic: add
-    rv32i: true
-    synopsis: Add
-    operands: [grd, grs1, grs2]
-    encoding:
-      scheme: R
-      mapping:
-        funct7: b0000000
-        rs2: grs2
-        rs1: grs1
-        funct3: b000
-        rd: grd
-        opcode: b01100
-
-  - mnemonic: addi
-    rv32i: true
-    synopsis: Add Immediate
-    operands: [grd, grs1, imm]
-    encoding:
-      scheme: I
-      mapping:
-        imm: imm
-        rs1: grs1
-        funct3: b000
-        rd: grd
-        opcode: b00100
-
-  - mnemonic: lui
-    rv32i: true
-    synopsis: Load Upper Immediate
-    operands:
-      - grd
-      - name: imm
-        type: uimm
-    encoding:
-      scheme: U
-      mapping:
-        imm: imm
-        rd: grd
-        opcode: b01101
-
-  - mnemonic: sub
-    rv32i: true
-    synopsis: Subtract
-    operands: [grd, grs1, grs2]
-    encoding:
-      scheme: R
-      mapping:
-        funct7: b0100000
-        rs2: grs2
-        rs1: grs1
-        funct3: b000
-        rd: grd
-        opcode: b01100
-
-  - mnemonic: sll
-    rv32i: true
-    synopsis: Logical left shift
-    operands: [grd, grs1, grs2]
-    encoding:
-      scheme: R
-      mapping:
-        funct7: b0000000
-        rs2: grs2
-        rs1: grs1
-        funct3: b001
-        rd: grd
-        opcode: b01100
-
-  - mnemonic: slli
-    rv32i: true
-    synopsis: Logical left shift with Immediate
-    operands:
-      - grd
-      - grs1
-      - &shamt-operand
-        name: shamt
-        type: uimm
-    encoding:
-      scheme: Is
-      mapping:
-        arithmetic: b0
-        shamt: shamt
-        rs1: grs1
-        funct3: b001
-        rd: grd
-        opcode: b00100
-
-  - mnemonic: srl
-    rv32i: true
-    synopsis: Logical right shift
-    operands: [grd, grs1, grs2]
-    encoding:
-      scheme: R
-      mapping:
-        funct7: b0000000
-        rs2: grs2
-        rs1: grs1
-        funct3: b101
-        rd: grd
-        opcode: b01100
-
-  - mnemonic: srli
-    rv32i: true
-    synopsis: Logical right shift with Immediate
-    operands:
-      - grd
-      - grs1
-      - *shamt-operand
-    encoding:
-      scheme: Is
-      mapping:
-        arithmetic: b0
-        shamt: shamt
-        rs1: grs1
-        funct3: b101
-        rd: grd
-        opcode: b00100
-
-  - mnemonic: sra
-    rv32i: true
-    synopsis: Arithmetic right shift
-    operands: [grd, grs1, grs2]
-    encoding:
-      scheme: R
-      mapping:
-        funct7: b0100000
-        rs2: grs2
-        rs1: grs1
-        funct3: b101
-        rd: grd
-        opcode: b01100
-
-  - mnemonic: srai
-    rv32i: true
-    synopsis: Arithmetic right shift with Immediate
-    operands:
-      - grd
-      - grs1
-      - *shamt-operand
-    encoding:
-      scheme: Is
-      mapping:
-        arithmetic: b1
-        shamt: shamt
-        rs1: grs1
-        funct3: b101
-        rd: grd
-        opcode: b00100
-
-  - mnemonic: and
-    rv32i: true
-    synopsis: Bitwise AND
-    operands: [grd, grs1, grs2]
-    encoding:
-      scheme: R
-      mapping:
-        funct7: b0000000
-        rs2: grs2
-        rs1: grs1
-        funct3: b111
-        rd: grd
-        opcode: b01100
-
-  - mnemonic: andi
-    rv32i: true
-    synopsis: Bitwise AND with Immediate
-    operands: [grd, grs1, imm]
-    encoding:
-      scheme: I
-      mapping:
-        imm: imm
-        rs1: grs1
-        funct3: b111
-        rd: grd
-        opcode: b00100
-
-  - mnemonic: or
-    rv32i: true
-    synopsis: Bitwise OR
-    operands: [grd, grs1, grs2]
-    encoding:
-      scheme: R
-      mapping:
-        funct7: b0000000
-        rs2: grs2
-        rs1: grs1
-        funct3: b110
-        rd: grd
-        opcode: b01100
-
-  - mnemonic: ori
-    rv32i: true
-    synopsis: Bitwise OR with Immediate
-    operands: [grd, grs1, imm]
-    encoding:
-      scheme: I
-      mapping:
-        imm: imm
-        rs1: grs1
-        funct3: b110
-        rd: grd
-        opcode: b00100
-
-  - mnemonic: xor
-    rv32i: true
-    synopsis: Bitwise XOR
-    operands: [grd, grs1, grs2]
-    encoding:
-      scheme: R
-      mapping:
-        funct7: b0000000
-        rs2: grs2
-        rs1: grs1
-        funct3: b100
-        rd: grd
-        opcode: b01100
-
-  - mnemonic: xori
-    rv32i: true
-    synopsis: Bitwise XOR with Immediate
-    operands: [grd, grs, imm]
-    encoding:
-      scheme: I
-      mapping:
-        imm: imm
-        rs1: grs
-        funct3: b100
-        rd: grd
-        opcode: b00100
-
-  - mnemonic: lw
-    rv32i: true
-    synopsis: Load Word
-    operands: [grd, offset, grs1]
-    syntax: <grd>, <offset>(<grs1>)
-    encoding:
-      scheme: I
-      mapping:
-        imm: offset
-        rs1: grs1
-        funct3: b010
-        rd: grd
-        opcode: b00000
+insn-groups:
+  - key: base
+    title: Base Instruction Subset
     doc: |
-      Load a 32b word from address `<offset> + <grs1>` in data memory, writing the result to `<grd>`.
-      Unaligned loads are not supported.
-      Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
-    lsu:
-      type: mem-load
-      target: [offset, grs1]
-      bytes: 4
+      The base instruction set of OTBN is a limited 32b instruction set.
+      It is used together with the 32b wide General Purpose Register file.
+      The primary use of the base instruction set is the control flow in applications.
 
-  - mnemonic: sw
-    rv32i: true
-    synopsis: Store Word
-    operands: [grs2, offset, grs1]
-    syntax: <grs2>, <offset>(<grs1>)
-    encoding:
-      scheme: S
-      mapping:
-        imm: offset
-        rs2: grs2
-        rs1: grs1
-        funct3: b010
-        opcode: b01000
+      The base instruction set is an extended subset of [RISC-V's RV32I_Zcsr](https://riscv.org/specifications/isa-spec-pdf/).
+      Refer to the [RISC-V Unprivileged Specification](https://riscv.org/specifications/isa-spec-pdf/) for a detailed instruction specification.
+      Not all RV32 instructions are implemented.
+      The implemented subset is shown below.
+      Many instructions in the base instruction set have an equivalent in the big number instruction subset, enabling processor logic to be shared between the instruction subsets.
+    insns: base-insns.yml
+
+  - key: bignum
+    title: Big Number Instruction Subset
     doc: |
-      Store a 32b word in `<grs2>` to address `<offset> + <grs1>` in data memory.
-      Unaligned stores are not supported.
-      Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
-    lsu:
-      type: mem-store
-      target: [offset, grs1]
-      bytes: 4
-
-  - mnemonic: beq
-    rv32i: true
-    synopsis: Branch Equal
-    operands: [grs1, grs2, offset]
-    straight-line: false
-    encoding:
-      scheme: B
-      mapping:
-        imm: offset
-        rs2: grs2
-        rs1: grs1
-        funct3: b000
-        opcode: b11000
-
-  - mnemonic: bne
-    rv32i: true
-    synopsis: Branch Not Equal
-    operands: [grs1, grs2, offset]
-    straight-line: false
-    encoding:
-      scheme: B
-      mapping:
-        imm: offset
-        rs2: grs2
-        rs1: grs1
-        funct3: b001
-        opcode: b11000
-
-  - mnemonic: jal
-    rv32i: true
-    synopsis: Jump And Link
-    operands: [grd, offset]
-    straight-line: false
-    trailing-doc: |
-      The JAL instruction has the same behavior as in RV32I, jumping by the given offset and writing `PC+4` as a link address to the destination register.
-      OTBN has a hardware managed call stack, accessed through `x1`, which should be used when calling subroutines.
-      Do so by using `x1` as the link register: `jal x1, <offset>`.
-    encoding:
-      scheme: J
-      mapping:
-        imm: offset
-        rd: grd
-        opcode: b11011
-
-  - mnemonic: jalr
-    rv32i: true
-    synopsis: Jump And Link Register
-    operands: [grd, grs1, offset]
-    straight-line: false
-    trailing-doc: |
-      The JALR instruction has the same behavior as in RV32I, jumping by `<grs1> + <offset>` and writing `PC+4` as a link address to the destination register.
-      OTBN has a hardware managed call stack, accessed through `x1`, which should be used when calling and returning from subroutines.
-      To return from a subroutine, use `jalr x0, x1, 0`.
-      This pops a link address from the call stack and branches to it.
-      To call a subroutine through a function pointer, use `jalr x1, <grs1>, 0`.
-      This jumps to the address in `<grs1>` and pushes the link address onto the call stack.
-    encoding:
-      scheme: I
-      mapping:
-        imm: offset
-        rs1: grs1
-        funct3: b000
-        rd: grd
-        opcode: b11001
-
-  - mnemonic: csrrs
-    rv32i: true
-    synopsis: Atomic Read and Set bits in CSR
-    operands: [grd, csr, grs]
-    encoding:
-      scheme: I
-      mapping:
-        imm: csr
-        rs1: grs
-        funct3: b010
-        rd: grd
-        opcode: b11100
-    lsu:
-      type: csr
-      target: [csr]
-
-  - mnemonic: csrrw
-    rv32i: true
-    synopsis: Atomic Read/Write CSR
-    operands: [grd, csr, grs]
-    encoding:
-      scheme: I
-      mapping:
-        imm: csr
-        rs1: grs
-        funct3: b001
-        rd: grd
-        opcode: b11100
-    lsu:
-      type: csr
-      target: [csr]
-
-  - mnemonic: ecall
-    rv32i: true
-    synopsis: Environment Call
-    operands: []
-    straight-line: false
-    doc: |
-      Triggers the `done` interrupt to indicate the completion of the
-      operation.
-    encoding:
-      scheme: I
-      mapping:
-        imm: b000000000000
-        rs1: b00000
-        funct3: b000
-        rd: b00000
-        opcode: b11100
-
-  - mnemonic: loop
-    synopsis: Loop (indirect)
-    operands:
-      - name: grs
-        doc: Name of the GPR containing the number of iterations
-      - &bodysize-operand
-        name: bodysize
-        type: uimm
-        doc: Number of instructions in the loop body
-    straight-line: false
-    note: &loop-note |
-      The LOOP and LOOPI instructions are under-specified, and improvements
-      to them are being discussed. See
-      https://github.com/lowRISC/opentitan/issues/2496 for up-to-date
-      information.
-    doc: |
-      Repeat a sequence of code multiple times. The number of iterations is
-      read from `<grs>`, treated as an unsigned value. The number of
-      instructions in the loop is given in the `<bodysize>` immediate.
-    encoding:
-      scheme: loop
-      mapping:
-        bodysize: bodysize
-        grs: grs
-
-  - mnemonic: loopi
-    synopsis: Loop Immediate
-    operands:
-      - name: iterations
-        type: uimm
-        doc: Number of iterations
-      - *bodysize-operand
-    straight-line: false
-    note: *loop-note
-    doc: |
-      Repeat a sequence of code multiple times. The `<iterations>`
-      unsigned immediate operand gives the number of iterations and
-      the `<bodysize>` unsigned immediate operand gives the number of
-      instructions in the body.
-    encoding:
-      scheme: loopi
-      mapping:
-        bodysize: bodysize
-        iterations: iterations
-
-  - mnemonic: nop
-    synopsis: No Operation
-    rv32i: true
-    operands: []
-    doc: A pseudo-operation that has no effect.
-    literal-pseudo-op:
-      - addi x0, x0, 0
-
-  - mnemonic: li
-    synopsis: Load Immediate
-    rv32i: true
-    operands: [grd, imm]
-    doc: |
-      Load a 32b signed immediate value into a GPR. This uses ADDI and LUI,
-      expanding to one or two instructions, depending on the immediate (small
-      non-negative immediates or immediates with all lower bits zero can be
-      loaded with just ADDI or LUI, respectively; general immediates need a LUI
-      followed by an ADDI).
-    python-pseudo-op: true
-
-  - mnemonic: ret
-    synopsis: Return from subroutine
-    rv32i: true
-    operands: []
-    straight-line: false
-    literal-pseudo-op:
-      - JALR x0, x1, 0
-
-  - mnemonic: bn.add
-    group: bignum
-    synopsis: Add
-    operands: &bn-add-operands
-      - name: wrd
-        doc: Name of the destination WDR
-      - name: wrs1
-        doc: Name of the first source WDR
-      - name: wrs2
-        doc: Name of the second source WDR
-      - &bn-shift-type-operand
-        name: shift_type
-        type: enum(<<, >>)
-        doc: |
-          The direction of an optional shift applied to `<wrs2>`.
-      - &bn-shift-bytes-operand
-        name: shift_bytes
-        type: uimm5
-        doc: |
-          Number of bytes by which to shift `<wrs2>`. Defaults to 0.
-      - &bn-flag-group-operand
-        name: flag_group
-        type: uimm1
-        doc: Flag group to use. Defaults to 0.
-    syntax: &bn-add-syntax |
-      <wrd>, <wrs1>, <wrs2>[<shift_type> <shift_bytes>B][, FG<flag_group>]
-    doc: |
-      Adds two WDR values, writes the result to the destination WDR and updates
-      flags. The content of the second source WDR can be shifted by an unsigned
-      immediate before it is consumed by the operation.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-
-      fg = DecodeFlagGroup(flag_group)
-      sb = UInt(shift_bytes)
-      st = DecodeShiftType(shift_type)
-    operation: |
-      b_shifted = ShiftReg(b, st, sb)
-      (result, flags_out) = AddWithCarry(a, b_shifted, "0")
-
-      WDR[d] = result
-      FLAGS[flag_group] = flags_out
-    encoding:
-      scheme: bnaf
-      mapping:
-        fg: flag_group
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b000
-        wrd: wrd
-
-  - mnemonic: bn.addc
-    group: bignum
-    synopsis: Add with Carry
-    operands: *bn-add-operands
-    syntax: *bn-add-syntax
-    doc: |
-      Adds two WDR values and the Carry flag value, writes the result to the
-      destination WDR, and updates the flags. The content of the second source
-      WDR can be shifted by an unsigned immediate before it is consumed by the
-      operation.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-
-      fg = DecodeFlagGroup(flag_group)
-      sb = UInt(shift_bytes)
-      st = DecodeShiftType(shift_type)
-    operation: |
-      b_shifted = ShiftReg(b, st, sb)
-      (result, flags_out) = AddWithCarry(a, b_shifted, FLAGS[flag_group].C)
-
-      WDR[d] = result
-      FLAGS[flag_group] = flags_out
-    encoding:
-      scheme: bnaf
-      mapping:
-        fg: flag_group
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b010
-        wrd: wrd
-
-  - mnemonic: bn.addi
-    group: bignum
-    synopsis: Add Immediate
-    operands:
-      - name: wrd
-        doc: Name of the destination WDR
-      - name: wrs
-        doc: Name of the source WDR
-      - name: imm
-        type: uimm
-        doc: Immediate value
-      - *bn-flag-group-operand
-    syntax: |
-      <wrd>, <wrs>, <imm> [, FG<flag_group>]
-    doc: |
-      Adds a zero-extended unsigned immediate to the value of a WDR, writes the
-      result to the destination WDR, and updates the flags.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-
-      fg = DecodeFlagGroup(flag_group)
-      i = ZeroExtend(imm, WLEN)
-    operation: |
-      (result, flags_out) = AddWithCarry(a, i, "0")
-
-      WDR[d] = result
-      FLAGS[flag_group] = flags_out
-    encoding:
-      scheme: bnai
-      mapping:
-        fg: flag_group
-        sub: b0
-        imm: imm
-        wrs: wrs
-        funct3: b100
-        wrd: wrd
-
-  - mnemonic: bn.addm
-    group: bignum
-    synopsis: Pseudo-Modulo Add
-    operands: [wrd, wrs1, wrs2]
-    doc: |
-      Adds two WDR values, subtracts the value of the MOD WSR once if
-      the result is equal or larger than MOD, and writes the result to
-      the destination WDR. This operation is a modulo addition if the
-      sum of the two input registers is smaller than twice the value
-      of the MOD WSR. Flags are not used or saved.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-    operation: |
-      (result, ) = AddWithCarry(a, b, "0")
-
-      if result >= MOD:
-        result = result - MOD
-
-      WDR[d] = result
-    encoding:
-      scheme: bnam
-      mapping:
-        sub: b0
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b101
-        wrd: wrd
-
-  - mnemonic: bn.mulqacc
-    group: bignum
-    synopsis: Quarter-word Multiply and Accumulate
-    operands:
-      - &mulqacc-zero-acc
-        name: zero_acc
-        type: option(.Z)
-        doc: Zero the accumulator before accumulating the multiply result.
-      - &mulqacc-wrs1
-        name: wrs1
-        doc: First source WDR
-      - &mulqacc-wrs1-qwsel
-        name: wrs1_qwsel
-        type: uimm2
-        doc: |
-          Quarter-word select for `<wrs1>`.
-
-          Valid values:
-          - `0`: Select `wrs1[WLEN/4-1:0]` (least significant quarter-word)
-          - `1`: Select `wrs1[WLEN/2:WLEN/4]`
-          - `2`: Select `wrs1[WLEN/4*3-1:WLEN/2]`
-          - `3`: Select `wrs1[WLEN-1:WLEN/4*3]` (most significant quarter-word)
-      - &mulqacc-wrs2
-        name: wrs2
-        doc: Second source WDR
-      - &mulqacc-wrs2-qwsel
-        name: wrs2_qwsel
-        type: uimm2
-        doc: |
-          Quarter-word select for `<wrs2>`.
-
-          Valid values:
-          - `0`: Select `wrs1[WLEN/4-1:0]` (least significant quarter-word)
-          - `1`: Select `wrs1[WLEN/2:WLEN/4]`
-          - `2`: Select `wrs1[WLEN/4*3-1:WLEN/2]`
-          - `3`: Select `wrs1[WLEN-1:WLEN/4*3]` (most significant quarter-word)
-      - &mulqacc-acc-shift-imm
-        name: acc_shift_imm
-        type: uimm2
-        doc: |
-          The number of quarter-words (`WLEN/4` bits) to shift the `WLEN/2`-bit
-          multiply result before accumulating.
-    syntax: |
-      [<zero_acc>] <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
-    glued-ops: true
-    doc: |
-      Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
-
-      For versions of the instruction with writeback, see `BN.MULQACC.WO` and `BN.MULQACC.SO`.
-    decode: |
-      writeback_variant = None
-      zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
-
-      d = None
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-
-      d_hwsel = None
-      a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
-      b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
-    operation: &mulqacc-operation |
-      a_qw = GetQuarterWord(a, a_qwsel)
-      b_qw = GetQuarterWord(b, b_qwsel)
-
-      mul_res = a_qw * b_qw
-
-      if zero_accumulator:
-        ACC = 0
-
-      ACC = ACC + (mul_res << (acc_shift_imm * WLEN / 4))
-
-      if writeback_variant == 'shiftout':
-        if d_hwsel == 'L':
-          WDR[d][WLEN/2-1:0] = ACC[WLEN/2-1:0]
-        elif d_hwsel == 'U':
-          WDR[d][WLEN-1:WLEN/2] = ACC[WLEN/2-1:0]
-        ACC = ACC >> (WLEN/2)
-
-      elif writeback_variant == 'writeout':
-        WDR[d] = ACC
-    encoding:
-      scheme: bnaq
-      mapping:
-        wb: b00
-        dh: bx
-        qs2: wrs2_qwsel
-        qs1: wrs1_qwsel
-        wrs2: wrs2
-        wrs1: wrs1
-        acc: acc_shift_imm
-        z: zero_acc
-        wrd: bxxxxx
-
-  - mnemonic: bn.mulqacc.wo
-    group: bignum
-    synopsis: Quarter-word Multiply and Accumulate with half-word writeback
-    operands:
-      - *mulqacc-zero-acc
-      - &mulqacc-wrd
-        name: wrd
-        doc: Destination WDR.
-      - *mulqacc-wrs1
-      - *mulqacc-wrs1-qwsel
-      - *mulqacc-wrs2
-      - *mulqacc-wrs2-qwsel
-      - *mulqacc-acc-shift-imm
-    syntax: |
-      [<zero_acc>] <wrd>, <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
-    glued-ops: true
-    doc: |
-      Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
-      Writes the resulting accumulator to `<wrd>`.
-    decode: |
-      writeback_variant = 'writeout'
-      zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
-
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-
-      d_hwsel = None
-      a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
-      b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
-    operation: *mulqacc-operation
-    encoding:
-      scheme: bnaq
-      mapping:
-        wb: b01
-        dh: bx
-        qs2: wrs2_qwsel
-        qs1: wrs1_qwsel
-        wrs2: wrs2
-        wrs1: wrs1
-        acc: acc_shift_imm
-        z: zero_acc
-        wrd: wrd
-
-  - mnemonic: bn.mulqacc.so
-    group: bignum
-    synopsis: Quarter-word Multiply and Accumulate with half-word writeback
-    operands:
-      - *mulqacc-zero-acc
-      - *mulqacc-wrd
-      - name: wrd_hwsel
-        type: enum(L,U)
-        doc: |
-          Half-word select for `<wrd>`.
-          A value of `L` means the less significant half-word; `U` means the more significant half-word.
-      - *mulqacc-wrs1
-      - *mulqacc-wrs1-qwsel
-      - *mulqacc-wrs2
-      - *mulqacc-wrs2-qwsel
-      - *mulqacc-acc-shift-imm
-    syntax: |
-      [<zero_acc>] <wrd>.<wrd_hwsel>,
-      <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
-    glued-ops: true
-    doc: |
-      Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
-      Next, shifts the resulting accumulator right by half a word.
-      The bits that are shifted out are written to a half-word of `<wrd>`, selected with `<wrd_hwsel>`.
-
-    decode: |
-      writeback_variant = 'shiftout'
-      zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
-
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-
-      d_hwsel = DecodeHalfWordSelect(wrd_hwsel)
-      a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
-      b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
-    operation: *mulqacc-operation
-    encoding:
-      scheme: bnaq
-      mapping:
-        wb: b1x
-        dh: wrd_hwsel
-        qs2: wrs2_qwsel
-        qs1: wrs1_qwsel
-        wrs2: wrs2
-        wrs1: wrs1
-        acc: acc_shift_imm
-        z: zero_acc
-        wrd: wrd
-
-  - mnemonic: bn.sub
-    group: bignum
-    synopsis: Subtraction
-    operands: &bn-sub-operands
-      - name: wrd
-        doc: Name of the destination WDR
-      - name: wrs1
-        doc: Name of the first source WDR
-      - name: wrs2
-        doc: Name of the second source WDR
-      - *bn-shift-type-operand
-      - *bn-shift-bytes-operand
-      - *bn-flag-group-operand
-    syntax: *bn-add-syntax
-    doc: |
-      Subtracts the second WDR value from the first one, writes the result to the destination WDR and updates flags.
-      The content of the second source WDR can be shifted by an unsigned immediate before it is consumed by the operation.
-    decode: &bn-sub-decode |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-
-      fg = DecodeFlagGroup(flag_group)
-      sb = UInt(shift_bytes)
-      st = DecodeShiftType(shift_type)
-    operation: |
-      b_shifted = ShiftReg(b, st, sb)
-      (result, flags_out) = AddWithCarry(a, -b_shifted, "0")
-
-      WDR[d] = result
-      FLAGS[flag_group] = flags_out
-    encoding:
-      scheme: bnaf
-      mapping:
-        fg: flag_group
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b001
-        wrd: wrd
-
-  - mnemonic: bn.subb
-    group: bignum
-    synopsis: Subtract with borrow
-    operands: *bn-sub-operands
-    syntax: *bn-add-syntax
-    doc: |
-      Subtracts the second WDR value and the Carry from the first one, writes the result to the destination WDR, and updates the flags.
-      The content of the second source WDR can be shifted by an unsigned immediate before it is consumed by the operation.
-    decode: *bn-sub-decode
-    operation: |
-      b_shifted = ShiftReg(b, st, sb)
-      (result, flags_out) = AddWithCarry(a, -b_shifted, ~FLAGS[flag_group].C)
-
-      WDR[d] = result
-      FLAGS[flag_group] = flags_out
-    encoding:
-      scheme: bnaf
-      mapping:
-        fg: flag_group
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b011
-        wrd: wrd
-
-  - mnemonic: bn.subi
-    group: bignum
-    synopsis: Subtract Immediate
-    operands:
-      - name: wrd
-        doc: Name of the destination WDR
-      - name: wrs
-        doc: Name of the source WDR
-      - name: imm
-        type: uimm
-        doc: Immediate value
-      - *bn-flag-group-operand
-    syntax: <wrd>, <wrs>, <imm> [, FG<flag_group>]
-    doc: |
-      Subtracts a zero-extended unsigned immediate from the value of a WDR,
-      writes the result to the destination WDR, and updates the flags.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-
-      fg = DecodeFlagGroup(flag_group)
-      i = ZeroExtend(imm, WLEN)
-    operation: |
-      (result, flags_out) = AddWithCarry(a, -i, "0")
-
-      WDR[d] = result
-      FLAGS[flag_group] = flags_out
-    encoding:
-      scheme: bnai
-      mapping:
-        fg: flag_group
-        sub: b1
-        imm: imm
-        wrs: wrs
-        funct3: b100
-        wrd: wrd
-
-  - mnemonic: bn.subm
-    group: bignum
-    synopsis: Pseudo-modulo subtraction
-    operands: [wrd, wrs1, wrs2]
-    doc: |
-      Subtracts the second WDR value from the first WDR value, performs a modulo operation with the MOD WSR, and writes the result to the destination WDR.
-      This operation is equivalent to a modulo subtraction as long as `wrs1 - wrs2 >= -MOD` holds.
-      This constraint is not checked in hardware.
-      Flags are not used or saved.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-    operation: |
-      (result, ) = AddWithCarry(a, -b, "0")
-
-      if result >= MOD:
-        result = result - MOD
-
-      WDR[d] = result
-    encoding:
-      scheme: bnam
-      mapping:
-        sub: b1
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b101
-        wrd: wrd
-
-  - mnemonic: bn.and
-    group: bignum
-    synopsis: Bitwise AND
-    operands: &bn-and-operands
-      - name: wrd
-        doc: Name of the destination WDR
-      - name: wrs1
-        doc: Name of the first source WDR
-      - name: wrs2
-        doc: Name of the second source WDR
-      - *bn-shift-type-operand
-      - *bn-shift-bytes-operand
-    syntax: &bn-and-syntax |
-      <wrd>, <wrs1>, <wrs2> [, <shift_type> <shift_bytes>B]
-    doc: |
-      Performs a bitwise and operation.
-      Takes the values stored in registers referenced by `wrs1` and `wrs2` and stores the result in the register referenced by `wrd`.
-      The content of the second source register can be shifted by an immediate before it is consumed by the operation.
-    decode: &bn-and-decode |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-
-      sb = UInt(shift_bytes)
-      st = DecodeShiftType(shift_type)
-    operation: |
-      b_shifted = ShiftReg(b, st, sb)
-      result = a & b_shifted
-
-      WDR[d] = result
-    encoding:
-      scheme: bna
-      mapping:
-        funct31: b0
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b110
-        wrd: wrd
-
-  - mnemonic: bn.or
-    group: bignum
-    synopsis: Bitwise OR
-    operands: *bn-and-operands
-    syntax: *bn-and-syntax
-    doc: |
-      Performs a bitwise or operation.
-      Takes the values stored in WDRs referenced by `wrs1` and `wrs2` and stores the result in the WDR referenced by `wrd`.
-      The content of the second source WDR can be shifted by an immediate before it is consumed by the operation.
-    decode: *bn-and-decode
-    operation: |
-      b_shifted = ShiftReg(b, st, sb)
-      result = a | b_shifted
-
-      WDR[d] = result
-    encoding:
-      scheme: bna
-      mapping:
-        funct31: b1
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b110
-        wrd: wrd
-
-  - mnemonic: bn.not
-    group: bignum
-    synopsis: Bitwise NOT
-    operands:
-      - name: wrd
-        doc: Name of the destination WDR
-      - name: wrs
-        doc: Name of the source WDR
-      - *bn-shift-type-operand
-      - *bn-shift-bytes-operand
-    syntax: |
-      <wrd>, <wrs> [, <shift_type> <shift_bytes>B]
-    doc: |
-      Negates the value in `<wrs>`, storing the result into `<wrd>`.
-      The source value can be shifted by an immediate before it is consumed by the operation.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-
-      sb = UInt(shift_bytes)
-      st = DecodeShiftType(shift_type)
-    operation: |
-      a_shifted = ShiftReg(a, st, sb)
-      result = ~a_shifted
-
-      WDR[d] = result
-    encoding:
-      scheme: bna
-      mapping:
-        funct31: b0
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs
-        wrs1: bxxxxx
-        funct3: b111
-        wrd: wrd
-
-  - mnemonic: bn.xor
-    group: bignum
-    synopsis: Bitwise XOR
-    operands: *bn-and-operands
-    syntax: *bn-and-syntax
-    doc: |
-      Performs a bitwise xor operation.
-      Takes the values stored in WDRs referenced by `wrs1` and `wrs2` and stores the result in the WDR referenced by `wrd`.
-      The content of the second source WDR can be shifted by an immediate before it is consumed by the operation.
-    decode: *bn-and-decode
-    operation: |
-      b_shifted = ShiftReg(b, st, sb)
-      result = a ^ b_shifted
-
-      WDR[d] = result
-    encoding:
-      scheme: bnaf
-      mapping:
-        fg: b1
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b111
-        wrd: wrd
-
-  - mnemonic: bn.rshi
-    group: bignum
-    synopsis: Concatenate and right shift immediate
-    operands:
-      - name: wrd
-        doc: Name of the destination WDR
-      - name: wrs1
-        doc: Name of the first source WDR
-      - name: wrs2
-        doc: Name of the second source WDR
-      - name: imm
-        type: uimm
-        doc: |
-          Number of bits to shift the second source register by. Valid range: 0..(WLEN-1).
-    syntax: |
-      <wrd>, <wrs1>, <wrs2> >> <imm>
-    doc: |
-      The concatenation of the content from the WDRs referenced by `wrs1` and `wrs2` (`wrs1` forms the upper part) is right shifted by an immediate value and truncated to WLEN bit.
-      The result is stored in the WDR referenced by `wrd`.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-      shift_bit = Uint(imm)
-    operation: |
-      WDR[d] = (((a << WLEN) | b) >> shift_bit)[WLEN-1:0]
-    encoding:
-      scheme: bnr
-      mapping:
-        imm: imm
-        wrs2: wrs2
-        wrs1: wrs1
-        funct2: b11
-        wrd: wrd
-
-  - mnemonic: bn.sel
-    group: bignum
-    synopsis: Flag Select
-    operands:
-      - name: wrd
-        doc: Name of the destination WDR
-      - name: wrs1
-        doc: Name of the first source WDR
-      - name: wrs2
-        doc: Name of the second source WDR
-      - *bn-flag-group-operand
-      - name: flag
-        type: enum(C, M, L, Z)
-        doc: |
-          Flag to check. Valid values:
-          - C: Carry flag
-          - M: MSB flag
-          - L: LSB flag
-          - Z: Zero flag
-    syntax: |
-      <wrd>, <wrs1>, <wrs2>, [FG<flag_group>.]<flag>
-    doc: |
-      Returns in the destination WDR the value of the first source WDR if the flag in the chosen flag group is set, otherwise returns the value of the second source WDR.
-    decode: |
-      d = UInt(wrd)
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-      fg = DecodeFlagGroup(flag_group)
-      flag = DecodeFlag(flag)
-    operation: |
-      flag_is_set = FLAGS[fg].get(flag)
-
-      WDR[d] = wrs1 if flag_is_set else wrs2
-    encoding:
-      scheme: bns
-      mapping:
-        fg: flag_group
-        flag: flag
-        wrs2: wrs2
-        wrs1: wrs1
-        wrd: wrd
-
-  - mnemonic: bn.cmp
-    group: bignum
-    synopsis: Compare
-    operands: &bn-cmp-operands
-      - name: wrs1
-        doc: Name of the first source WDR
-      - name: wrs2
-        doc: Name of the second source WDR
-      - *bn-shift-type-operand
-      - *bn-shift-bytes-operand
-      - *bn-flag-group-operand
-    syntax: &bn-cmp-syntax |
-      <wrs1>, <wrs2>[, <shift_type> <shift_bytes>B][, FG<flag_group>]
-    doc: |
-      Subtracts the second WDR value from the first one and updates flags.
-      This instruction is identical to BN.SUB, except that no result register is written.
-    decode: &bn-cmp-decode |
-      a = UInt(wrs1)
-      b = UInt(wrs2)
-
-      fg = DecodeFlagGroup(flag_group)
-      sb = UInt(shift_bytes)
-      st = DecodeShiftType(shift_type)
-    operation: |
-      b_shifted = ShiftReg(b, st, sb)
-      (, flags_out) = AddWithCarry(a, -b_shifted, "0")
-
-      FLAGS[flag_group] = flags_out
-    encoding:
-      scheme: bnc
-      mapping:
-        fg: flag_group
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b001
-
-  - mnemonic: bn.cmpb
-    group: bignum
-    synopsis: Compare with Borrow
-    operands: *bn-cmp-operands
-    syntax: *bn-cmp-syntax
-    doc: |
-      Subtracts the second WDR value from the first one and updates flags.
-      This instruction is identical to BN.SUBB, except that no result register is written.
-    decode: *bn-cmp-decode
-    operation: |
-      (, flags_out) = AddWithCarry(a, -b, ~FLAGS[flag_group].C)
-
-      FLAGS[flag_group] = flags_out
-    encoding:
-      scheme: bnc
-      mapping:
-        fg: flag_group
-        shift_type: shift_type
-        shift_bytes: shift_bytes
-        wrs2: wrs2
-        wrs1: wrs1
-        funct3: b011
-
-  - mnemonic: bn.lid
-    group: bignum
-    synopsis: Load Word (indirect source, indirect destination)
-    operands:
-      - name: grd
-        doc: Name of the GPR referencing the destination WDR
-      - name: grs1
-        doc: |
-          Name of the GPR containing the memory byte address.
-          The value contained in the referenced GPR must be WLEN-aligned.
-      - name: offset
-        doc: |
-          Offset value.
-          Must be WLEN-aligned.
-      - name: grs1_inc
-        type: option(++)
-        doc: |
-          Increment the value in `<grs1>` by WLEN/8 (one word).
-          Cannot be specified together with `grd_inc`.
-      - name: grd_inc
-        type: option(++)
-        doc: |
-          Increment the value in `<grd>` by one.
-          Cannot be specified together with `grs1_inc`.
-    syntax: |
-      <grd>[<grd_inc>], <offset>(<grs1>[<grs1_inc>])
-    doc: |
-      Calculates a byte memory address by adding the offset to the value in the GPR `grs1`.
-      The value from this memory address is then copied into the WDR pointed to by the value in GPR `grd`.
-
-      After the operation, either the value in the GPR `grs1`, or the value in `grd` can be optionally incremented.
-
-      - If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
-      - If `grd_inc` is set, the value in `grd` is incremented by the value 1.
-
-      The memory address must be aligned to WLEN bytes.
-      Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
-    decode: |
-      rd = UInt(grd)
-      rs1 = UInt(grs1)
-      offset = UInt(offset)
-    operation: |
-      mem_addr = GPR[rs1] + offset
-      wdr_dest = GPR[rd]
-
-      assert not (grs1_inc and grd_inc)  # prevented in encoding
-      if mem_addr % (WLEN / 8) or mem_addr + WLEN > DMEM_SIZE:
-          raise BadDataAddr()
-
-      mem_index = mem_addr // (WLEN / 8)
-
-      WDR[wdr_dest] = LoadWlenWordFromMemory(mem_index)
-
-      if grs1_inc:
-          GPR[rs1] = GPR[rs1] + (WLEN / 8)
-      if grd_inc:
-          GPR[rd] = GPR[rd] + 1
-    lsu:
-      type: mem-load
-      target: [offset, grs1]
-      bytes: 32
-    encoding:
-      scheme: bnxid
-      mapping:
-        imm: offset
-        spp: grs1_inc
-        dpp: grd_inc
-        rs: grs1
-        funct3: b100
-        rd: grd
-
-  - mnemonic: bn.sid
-    group: bignum
-    synopsis: Store Word (indirect source, indirect destination)
-    operands:
-      - name: grs1
-        doc: |
-          Name of the GPR containing the memory byte address.
-          The value contained in the referenced GPR must be WLEN-aligned.
-      - name: grs2
-        doc: Name of the GPR referencing the source WDR.
-      - name: offset
-        doc: |
-          Offset value.
-          Must be WLEN-aligned.
-      - name: grs1_inc
-        type: option(++)
-        doc: |
-          Increment the value in `<grs1>` by WLEN/8 (one word).
-          Cannot be specified together with `grs2_inc`.
-      - name: grs2_inc
-        type: option(++)
-        doc: |
-          Increment the value in `<grs2>` by one.
-          Cannot be specified together with `grs1_inc`.
-    syntax: |
-      <grs1>[<grs1_inc>], <offset>(<grs2>[<grs2_inc>])
-    doc: |
-      Calculates a byte memory address by adding the offset to the value in the GPR `grs1`.
-      The value from the WDR pointed to by `grs2` is then copied into the memory.
-
-      After the operation, either the value in the GPR `grs1`, or the value in `grs2` can be optionally incremented.
-
-      - If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
-      - If `grs2_inc` is set, the value in `grs2` is incremented by the value 1.
-
-      The memory address must be aligned to WLEN bytes.
-      Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
-    decode: |
-      rs1 = UInt(grs1)
-      rs2 = UInt(grs2)
-      offset = UInt(offset)
-    operation: |
-      mem_addr = GPR[rs1] + offset
-      wdr_src = GPR[rs2]
-
-      assert not (grs1_inc and grd_inc)  # prevented in encoding
-      if mem_addr % (WLEN / 8) or mem_addr + WLEN > DMEM_SIZE:
-          raise BadDataAddr()
-
-      mem_index = mem_addr // (WLEN / 8)
-
-      StoreWlenWordToMemory(mem_index, WDR[wdr_src])
-
-      if grs1_inc:
-          GPR[rs1] = GPR[rs1] + (WLEN / 8)
-      if grs2_inc:
-          GPR[rs2] = GPR[rs2] + 1
-    lsu:
-      type: mem-store
-      target: [offset, grs1]
-      bytes: 32
-    encoding:
-      scheme: bnxid
-      mapping:
-        imm: offset
-        spp: grs1_inc
-        dpp: grs2_inc
-        rs: grs1
-        funct3: b101
-        rd: grs2
-
-  - mnemonic: bn.mov
-    group: bignum
-    synopsis: Copy content between WDRs (direct addressing)
-    operands: [wrd, wrs]
-    decode: |
-      s = UInt(wrs)
-      d = UInt(wrd)
-    operation: WDR[d] = WDR[s]
-    encoding:
-      scheme: bnmov
-      mapping:
-        indirect: b0
-        spp: bx
-        dpp: bx
-        src: wrs
-        dst: wrd
-
-  - mnemonic: bn.movr
-    group: bignum
-    synopsis: Copy content between WDRs (register-indirect addressing)
-    operands:
-      - name: grd
-        doc: Name of the GPR containing the destination WDR.
-      - name: grs
-        doc: Name of the GPR referencing the source WDR.
-      - name: grd_inc
-        type: option(++)
-        doc: |
-          Increment the value in `<grd>` by one.
-          Cannot be specified together with `grs_inc`.
-      - name: grs_inc
-        type: option(++)
-        doc: |
-          Increment the value in `<grs>` by one.
-          Cannot be specified together with `grd_inc`.
-    syntax: |
-      <grd>[<grd_inc>], <grs>[<grs_inc>]
-    doc: |
-      Copy WDR contents between registers with indirect addressing.
-      Optionally, either the source or the destination register address can be incremented by 1.
-    decode: |
-      s = UInt(grs)
-      d = UInt(grd)
-    operation: |
-      WDR[GPR[d]] = WDR[GPR[s]]
-
-      if grs_inc:
-        GPR[s] = GPR[s] + 1
-      if grd_inc:
-        GPR[d] = GPR[d] + 1
-    encoding:
-      scheme: bnmov
-      mapping:
-        indirect: b1
-        spp: grs_inc
-        dpp: grd_inc
-        src: grs
-        dst: grd
-
-  - mnemonic: bn.wsrrs
-    group: bignum
-    synopsis: Atomic Read and Set Bits in WSR
-    operands: [wrd, wsr, wrs]
-    encoding:
-      scheme: wcsr
-      mapping:
-        write: b0
-        wcsr: wsr
-        wrs: wrs
-        wrd: wrd
-    lsu:
-      type: wsr
-      target: [wsr]
-
-  - mnemonic: bn.wsrrw
-    group: bignum
-    synopsis: Atomic Read/Write WSR
-    operands: [wrd, wsr, wrs]
-    encoding:
-      scheme: wcsr
-      mapping:
-        write: b1
-        wcsr: wsr
-        wrs: wrs
-        wrd: wrd
-    lsu:
-      type: wsr
-      target: [wsr]
+      All Big Number (BN) instructions operate on the Wide Data Registers (WDRs).
+    insns: bignum-insns.yml

diff --git a/hw/ip/otbn/util/shared/insn_yaml.py b/hw/ip/otbn/util/shared/insn_yaml.py
index 0065567..1c7690d 100644
--- a/hw/ip/otbn/util/shared/insn_yaml.py
+++ b/hw/ip/otbn/util/shared/insn_yaml.py

@@ -7,7 +7,7 @@
 import itertools
 import os
 import re
-from typing import Dict, List, Optional, Tuple, cast
+from typing import List, Optional, Tuple, cast
 
 from .encoding import Encoding
 from .encoding_scheme import EncSchemes
@@ -19,33 +19,9 @@
                                  load_yaml)
 
 
-class InsnGroup:
-    def __init__(self, yml: object) -> None:
-        yd = check_keys(yml, 'insn-group', ['key', 'title', 'doc'], [])
-        self.key = check_str(yd['key'], 'insn-group key')
-        self.title = check_str(yd['title'], 'insn-group title')
-        self.doc = check_str(yd['doc'], 'insn-group doc')
-
-
-class InsnGroups:
-    def __init__(self, yml: object) -> None:
-        self.groups = [InsnGroup(y) for y in check_list(yml, 'insn-groups')]
-        if not self.groups:
-            raise ValueError('Empty list of instruction groups: '
-                             'we need at least one as a base group.')
-        self.key_to_group = index_list('insn-groups',
-                                       self.groups, lambda ig: ig.key)
-
-    def default_group(self) -> str:
-        '''Get the name of the default instruction group'''
-        assert self.groups
-        return self.groups[0].key
-
-
 class Insn:
     def __init__(self,
                  yml: object,
-                 groups: InsnGroups,
                  encoding_schemes: Optional[EncSchemes]) -> None:
         yd = check_keys(yml, 'instruction',
                         ['mnemonic', 'operands'],
@@ -65,14 +41,6 @@
                                           self.operands,
                                           lambda op: op.name)
 
-        raw_group = get_optional_str(yd, 'group', what)
-        self.group = groups.default_group() if raw_group is None else raw_group
-
-        if self.group not in groups.key_to_group:
-            raise ValueError('Unknown instruction group, {!r}, '
-                             'for mnemonic {!r}.'
-                             .format(self.group, self.mnemonic))
-
         self.rv32i = check_bool(yd.get('rv32i', False),
                                 'rv32i flag for ' + what)
         self.glued_ops = check_bool(yd.get('glued-ops', False),
@@ -207,14 +175,51 @@
     return ret
 
 
+class InsnGroup:
+    def __init__(self,
+                 path: str,
+                 encoding_schemes: Optional[EncSchemes],
+                 yml: object) -> None:
+
+        yd = check_keys(yml, 'insn-group',
+                        ['key', 'title', 'doc', 'insns'], [])
+        self.key = check_str(yd['key'], 'insn-group key')
+        self.title = check_str(yd['title'], 'insn-group title')
+        self.doc = check_str(yd['doc'], 'insn-group doc')
+
+        insns_what = 'insns field for {!r} instruction group'.format(self.key)
+        insns_rel_path = check_str(yd['insns'], insns_what)
+        insns_path = os.path.normpath(os.path.join(os.path.dirname(path),
+                                                   insns_rel_path))
+        insns_yaml = load_yaml(insns_path, insns_what)
+        try:
+            self.insns = [Insn(i, encoding_schemes)
+                          for i in check_list(insns_yaml, insns_what)]
+        except ValueError as err:
+            raise RuntimeError('Invalid schema in YAML file at {!r}: {}'
+                               .format(insns_path, err)) from None
+
+
+class InsnGroups:
+    def __init__(self,
+                 path: str,
+                 encoding_schemes: Optional[EncSchemes],
+                 yml: object) -> None:
+        self.groups = [InsnGroup(path, encoding_schemes, y)
+                       for y in check_list(yml, 'insn-groups')]
+        if not self.groups:
+            raise ValueError('Empty list of instruction groups: '
+                             'we need at least one as a base group.')
+        self.key_to_group = index_list('insn-groups',
+                                       self.groups, lambda ig: ig.key)
+
+
 class InsnsFile:
     def __init__(self, path: str, yml: object) -> None:
         yd = check_keys(yml, 'top-level',
-                        ['insn-groups', 'insns'],
+                        ['insn-groups'],
                         ['encoding-schemes'])
 
-        self.groups = InsnGroups(yd['insn-groups'])
-
         enc_scheme_path = get_optional_str(yd, 'encoding-schemes', 'top-level')
         if enc_scheme_path is None:
             self.encoding_schemes = None
@@ -228,8 +233,17 @@
                 raise RuntimeError('Invalid schema in YAML file at {!r}: {}'
                                    .format(es_path, err)) from None
 
-        self.insns = [Insn(i, self.groups, self.encoding_schemes)
-                      for i in check_list(yd['insns'], 'insns')]
+        self.groups = InsnGroups(path,
+                                 self.encoding_schemes,
+                                 yd['insn-groups'])
+
+        # The instructions are grouped by instruction group and stored in
+        # self.groups. Most of the time, however, we just want "an OTBN
+        # instruction" and don't care about the group. Retrieve them here.
+        self.insns = []
+        for grp in self.groups.groups:
+            self.insns += grp.insns
+
         self.mnemonic_to_insn = index_list('insns', self.insns,
                                            lambda insn: insn.mnemonic.lower())
 
@@ -245,22 +259,7 @@
 
     def grouped_insns(self) -> List[Tuple[InsnGroup, List[Insn]]]:
         '''Return the instructions in groups'''
-        grp_to_insns = {}  # type: Dict[str, List[Insn]]
-        for insn in self.insns:
-            grp_to_insns.setdefault(insn.group, []).append(insn)
-
-        ret = []
-        for grp in self.groups.groups:
-            ret.append((grp, grp_to_insns.get(grp.key, [])))
-
-        # We should have picked up all the instructions, because we checked
-        # that each instruction has a valid group in the Insn constructor. Just
-        # in case something went wrong, check that the counts match.
-        gti_count = sum(len(insns) for insns in grp_to_insns.values())
-        ret_count = sum(len(insns) for _, insns in ret)
-        assert ret_count == gti_count
-
-        return ret
+        return [(grp, grp.insns) for grp in self.groups.groups]
 
 
 def load_file(path: str) -> InsnsFile:
commit	124c94faac8c98a86208f400540ecb94c6f0375d	[log] [tgz]
author	Rupert Swarbrick <rswarbrick@lowrisc.org>	Thu Aug 20 12:49:19 2020 +0100
committer	Rupert Swarbrick <rswarbrick@gmail.com>	Thu Aug 20 13:52:10 2020 +0100
tree	b6016c7603a72c4037fedf4e18d9431592bc6d10
parent	9d04bdadbb15d6db7a2542cdba34ae9f411adacd [diff]