# hw/ip/otbn/data/insns.yml - 3p/lowrisc/opentitan - Git at Google

 # Copyright lowRISC contributors.
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0

 # A machine readable description of the ISA
 #
 # This is used for generating documentation, but also for random test
 # and decoder generation.

 # The instruction groups (valid values for the "group" field in
 # instruction entries). Used for documentation where groups appear in
 # the order of this list and instructions are listed by group. This
 # list must be nonempty.
 insn-groups:
   # Group for the RV32I-derived 32-bit instructions. It is listed first, so it
   # is the default group for instructions that have no explicit "group" field.
   - key: base
     title: Base Instruction Subset
     doc: |
       The base instruction set of OTBN is a limited 32b instruction set.
       It is used together with the 32b wide General Purpose Register file.
       The primary use of the base instruction set is the control flow in applications.

       The base instruction set is an extended subset of [RISC-V's RV32I_Zicsr](https://riscv.org/specifications/isa-spec-pdf/).
       Refer to the [RISC-V Unprivileged Specification](https://riscv.org/specifications/isa-spec-pdf/) for a detailed instruction specification.
       Not all RV32 instructions are implemented.
       The implemented subset is shown below.
       Many instructions in the base instruction set have an equivalent in the big number instruction subset, enabling processor logic to be shared between the instruction subsets.
   # Group for the BN.* instructions, which operate on the wide register file.
   - key: bignum
     title: Big Number Instruction Subset
     doc: |
       All Big Number (BN) instructions operate on the wide register file WREG.

 # Instruction encoding schemes
 #
 # These define the mapping between instruction operands and bits in the
 # encoding. A scheme names zero or more named fields. It can also inherit from
 # zero or more other schemes.
 #
 # The direct fields of a scheme are defined as a dictionary, mapping a field
 # name (which will be matched up with instruction operands) to a value. In
 # general, this value is itself a dictionary with the following keys:
 #
 #  bits: A list of ranges of bits. A range is written <msb>-<lsb>, where both
 #        are integers (and msb >= lsb). Multiple ranges can be separated by
 #        commas. A degenerate range (with msb == lsb) can be written as a bare
 #        integer. Required.
 #
 #  value: Optional. If specified, this should be a binary string for a fixed
 #         value for this field, prefixed with a "b" (to avoid the YAML parser
 #         reading it as a decimal number). Underscores in the string are
 #         ignored (to make it easier to show grouping) and 'x' means don't
 #         care.
 #
 #  shift: Optional. If specified, this is the number of bits to shift the
 #         encoded value left to get the logical value.
 #
 # For brevity, if value and shift have their default values, the bits string
 # can be used as the value for the field.
 #
 # A scheme can inherit from other schemes by listing their names in a 'parents'
 # attribute. If the child scheme needs to set the value of a parent's field to
 # something fixed, it can do so with the following syntax:
 #
 #     parent_name(field_name=b11101, field_name2=b111)
 #
 # The fields of a scheme are recursively defined to be its direct fields plus
 # the fields of all its ancestors.
 #
 # A scheme is called complete if its fields cover the entire range of bits
 # (0-31) and partial otherwise.

 encoding-schemes:
   # A partial scheme that sets the bottom two bits to 2'b11 (as for all RISC-V
   # uncompressed instructions) and defines an 'opcode' field for bits 6-2
   # (standard for RV32I instructions)
   rv:
     fields:
       opcode: 6-2
       uncomp:
         bits: 1-0
         value: b11

   # A partial scheme defining a funct3 field in bits 14-12 (used in most RV32I
   # instructions, and most BN.* custom instructions)
   funct3:
     fields:
       funct3: 14-12

   # RISC-V "R-type" encoding (reg <- fun(reg, reg))
   R:
     parents:
       - rv
       - funct3
     fields:
       funct7: 31-25
       rs2: 24-20
       rs1: 19-15
       rd: 11-7

   # RISC-V "I-type" encoding (reg <- fun(imm, reg))
   I:
     parents:
       - rv
       - funct3
     fields:
       imm: 31-20
       rs1: 19-15
       rd: 11-7

   # RISC-V "I-type" encoding sub-type for shifts (reg <- fun(imm, reg))
   Is:
     parents:
       - rv
       - funct3
     fields:
       arithmetic: 30
       shamt: 24-20
       rs1: 19-15
       rd: 11-7
       unused:
         bits: 31,29-25
         value: b000000

   # RISC-V "S-type" encoding (_ <- fun(reg, imm))
   S:
     parents:
       - rv
       - funct3
     fields:
       imm: 31-25,11-7
       rs2: 24-20
       rs1: 19-15

   # RISC-V "B-type" encoding (like S, but different immediate layout; used for
   # branches)
   B:
     parents:
       - rv
       - funct3
     fields:
       imm:
         bits: 31,7,30-25,11-8
         shift: 1
       rs2: 24-20
       rs1: 19-15

   # RISC-V "U-type" encoding (reg <- fun(imm))
   U:
     parents:
       - rv
     fields:
       imm:
         bits: 31-12
         shift: 12
       rd: 11-7

   # RISC-V "J-type" encoding (like U, but different immediate layout; used for
   # jumps)
   J:
     parents:
       - rv
     fields:
       imm:
         bits: 31,19-12,20,30-21
         shift: 1
       rd: 11-7

   # A partial scheme for custom instructions with opcode b00010
   custom0:
     parents:
       - rv(opcode=b00010)

   # A partial scheme for custom instructions with opcode b01010
   custom1:
     parents:
       - rv(opcode=b01010)

   # A partial scheme for custom instructions with opcode b01110
   custom2:
     parents:
       - rv(opcode=b01110)

   # A partial scheme for custom instructions with opcode b11110
   custom3:
     parents:
       - rv(opcode=b11110)

   # A partial scheme for instructions that produce a dest WDR.
   wrd:
     fields:
       wrd: 11-7

   # A partial scheme for instructions that take two source WDRs and produce a
   # dest WDR.
   wdr3:
     parents:
       - wrd
     fields:
       wrs2: 24-20
       wrs1: 19-15

   # A partial scheme that defines the 'fg' field (for <flag_group> operands)
   fg:
     fields:
       fg: 31

   # A partial scheme that defines the shift fields (type and bytes)
   shift:
     fields:
       shift_type: 30
       shift_bytes: 29-25

   # A partial scheme that defines a function field at bit 31 for OTBN logical
   # operations
   funct31:
     fields:
       funct31: 31

   # A partial scheme for a specialized 2-bit function field. It occupies only
   # the lower two bits of the usual funct3 field, because the immediate of RSHI
   # spills over into the top bit (bit 14).
   funct2:
     fields:
       funct2: 13-12

   # A specialised encoding for the loop instruction (only one source, no
   # destination)
   loop:
     parents:
       - custom3
       - funct2(funct2=b00)
     fields:
       bodysize: 31-20
       grs: 19-15
       fixed:
         bits: 14,11-7
         value: bxxxxxx

   # A specialised encoding for the loopi instruction (which, unusually, has 2
   # immediates)
   loopi:
     parents:
       - custom3
       - funct2(funct2=b01)
     fields:
       bodysize: 31-20
       iterations: 19-15,11-7
       fixed:
         bits: 14
         value: bx

   # Used for wide logical operations (bn.and, bn.or, bn.xor).
   bna:
     parents:
       - custom1
       - wdr3
       - funct3
       - shift
       - funct31

   # Used for bn.not (no second source reg).
   bnan:
     parents:
       - custom1
       - shift
       - funct31
       - wrd
     fields:
       wrs1: 24-20
       fixed:
         bits: 19-15
         value: bxxxxx

   # Used for the wide reg/reg ALU instructions.
   bnaf:
     parents:
       - custom1
       - wdr3
       - funct3
       - shift
       - fg

   # Used for the wide bn.addi and bn.subi instructions.
   bnai:
     parents:
       - custom1
       - wrd
       - funct3
       - fg
     fields:
       sub: 30
       imm: 29-20
       wrs: 19-15

   # Used for bn.addm, bn.subm
   bnam:
     parents:
       - custom1
       - wdr3
       - funct3
     fields:
       sub: 30
       fixed:
         bits: 31,29-25
         value: bxxxxxx

   # Used for bn.mulqacc
   bnaq:
     parents:
       - custom2
       - wdr3
     fields:
       wb: 31-30
       dh: 29
       qs2: 28-27
       qs1: 26-25
       acc:
         bits: 14-13
         shift: 6
       z: 12

   # Unusual scheme used for bn.rshi (the immediate bleeds into the usual funct3
   # field)
   bnr:
     parents:
       - custom3
       - wdr3
     fields:
       imm: 31-25,14
       funct2: 13-12

   # Used by bn.sel.
   bns:
     parents:
       - custom0
       - wdr3
       - funct3(funct3=b000)
       - fg
     fields:
       fixed:
         bits: 30-27
         value: bxxxx
       flag: 26-25

   # Used by bn.cmp and bn.cmpb
   bnc:
     parents:
       - custom0
       - wdr3(wrd=bxxxxx)
       - funct3
       - shift
       - fg

   # Used by bn.lid and bn.sid
   bnxid:
     parents:
       - custom0
       - funct3
     fields:
       imm:
         bits: 24-22,31-25
         shift: 4
       spp: 21
       dpp: 20
       rs: 19-15
       rd: 11-7

   # Used by bn.mov and bn.movr
   bnmov:
     parents:
       - custom0
       - funct3(funct3=b110)
     fields:
       indirect: 31
       fixed_top:
         bits: 30-22
         value: bxxxxxxxxx
       spp: 21
       dpp: 20
       src: 19-15
       dst: 11-7

   # Used by bn.wsrrs and bn.wsrrw
   wcsr:
     parents:
       - custom0
       - funct3(funct3=b111)
     fields:
       write: 31
       wcsr: 27-20
       wrs: 19-15
       wrd: 11-7
       fixed:
         bits: 30-28
         value: bxxx

 # The instructions. Instructions are listed in the given order within
 # each instruction group. There are the following fields:
 #
 #  mnemonic:  Instruction mnemonic (required)
 #
 #  group:     The instruction group in which this instruction should
 #             appear. Defaults to the first entry in the insn-groups
 #             list. (optional)
 #
 #  rv32i:     A boolean. If true, this instruction came from the RV32I ISA.
 #             Optional, defaults to false.
 #
 #  synopsis:  A longer name for this instruction. If set, used as a subtitle in
 #             the generated documentation. (optional)
 #
 #  operands:  A list of operand names. These have a special syntax, described
 #             below. (required)
 #
 #  syntax:    The syntax for the operands to the instruction. If not given,
 #             this is assumed to be the operands in order, separated by commas.
 #             If given, it should be a string with operand names in angle
 #             brackets ("<foo>, <bar>"). Any other non-whitespace characters
 #             are taken to be required literal syntax. So "foo<bar>" means "the
 #             string 'foo', followed by the bar operand".
 #
 #  doc:       Documentation for the instruction in markdown. (optional)
 #
 #  decode:    Python pseudocode for decoding instruction objects (optional)
 #
 #  operation: Python pseudocode for the operation of the instruction (optional)
 #
 #  note:      Text that should appear in a callout banner at the top of the
 #             instruction documentation. (optional)
 #
 #  glued-ops: A boolean. If true, the first operand in the syntax can appear
 #             immediately after the mnemonic (with no space). Optional,
 #             defaults to false.
 #
 #  trailing-doc: Documentation that should appear after the syntax example but
 #                before the operand table. Useful for things like alternative
 #                assembly syntax, or deviations from the usual meaning of the
 #                instruction. (optional)
 #
 # The operands field should be a list, corresponding to the operands in the
 # order they will appear in the syntax. Each operand is either a string (the
 # operand name) or a dictionary. In the latter case, it has the following
 # fields:
 #
 #  name:      The name of the operand. Required and must be unique.
 #
 #  type:      The type of the operand. A string. If this can be figured out
 #             from the operand name, it is optional. See below for a list of
 #             possible operand types and the rules for recognising them
 #             automatically.
 #
 #  doc:       A fragment of markdown that documents the operand
 #
 # The valid operand types are as follows:
 #
 #  gpr:       The name of a general purpose register. Syntax "grs" for a
 #             source; "grd" for a destination.
 #
 #  wdr:       The name of a wide register. Syntax "wrs" for a source;
 #             "wrd" for a destination.
 #
 #  csr:       The name of a CSR. Syntax "csr" (always considered a destination)
 #
 #  wsr:       The name of a WSR. Syntax "wsr" (always considered a destination)
 #
 #  immediate: An immediate operand. Syntax "imm<n>" where n is the number of
 #             bits or just imm for an unspecified width.
 #
 #  enum:      An immediate with weird syntax. The syntax is "enum(a,b,c,d)"
 #             where a..d are the different possible syntaxes that can be used.
 #             The values are interpreted as immediates in enumeration order (so
 #             a is 0; d is 3).
 #
 #  option:    A 1-bit immediate with weird syntax. Written "option(foo)" to
 #             mean that if the string "foo" appears then the immediate has
 #             value 1 and if it doesn't the immediate has value 0.
 #
 # Operand types are inferred from names as follows:
 #
 #  grd:        GPR destination
 #  grs, grs<n> GPR source
 #  wrd:        WDR destination
 #  wrs, wrs<n> WDR source
 #  csr         CSR name
 #  wsr         WSR name
 #  imm, imm<n> Immediate
 #  offset      Immediate (unspecified width)

 insns:
   - mnemonic: add
     rv32i: true
     synopsis: Add
     operands: [grd, grs1, grs2]
     encoding:
       scheme: R
       mapping:
         funct7: b0000000
         rs2: grs2
         rs1: grs1
         funct3: b000
         rd: grd
         opcode: b01100

   - mnemonic: addi
     rv32i: true
     synopsis: Add Immediate
     operands: [grd, grs1, imm]
     encoding:
       scheme: I
       mapping:
         imm: imm
         rs1: grs1
         funct3: b000
         rd: grd
         opcode: b00100

   - mnemonic: lui
     rv32i: true
     synopsis: Load Upper Immediate
     operands: [grd, imm]
     encoding:
       scheme: U
       mapping:
         imm: imm
         rd: grd
         opcode: b01101

   - mnemonic: sub
     rv32i: true
     synopsis: Subtract
     operands: [grd, grs1, grs2]
     encoding:
       scheme: R
       mapping:
         funct7: b0100000
         rs2: grs2
         rs1: grs1
         funct3: b000
         rd: grd
         opcode: b01100

   - mnemonic: sll
     rv32i: true
     synopsis: Logical left shift
     operands: [grd, grs1, grs2]
     encoding:
       scheme: R
       mapping:
         funct7: b0000000
         rs2: grs2
         rs1: grs1
         funct3: b001
         rd: grd
         opcode: b01100

   # SLLI: shift left by an immediate, using the "Is" shift encoding with the
   # arithmetic bit cleared.
   - mnemonic: slli
     rv32i: true
     synopsis: Logical left shift with Immediate
     operands:
       - grd
       - grs1
       # The shift-amount operand is anchored here and reused through the
       # *shamt-operand alias by srli and srai, so the anchor must stay on the
       # first of these entries.
       - &shamt-operand
         name: shamt
         type: imm
     encoding:
       scheme: Is
       mapping:
         arithmetic: b0
         shamt: shamt
         rs1: grs1
         funct3: b001
         rd: grd
         opcode: b00100

   - mnemonic: srl
     rv32i: true
     synopsis: Logical right shift
     operands: [grd, grs1, grs2]
     encoding:
       scheme: R
       mapping:
         funct7: b0000000
         rs2: grs2
         rs1: grs1
         funct3: b101
         rd: grd
         opcode: b01100

   - mnemonic: srli
     rv32i: true
     synopsis: Logical right shift with Immediate
     operands:
       - grd
       - grs1
       - *shamt-operand
     encoding:
       scheme: Is
       mapping:
         arithmetic: b0
         shamt: shamt
         rs1: grs1
         funct3: b101
         rd: grd
         opcode: b00100

   - mnemonic: sra
     rv32i: true
     synopsis: Arithmetic right shift
     operands: [grd, grs1, grs2]
     encoding:
       scheme: R
       mapping:
         funct7: b0100000
         rs2: grs2
         rs1: grs1
         funct3: b101
         rd: grd
         opcode: b01100

   - mnemonic: srai
     rv32i: true
     synopsis: Arithmetic right shift with Immediate
     operands:
       - grd
       - grs1
       - *shamt-operand
     encoding:
       scheme: Is
       mapping:
         arithmetic: b1
         shamt: shamt
         rs1: grs1
         funct3: b101
         rd: grd
         opcode: b00100

   - mnemonic: and
     rv32i: true
     synopsis: Bitwise AND
     operands: [grd, grs1, grs2]
     encoding:
       scheme: R
       mapping:
         funct7: b0000000
         rs2: grs2
         rs1: grs1
         funct3: b111
         rd: grd
         opcode: b01100

   - mnemonic: andi
     rv32i: true
     synopsis: Bitwise AND with Immediate
     operands: [grd, grs1, imm]
     encoding:
       scheme: I
       mapping:
         imm: imm
         rs1: grs1
         funct3: b111
         rd: grd
         opcode: b00100

   - mnemonic: or
     rv32i: true
     synopsis: Bitwise OR
     operands: [grd, grs1, grs2]
     encoding:
       scheme: R
       mapping:
         funct7: b0000000
         rs2: grs2
         rs1: grs1
         funct3: b110
         rd: grd
         opcode: b01100

   - mnemonic: ori
     rv32i: true
     synopsis: Bitwise OR with Immediate
     operands: [grd, grs1, imm]
     encoding:
       scheme: I
       mapping:
         imm: imm
         rs1: grs1
         funct3: b110
         rd: grd
         opcode: b00100

   - mnemonic: xor
     rv32i: true
     synopsis: Bitwise XOR
     operands: [grd, grs1, grs2]
     encoding:
       scheme: R
       mapping:
         funct7: b0000000
         rs2: grs2
         rs1: grs1
         funct3: b100
         rd: grd
         opcode: b01100

   - mnemonic: xori
     rv32i: true
     synopsis: Bitwise XOR with Immediate
     operands: [grd, grs, imm]
     encoding:
       scheme: I
       mapping:
         imm: imm
         rs1: grs
         funct3: b100
         rd: grd
         opcode: b00100

   - mnemonic: lw
     rv32i: true
     synopsis: Load Word
     operands: [grd, offset, grs1]
     syntax: <grd>, <offset>(<grs1>)
     encoding:
       scheme: I
       mapping:
         imm: offset
         rs1: grs1
         funct3: b010
         rd: grd
         opcode: b00000
     trailing-doc: |
       Unaligned accesses are not supported.
       Any unaligned access will result in an error.

   - mnemonic: sw
     rv32i: true
     synopsis: Store Word
     operands: [grs2, offset, grs1]
     syntax: <grs2>, <offset>(<grs1>)
     encoding:
       scheme: S
       mapping:
         imm: offset
         rs2: grs2
         rs1: grs1
         funct3: b010
         opcode: b01000
     trailing-doc: |
       Unaligned accesses are not supported.
       Any unaligned access will result in an error.

   - mnemonic: beq
     rv32i: true
     synopsis: Branch Equal
     operands: [grs1, grs2, offset]
     encoding:
       scheme: B
       mapping:
         imm: offset
         rs2: grs2
         rs1: grs1
         funct3: b000
         opcode: b11000

   - mnemonic: bne
     rv32i: true
     synopsis: Branch Not Equal
     operands: [grs1, grs2, offset]
     encoding:
       scheme: B
       mapping:
         imm: offset
         rs2: grs2
         rs1: grs1
         funct3: b001
         opcode: b11000

   - mnemonic: jal
     rv32i: true
     synopsis: Jump And Link
     operands: [grd, offset]
     trailing-doc: |
       The JAL instruction has the same behavior as in RV32I, jumping by the given offset and writing `PC+4` as a link address to the destination register.
       OTBN has a hardware managed call stack, accessed through `x1`, which should be used when calling subroutines.
       Do so by using `x1` as the link register: `jal x1, <offset>`.
     encoding:
       scheme: J
       mapping:
         imm: offset
         rd: grd
         opcode: b11011

   - mnemonic: jalr
     rv32i: true
     synopsis: Jump And Link Register
     operands: [grd, grs1, offset]
     trailing-doc: |
       The JALR instruction has the same behavior as in RV32I, jumping by `<grs1> + <offset>` and writing `PC+4` as a link address to the destination register.
       OTBN has a hardware managed call stack, accessed through `x1`, which should be used when calling and returning from subroutines.
       To return from a subroutine, use `jalr x0, x1, 0`.
       This pops a link address from the call stack and branches to it.
       To call a subroutine through a function pointer, use `jalr x1, <grs1>, 0`.
       This jumps to the address in `<grs1>` and pushes the link address onto the call stack.
     encoding:
       scheme: I
       mapping:
         imm: offset
         rs1: grs1
         funct3: b000
         rd: grd
         opcode: b11001

   - mnemonic: csrrs
     rv32i: true
     synopsis: Atomic Read and Set bits in CSR
     operands: [grd, csr, grs]
     encoding:
       scheme: I
       mapping:
         imm: csr
         rs1: grs
         funct3: b010
         rd: grd
         opcode: b11100

   - mnemonic: csrrw
     rv32i: true
     synopsis: Atomic Read/Write CSR
     operands: [grd, csr, grs]
     encoding:
       scheme: I
       mapping:
         imm: csr
         rs1: grs
         funct3: b001
         rd: grd
         opcode: b11100

   - mnemonic: ecall
     rv32i: true
     synopsis: Environment Call
     operands: []
     doc: |
       Triggers the `done` interrupt to indicate the completion of the
       operation.
     encoding:
       scheme: I
       mapping:
         imm: b000000000000
         rs1: b00000
         funct3: b000
         rd: b00000
         opcode: b11100

   # LOOP: hardware loop whose iteration count comes from a GPR. Uses the
   # dedicated "loop" encoding scheme, which only needs bodysize and grs mapped.
   - mnemonic: loop
     synopsis: Loop (indirect)
     operands:
       - name: grs
         doc: Name of the GPR containing the number of iterations
       # Anchored so that loopi can reuse the same operand via *bodysize-operand.
       - &bodysize-operand
         name: bodysize
         type: imm
         doc: Number of instructions in the loop body
     # Anchored so that loopi shows the same callout text via *loop-note.
     note: &loop-note |
       The LOOP and LOOPI instructions are under-specified, and improvements
       to them are being discussed. See
       https://github.com/lowRISC/opentitan/issues/2496 for up-to-date
       information.
     doc: |
       Repeat a sequence of code multiple times. The number of iterations is a
       GPR value. The length of the loop is given as immediate.
     trailing-doc: |
       Alternative assembly notation: The size of the loop body is given by the
       number of instructions in the parentheses.

       ```
       LOOP <grs> (
         # loop body
       )
       ```
     encoding:
       scheme: loop
       mapping:
         bodysize: bodysize
         grs: grs

   # LOOPI: hardware loop whose iteration count is an immediate. The bodysize
   # operand and the note are aliases of anchors defined on the loop entry, so
   # that entry must come first in the file. Uses the "loopi" encoding scheme.
   - mnemonic: loopi
     synopsis: Loop Immediate
     operands:
       - name: iterations
         type: imm
         doc: Number of iterations
       - *bodysize-operand
     note: *loop-note
     doc: |
       Repeat a sequence of code multiple times. The number of iterations is
       given as an immediate, as is the length of the loop. The number of
       iterations must be larger than zero.
     trailing-doc: |
       Alternative assembly notation. The size of the loop body is given by the
       number of instructions in the parentheses.

       ```
       LOOPI <iterations> (
         # loop body
       )
       ```
     encoding:
       scheme: loopi
       mapping:
         bodysize: bodysize
         iterations: iterations

   # BN.ADD: wide add with an optional byte shift of the second source and a
   # selectable flag group. Encoded with the bnaf scheme, funct3 = b000.
   - mnemonic: bn.add
     group: bignum
     synopsis: Add
     # The whole operand list is anchored; bn.addc reuses it via
     # *bn-add-operands.
     operands: &bn-add-operands
       - name: wrd
         doc: Name of the destination WDR
       - name: wrs1
         doc: Name of the first source WDR
       - name: wrs2
         doc: Name of the second source WDR
       # The shift and flag-group operands below carry their own anchors so
       # that other entries can alias them individually (bn.addi aliases the
       # flag-group operand).
       - &bn-shift-type-operand
         name: shift_type
         type: enum(<<, >>)
         doc: |
           The direction of an optional shift applied to `<wrs2>`.
       - &bn-shift-bytes-operand
         name: shift_bytes
         type: imm5
         doc: |
           Number of bytes by which to shift `<wrs2>`. Defaults to 0.
       - &bn-flag-group-operand
         name: flag_group
         type: imm1
         doc: Flag group to use. Defaults to 0.
     # Anchored syntax string; bn.addc reuses it via *bn-add-syntax.
     syntax: &bn-add-syntax |
       <wrd>, <wrs1>, <wrs2>[<shift_type> <shift_bytes>B][, FG<flag_group>]
     doc: |
       Adds two WDR values, writes the result to the destination WDR and updates
       flags. The content of the second source WDR can be shifted by an
       immediate before it is consumed by the operation.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)

       fg = DecodeFlagGroup(flag_group)
       sb = UInt(shift_bytes)
       st = DecodeShiftType(shift_type)
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       (result, flags_out) = AddWithCarry(a, b_shifted, "0")

       WDR[d] = result
       FLAGS[flag_group] = flags_out
     encoding:
       scheme: bnaf
       mapping:
         fg: flag_group
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         funct3: b000
         wrd: wrd

   # BN.ADDC: like bn.add, but the carry-in is the C flag of the selected flag
   # group instead of a constant "0". The operand list and syntax are aliases of
   # the anchors defined on the bn.add entry. Encoded with the bnaf scheme,
   # funct3 = b010.
   - mnemonic: bn.addc
     group: bignum
     synopsis: Add with Carry
     operands: *bn-add-operands
     syntax: *bn-add-syntax
     doc: |
       Adds two WDR values and the Carry flag value, writes the result to the
       destination WDR, and updates the flags. The content of the second source
       WDR can be shifted by an immediate before it is consumed by the
       operation.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)

       fg = DecodeFlagGroup(flag_group)
       sb = UInt(shift_bytes)
       st = DecodeShiftType(shift_type)
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       (result, flags_out) = AddWithCarry(a, b_shifted, FLAGS[flag_group].C)

       WDR[d] = result
       FLAGS[flag_group] = flags_out
     encoding:
       scheme: bnaf
       mapping:
         fg: flag_group
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         funct3: b010
         wrd: wrd

   # BN.ADDI: wide add of a zero-extended immediate. Encoded with the bnai
   # scheme, sub = b0 and funct3 = b100. The flag-group operand is an alias of
   # the one anchored on the bn.add entry.
   - mnemonic: bn.addi
     group: bignum
     synopsis: Add Immediate
     operands:
       - name: wrd
         doc: Name of the destination WDR
       - name: wrs
         doc: Name of the source WDR
       - name: imm
         doc: Immediate value
       - *bn-flag-group-operand
     syntax: |
       <wrd>, <wrs>, <imm> [, FG<flag_group>]
     doc: |
       Adds a zero-extended immediate to the value of a WDR, writes the result
       to the destination WDR, and updates the flags.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs)

       fg = DecodeFlagGroup(flag_group)
       i = ZeroExtend(imm, WLEN)
     operation: |
       (result, flags_out) = AddWithCarry(a, i, "0")

       WDR[d] = result
       FLAGS[flag_group] = flags_out
     encoding:
       scheme: bnai
       mapping:
         fg: flag_group
         sub: b0
         imm: imm
         wrs: wrs
         funct3: b100
         wrd: wrd

   # BN.ADDM: add followed by a single conditional subtraction of MOD. Encoded
   # with the bnam scheme, sub = b0 and funct3 = b101. AddWithCarry yields a
   # (result, flags) pair, as used by bn.add; the flags are discarded here
   # because this instruction does not update them.
   - mnemonic: bn.addm
     group: bignum
     synopsis: Pseudo-Modulo Add
     operands: [wrd, wrs1, wrs2]
     doc: |
       Adds two WDR values, subtracts the value of the MOD WSR once if
       the result is equal or larger than MOD, and writes the result to
       the destination WDR. This operation is a modulo addition if the
       sum of the two input registers is smaller than twice the value
       of the MOD WSR. Flags are not used or saved.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)
     operation: |
       (result, _) = AddWithCarry(a, b, "0")

       if result >= MOD:
         result = result - MOD

       WDR[d] = result
     encoding:
       scheme: bnam
       mapping:
         sub: b0
         wrs2: wrs2
         wrs1: wrs1
         funct3: b101
         wrd: wrd

   # BN.MULQACC: quarter-word multiply-accumulate without writeback. Encoded
   # with the bnaq scheme, wb = b00; the dh and wrd fields are don't-care.
   #
   # Every operand below is anchored and reused by bn.mulqacc.wo and
   # bn.mulqacc.so, as is the operation pseudocode (&mulqacc-operation). Any
   # change made here therefore applies to all three instructions.
   - mnemonic: bn.mulqacc
     group: bignum
     synopsis: Quarter-word Multiply and Accumulate
     operands:
       - &mulqacc-zero-acc
         name: zero_acc
         type: option(.Z)
         doc: Zero the accumulator before accumulating the multiply result.
       - &mulqacc-wrs1
         name: wrs1
         doc: First source WDR
       - &mulqacc-wrs1-qwsel
         name: wrs1_qwsel
         type: imm2
         doc: |
           Quarter-word select for `<wrs1>`.

           Valid values:
           - `0`: Select `wrs1[WLEN/4-1:0]` (least significant quarter-word)
           - `1`: Select `wrs1[WLEN/2-1:WLEN/4]`
           - `2`: Select `wrs1[WLEN/4*3-1:WLEN/2]`
           - `3`: Select `wrs1[WLEN-1:WLEN/4*3]` (most significant quarter-word)
       - &mulqacc-wrs2
         name: wrs2
         doc: Second source WDR
       - &mulqacc-wrs2-qwsel
         name: wrs2_qwsel
         type: imm2
         doc: |
           Quarter-word select for `<wrs2>`.

           Valid values:
           - `0`: Select `wrs2[WLEN/4-1:0]` (least significant quarter-word)
           - `1`: Select `wrs2[WLEN/2-1:WLEN/4]`
           - `2`: Select `wrs2[WLEN/4*3-1:WLEN/2]`
           - `3`: Select `wrs2[WLEN-1:WLEN/4*3]` (most significant quarter-word)
       - &mulqacc-acc-shift-imm
         name: acc_shift_imm
         type: imm2
         doc: |
           How many quarter-words (`WLEN/4` bits) to shift the `WLEN/2`-bit multiply result before accumulating.
     syntax: |
       [<zero_acc>] <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
     glued-ops: true
     doc: |
       Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.

       For versions of the instruction with writeback, see `BN.MULQACC.WO` and `BN.MULQACC.SO`.
     decode: |
       writeback_variant = None
       zero_accumulator = DecodeMulqaccZeroacc(zero_acc)

       d = None
       a = UInt(wrs1)
       b = UInt(wrs2)

       d_hwsel = None
       a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
       b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
     operation: &mulqacc-operation |
       a_qw = GetQuarterWord(a, a_qwsel)
       b_qw = GetQuarterWord(b, b_qwsel)

       mul_res = a_qw * b_qw

       if zero_accumulator:
         ACC = 0

       ACC = ACC + (mul_res << (acc_shift_imm * WLEN / 4))

       if writeback_variant == 'shiftout':
         if d_hwsel == 'L':
           WDR[d][WLEN/2-1:0] = ACC[WLEN/2-1:0]
         elif d_hwsel == 'U':
           WDR[d][WLEN-1:WLEN/2] = ACC[WLEN/2-1:0]
         ACC = ACC >> (WLEN/2)

       elif writeback_variant == 'writeout':
         WDR[d] = ACC
     encoding:
       scheme: bnaq
       mapping:
         wb: b00
         dh: bx
         qs2: wrs2_qwsel
         qs1: wrs1_qwsel
         wrs2: wrs2
         wrs1: wrs1
         acc: acc_shift_imm
         z: zero_acc
         wrd: bxxxxx

   # BN.MULQACC.WO: multiply-accumulate, then write the whole accumulator out
   # to <wrd> (the 'writeout' variant of the shared operation pseudocode).
   # Encoded with the bnaq scheme, wb = b01; the dh field is don't-care.
   # All operands except wrd, and the operation itself, are aliases of anchors
   # defined on the bn.mulqacc entry. The wrd operand is anchored here for reuse
   # by bn.mulqacc.so.
   - mnemonic: bn.mulqacc.wo
     group: bignum
     synopsis: Quarter-word Multiply and Accumulate with full-word writeback
     operands:
       - *mulqacc-zero-acc
       - &mulqacc-wrd
         name: wrd
         doc: Destination WDR.
       - *mulqacc-wrs1
       - *mulqacc-wrs1-qwsel
       - *mulqacc-wrs2
       - *mulqacc-wrs2-qwsel
       - *mulqacc-acc-shift-imm
     syntax: |
       [<zero_acc>] <wrd>, <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
     glued-ops: true
     doc: |
       Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
       Writes the resulting accumulator to `<wrd>`.
     decode: |
       writeback_variant = 'writeout'
       zero_accumulator = DecodeMulqaccZeroacc(zero_acc)

       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)

       d_hwsel = None
       a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
       b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
     operation: *mulqacc-operation
     encoding:
       scheme: bnaq
       mapping:
         wb: b01
         dh: bx
         qs2: wrs2_qwsel
         qs1: wrs1_qwsel
         wrs2: wrs2
         wrs1: wrs1
         acc: acc_shift_imm
         z: zero_acc
         wrd: wrd

   # BN.MULQACC.SO: "shiftout" variant of the quarter-word multiply-accumulate.
   # decode selects writeback_variant = 'shiftout' and a destination half-word;
   # the pseudo-code itself is shared via *mulqacc-operation.
   - mnemonic: bn.mulqacc.so
     group: bignum
     synopsis: Quarter-word Multiply and Accumulate with half-word writeback
     operands:
       - *mulqacc-zero-acc
       - *mulqacc-wrd
       # Extra operand compared to bn.mulqacc.wo: which half of <wrd> to write.
       - name: wrd_hwsel
         type: enum(L,U)
         doc: |
           Half-word select for `<wrd>`.
           A value of `L` means the less significant half-word; `U` means the more significant half-word.
       - *mulqacc-wrs1
       - *mulqacc-wrs1-qwsel
       - *mulqacc-wrs2
       - *mulqacc-wrs2-qwsel
       - *mulqacc-acc-shift-imm
     syntax: |
       [<zero_acc>] <wrd>.<wrd_hwsel>,
       <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
     glued-ops: true
     doc: |
       Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
       Next, shifts the resulting accumulator right by half a word.
       The bits that are shifted out are written to a half-word of `<wrd>`, selected with `<wrd_hwsel>`.

     decode: |
       writeback_variant = 'shiftout'
       zero_accumulator = DecodeMulqaccZeroacc(zero_acc)

       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)

       d_hwsel = DecodeHalfWordSelect(wrd_hwsel)
       a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
       b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
     operation: *mulqacc-operation
     encoding:
       scheme: bnaq
       mapping:
         # NOTE(review): b1x presumably leaves the low wb bit as don't-care;
         # confirm against the bnaq scheme definition.
         wb: b1x
         dh: wrd_hwsel
         qs2: wrs2_qwsel
         qs1: wrs1_qwsel
         wrs2: wrs2
         wrs1: wrs1
         acc: acc_shift_imm
         z: zero_acc
         wrd: wrd

   # BN.SUB: subtracts the (optionally shifted) second source from the first,
   # writes the result to <wrd> and updates a flag group.
   # The operand list and the decode pseudo-code are anchored here and reused
   # by bn.subb.
   - mnemonic: bn.sub
     group: bignum
     synopsis: Subtraction
     operands: &bn-sub-operands
       - name: wrd
         doc: Name of the destination WDR
       - name: wrs1
         doc: Name of the first source WDR
       - name: wrs2
         doc: Name of the second source WDR
       - *bn-shift-type-operand
       - *bn-shift-bytes-operand
       - *bn-flag-group-operand
     syntax: *bn-add-syntax
     doc: |
       Subtracts the second WDR value from the first one, writes the result to the destination WDR and updates flags.
       The content of the second source WDR can be shifted by an immediate before it is consumed by the operation.
     decode: &bn-sub-decode |
       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)

       fg = DecodeFlagGroup(flag_group)
       sb = UInt(shift_bytes)
       st = DecodeShiftType(shift_type)
     # NOTE(review): decode computes fg, but the pseudo-code below indexes FLAGS
     # with the raw flag_group operand; confirm which one is intended.
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       (result, flags_out) = AddWithCarry(a, -b_shifted, "0")

       WDR[d] = result
       FLAGS[flag_group] = flags_out
     encoding:
       scheme: bnaf
       mapping:
         fg: flag_group
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         funct3: b001
         wrd: wrd

   # BN.SUBB: like bn.sub (same operands, syntax and decode via aliases), but
   # the carry input to AddWithCarry is the inverted C flag of the flag group.
   - mnemonic: bn.subb
     group: bignum
     synopsis: Subtract with borrow
     operands: *bn-sub-operands
     syntax: *bn-add-syntax
     doc: |
       Subtracts the second WDR value and the Carry from the first one, writes the result to the destination WDR, and updates the flags.
       The content of the second source WDR can be shifted by an immediate before it is consumed by the operation.
     decode: *bn-sub-decode
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       (result, flags_out) = AddWithCarry(a, -b_shifted, ~FLAGS[flag_group].C)

       WDR[d] = result
       FLAGS[flag_group] = flags_out
     encoding:
       scheme: bnaf
       mapping:
         fg: flag_group
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         # Differs from bn.sub (b001) only in funct3.
         funct3: b011
         wrd: wrd

   # BN.SUBI: subtracts a zero-extended immediate from a single source WDR,
   # writes the result to <wrd> and updates a flag group.
   - mnemonic: bn.subi
     group: bignum
     synopsis: Subtract Immediate
     operands:
       - name: wrd
         doc: Name of the destination WDR
       - name: wrs
         doc: Name of the source WDR
       - name: imm
         doc: Immediate value
       - *bn-flag-group-operand
     syntax: <wrd>, <wrs>, <imm> [, FG<flag_group>]
     doc: |
       Subtracts a zero-extended immediate from the value of a WDR, writes the result to the destination WDR, and updates the flags.
     # The only source operand of this instruction is named wrs (there is no
     # wrs1), so decode must read wrs.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs)

       fg = DecodeFlagGroup(flag_group)
       i = ZeroExtend(imm, WLEN)
     operation: |
       (result, flags_out) = AddWithCarry(a, -i, "0")

       WDR[d] = result
       FLAGS[flag_group] = flags_out
     encoding:
       scheme: bnai
       mapping:
         fg: flag_group
         sub: b1
         imm: imm
         wrs: wrs
         funct3: b100
         wrd: wrd

   # BN.SUBM: subtraction followed by a single conditional correction by the
   # MOD WSR. No flag-group operand; flags are neither read nor written.
   - mnemonic: bn.subm
     group: bignum
     synopsis: Pseudo-modulo subtraction
     operands: [wrd, wrs1, wrs2]
     doc: |
       Subtracts the second WDR value from the first WDR value, performs a modulo operation with the MOD WSR, and writes the result to the destination WDR.
       This operation is equivalent to a modulo subtraction as long as `wrs1 - wrs2 >= -MOD` holds.
       This constraint is not checked in hardware.
       Flags are not used or saved.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)
     # NOTE(review): the correction below subtracts MOD when result >= MOD. For
     # a negative difference one would expect MOD to be added instead; confirm
     # how AddWithCarry represents a negative result and what is intended.
     operation: |
       (result, ) = AddWithCarry(a, -b, "0")

       if result >= MOD:
         result = result - MOD

       WDR[d] = result
     encoding:
       scheme: bnam
       mapping:
         sub: b1
         wrs2: wrs2
         wrs1: wrs1
         funct3: b101
         wrd: wrd

   # BN.AND: bitwise AND of the first source with the (optionally shifted)
   # second source. No flag-group operand.
   # The operand list, syntax and decode pseudo-code are anchored here and
   # reused by bn.or and bn.xor.
   - mnemonic: bn.and
     group: bignum
     synopsis: Bitwise AND
     operands: &bn-and-operands
       - name: wrd
         doc: Name of the destination WDR
       - name: wrs1
         doc: Name of the first source WDR
       - name: wrs2
         doc: Name of the second source WDR
       - *bn-shift-type-operand
       - *bn-shift-bytes-operand
     syntax: &bn-and-syntax |
       <wrd>, <wrs1>, <wrs2> [, <shift_type> <shift_bytes>B]
     doc: |
       Performs a bitwise and operation.
       Takes the values stored in registers referenced by `wrs1` and `wrs2` and stores the result in the register referenced by `wrd`.
       The content of the second source register can be shifted by an immediate before it is consumed by the operation.
     decode: &bn-and-decode |
       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)

       sb = UInt(shift_bytes)
       st = DecodeShiftType(shift_type)
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       result = a & b_shifted

       WDR[d] = result
     encoding:
       scheme: bna
       mapping:
         # bn.and and bn.or share funct3 b110 and differ in funct31.
         funct31: b0
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         funct3: b110
         wrd: wrd

   # BN.OR: bitwise OR of the first source with the (optionally shifted)
   # second source. Operands, syntax and decode are aliases of bn.and's.
   - mnemonic: bn.or
     group: bignum
     synopsis: Bitwise OR
     operands: *bn-and-operands
     syntax: *bn-and-syntax
     doc: |
       Performs a bitwise or operation.
       Takes the values stored in WDRs referenced by `wrs1` and `wrs2` and stores the result in the WDR referenced by `wrd`.
       The content of the second source WDR can be shifted by an immediate before it is consumed by the operation.
     decode: *bn-and-decode
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       result = a | b_shifted

       WDR[d] = result
     encoding:
       scheme: bna
       mapping:
         # Same funct3 as bn.and; funct31 = b1 distinguishes the two.
         funct31: b1
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         funct3: b110
         wrd: wrd

   # BN.NOT: bitwise complement of a single (optionally shifted) source WDR.
   - mnemonic: bn.not
     group: bignum
     synopsis: Bitwise NOT
     operands:
       - name: wrd
         doc: Name of the destination WDR
       - name: wrs
         doc: Name of the source WDR
       - *bn-shift-type-operand
       - *bn-shift-bytes-operand
     syntax: |
       <wrd>, <wrs> [, <shift_type> <shift_bytes>B]
     doc: |
       Negates the value in `<wrs>`, storing the result into `<wrd>`.
       The source value can be shifted by an immediate before it is consumed by the operation.
     # The only source operand of this instruction is named wrs (there is no
     # wrs1 operand), so decode must read wrs.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs)

       sb = UInt(shift_bytes)
       st = DecodeShiftType(shift_type)
     operation: |
       a_shifted = ShiftReg(a, st, sb)
       result = ~a_shifted

       WDR[d] = result
     encoding:
       scheme: bna
       mapping:
         funct31: b0
         shift_type: shift_type
         shift_bytes: shift_bytes
         # The single source operand is placed in the scheme's wrs2 field; the
         # wrs1 field is encoded as bxxxxx.
         wrs2: wrs
         wrs1: bxxxxx
         funct3: b111
         wrd: wrd

   # BN.XOR: bitwise XOR of the first source with the (optionally shifted)
   # second source. Operands, syntax and decode are aliases of bn.and's.
   - mnemonic: bn.xor
     group: bignum
     synopsis: Bitwise XOR
     operands: *bn-and-operands
     syntax: *bn-and-syntax
     doc: |
       Performs a bitwise xor operation.
       Takes the values stored in WDRs referenced by `wrs1` and `wrs2` and stores the result in the WDR referenced by `wrd`.
       The content of the second source WDR can be shifted by an immediate before it is consumed by the operation.
     decode: *bn-and-decode
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       result = a ^ b_shifted

       WDR[d] = result
     encoding:
       # NOTE(review): bn.and, bn.or and bn.not use scheme bna with a funct31
       # field, whereas this entry uses bnaf with a fixed fg bit although it
       # has no flag-group operand. Presumably fg and funct31 name the same
       # bit (bn.not also uses funct3 b111, with funct31 b0); confirm against
       # the scheme definitions.
       scheme: bnaf
       mapping:
         fg: b1
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         funct3: b111
         wrd: wrd

   # BN.RSHI: right-shifts the 2*WLEN-bit concatenation {wrs1, wrs2} by an
   # immediate number of bits and keeps the low WLEN bits.
   - mnemonic: bn.rshi
     group: bignum
     synopsis: Concatenate and right shift immediate
     operands:
       - name: wrd
         doc: Name of the destination WDR
       - name: wrs1
         doc: Name of the first source WDR
       - name: wrs2
         doc: Name of the second source WDR
       - name: imm
         doc: |
           Number of bits to shift the second source register by. Valid range: 0..(WLEN-1).
     syntax: |
       <wrd>, <wrs1>, <wrs2> >> <imm>
     doc: |
       The concatenation of the content from the WDRs referenced by `wrs1` and `wrs2` (`wrs1` forms the upper part) is right shifted by an immediate value and truncated to WLEN bits.
       The result is stored in the WDR referenced by `wrd`.
     # UInt is spelled as in every other decode block of this file.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)
       shift_bit = UInt(imm)
     operation: |
       WDR[d] = (((a << WLEN) | b) >> shift_bit)[WLEN-1:0]
     encoding:
       scheme: bnr
       mapping:
         imm: imm
         wrs2: wrs2
         wrs1: wrs1
         funct2: b11
         wrd: wrd

   # BN.SEL: copies one of two source WDRs to <wrd>, chosen by a single flag
   # (C, M, L or Z) of the selected flag group.
   - mnemonic: bn.sel
     group: bignum
     synopsis: Flag Select
     operands:
       - name: wrd
         doc: Name of the destination WDR
       - name: wrs1
         doc: Name of the first source WDR
       - name: wrs2
         doc: Name of the second source WDR
       - *bn-flag-group-operand
       - name: flag
         type: enum(C, M, L, Z)
         doc: |
           Flag to check. Valid values:
           - C: Carry flag
           - M: MSB flag
           - L: LSB flag
           - Z: Zero flag
     syntax: |
       <wrd>, <wrs1>, <wrs2>, [FG<flag_group>.]<flag>
     doc: |
       Returns in the destination WDR the value of the first source WDR if the flag in the chosen flag group is set, otherwise returns the value of the second source WDR.
     decode: |
       d = UInt(wrd)
       a = UInt(wrs1)
       b = UInt(wrs2)
       fg = DecodeFlagGroup(flag_group)
       flag = DecodeFlag(flag)
     # The selected value is the content of the source register, addressed
     # through the decoded indices a and b (as bn.mov does with WDR[s]), not
     # the raw wrs1/wrs2 operand fields.
     operation: |
       flag_is_set = FLAGS[fg].get(flag)

       WDR[d] = WDR[a] if flag_is_set else WDR[b]
     encoding:
       scheme: bns
       mapping:
         fg: flag_group
         flag: flag
         wrs2: wrs2
         wrs1: wrs1
         wrd: wrd

   # BN.CMP: performs the bn.sub computation for its flags only; there is no
   # destination operand and no wrd field in the encoding mapping.
   # The operand list, syntax and decode pseudo-code are anchored here and
   # reused by bn.cmpb.
   - mnemonic: bn.cmp
     group: bignum
     synopsis: Compare
     operands: &bn-cmp-operands
       - name: wrs1
         doc: Name of the first source WDR
       - name: wrs2
         doc: Name of the second source WDR
       - *bn-shift-type-operand
       - *bn-shift-bytes-operand
       - *bn-flag-group-operand
     syntax: &bn-cmp-syntax |
       <wrs1>, <wrs2>[, <shift_type> <shift_bytes>B][, FG<flag_group>]
     doc: |
       Subtracts the second WDR value from the first one and updates flags.
       This instruction is identical to BN.SUB, except that no result register is written.
     decode: &bn-cmp-decode |
       a = UInt(wrs1)
       b = UInt(wrs2)

       fg = DecodeFlagGroup(flag_group)
       sb = UInt(shift_bytes)
       st = DecodeShiftType(shift_type)
     # The first element of the AddWithCarry result is discarded; only the
     # flags are kept.
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       (, flags_out) = AddWithCarry(a, -b_shifted, "0")

       FLAGS[flag_group] = flags_out
     encoding:
       scheme: bnc
       mapping:
         fg: flag_group
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         funct3: b001

   # BN.CMPB: performs the bn.subb computation (subtract with borrow, second
   # source optionally shifted) for its flags only; no result register is
   # written. Operands, syntax and decode are aliases of bn.cmp's.
   - mnemonic: bn.cmpb
     group: bignum
     synopsis: Compare with Borrow
     operands: *bn-cmp-operands
     syntax: *bn-cmp-syntax
     doc: |
       Subtracts the second WDR value and the Carry from the first one, and updates flags.
       This instruction is identical to BN.SUBB, except that no result register is written.
     decode: *bn-cmp-decode
     # The shift decoded into st/sb is applied to the second source, as in
     # bn.cmp and bn.subb; the operands, syntax and encoding all carry it.
     operation: |
       b_shifted = ShiftReg(b, st, sb)
       (, flags_out) = AddWithCarry(a, -b_shifted, ~FLAGS[flag_group].C)

       FLAGS[flag_group] = flags_out
     encoding:
       scheme: bnc
       mapping:
         fg: flag_group
         shift_type: shift_type
         shift_bytes: shift_bytes
         wrs2: wrs2
         wrs1: wrs1
         funct3: b011

   # BN.LID: loads one WLEN-bit word from memory at byte address
   # GPR[grs1] + offset into the WDR whose index is held in GPR[grd], then
   # optionally post-increments one of the two GPRs.
   - mnemonic: bn.lid
     group: bignum
     synopsis: Load Word (indirect source, indirect destination)
     operands:
       - name: grd
         doc: Name of the GPR referencing the destination WDR
       - name: grs1
         doc: |
           Name of the GPR containing the memory byte address.
           The value contained in the referenced GPR must be WLEN-aligned.
       - name: offset
         doc: |
           Offset value.
           Must be WLEN-aligned.
       # The two "++" options are mutually exclusive (see the operation).
       - name: grs1_inc
         type: option(++)
         doc: |
           Increment the value in `<grs1>` by WLEN/8 (one word).
           Cannot be specified together with `grd_inc`.
       - name: grd_inc
         type: option(++)
         doc: |
           Increment the value in `<grd>` by one.
           Cannot be specified together with `grs1_inc`.
     syntax: |
       <grd>[<grd_inc>], <offset>(<grs1>[<grs1_inc>])
     doc: |
       Calculates a byte memory address by adding the offset to the value in the GPR `grs1`.
       The value from this memory address is then copied into the WDR pointed to by the value in GPR `grd`.

       After the operation, either the value in the GPR `grs1`, or the value in `grd` can be optionally incremented.

       - If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
       - If `grd_inc` is set, the value in `grd` is incremented by the value 1.

       Only aligned (to WLEN sized regions) accesses are supported.
       Any unaligned access will result in an error.

       TODO: how to handle overflows?
     decode: |
       rd = UInt(grd)
       rs1 = UInt(grs1)
       offset = UInt(offset)
     # Order of the pseudo-code: compute address and WDR index, reject the
     # double-increment and unaligned cases, load, then apply the increment.
     operation: |
       mem_addr = GPR[rs1] + offset
       wdr_dest = GPR[rd]

       if grs1_inc and grd_inc:
           raise Unsupported() # prevented in encoding
       if mem_addr % (WLEN / 8):
           raise Unaligned()

       mem_index = mem_addr // (WLEN / 8)

       WDR[wdr_dest] = LoadWlenWordFromMemory(mem_index)

       if grs1_inc:
           GPR[rs1] = GPR[rs1] + (WLEN / 8)
       if grd_inc:
           GPR[rd] = GPR[rd] + 1
     encoding:
       scheme: bnxid
       mapping:
         imm: offset
         # spp / dpp carry the increment options for grs1 / grd respectively.
         spp: grs1_inc
         dpp: grd_inc
         rs: grs1
         funct3: b100
         rd: grd

   # BN.SID: stores the WDR whose index is held in GPR[grs2] to memory at byte
   # address GPR[grs1] + offset, then optionally post-increments one of the
   # two GPRs.
   - mnemonic: bn.sid
     group: bignum
     synopsis: Store Word (indirect source, indirect destination)
     operands:
       - name: grs1
         doc: |
           Name of the GPR containing the memory byte address.
           The value contained in the referenced GPR must be WLEN-aligned.
       - name: grs2
         doc: Name of the GPR referencing the source WDR.
       - name: offset
         doc: |
           Offset value.
           Must be WLEN-aligned.
       # The two "++" options are mutually exclusive (see the operation).
       - name: grs1_inc
         type: option(++)
         doc: |
           Increment the value in `<grs1>` by WLEN/8 (one word).
           Cannot be specified together with `grs2_inc`.
       - name: grs2_inc
         type: option(++)
         doc: |
           Increment the value in `<grs2>` by one.
           Cannot be specified together with `grs1_inc`.
     # The address register (grs1) goes inside offset(...), mirroring bn.lid;
     # the GPR that references the WDR (grs2) comes first.
     syntax: |
       <grs2>[<grs2_inc>], <offset>(<grs1>[<grs1_inc>])
     doc: |
       Calculates a byte memory address by adding the offset to the value in the GPR `grs1`.
       The value from the WDR pointed to by `grs2` is then copied into the memory.

       After the operation, either the value in the GPR `grs1`, or the value in `grs2` can be optionally incremented.

       - If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
       - If `grs2_inc` is set, the value in `grs2` is incremented by the value 1.

       Only aligned (to WLEN sized regions) accesses are supported.
       Any unaligned access will result in an error.
     decode: |
       rs1 = UInt(grs1)
       rs2 = UInt(grs2)
       offset = UInt(offset)
     # Order of the pseudo-code: compute address and WDR index, reject the
     # double-increment and unaligned cases, store, then apply the increment.
     operation: |
       mem_addr = GPR[rs1] + offset
       wdr_src = GPR[rs2]

       if grs1_inc and grs2_inc:
           raise Unsupported() # prevented in encoding
       if mem_addr % (WLEN / 8):
           raise Unaligned()

       mem_index = mem_addr // (WLEN / 8)

       StoreWlenWordToMemory(mem_index, WDR[wdr_src])

       if grs1_inc:
           GPR[rs1] = GPR[rs1] + (WLEN / 8)
       if grs2_inc:
           GPR[rs2] = GPR[rs2] + 1
     encoding:
       scheme: bnxid
       mapping:
         imm: offset
         # spp / dpp carry the increment options for grs1 / grs2 respectively.
         spp: grs1_inc
         dpp: grs2_inc
         rs: grs1
         funct3: b101
         # The scheme's rd field holds grs2, the GPR referencing the source WDR.
         rd: grs2

   # BN.MOV: copies WDR[wrs] to WDR[wrd] using register numbers taken directly
   # from the instruction. Shares the bnmov scheme with bn.movr (indirect = b0
   # here, b1 there).
   - mnemonic: bn.mov
     group: bignum
     synopsis: Copy content between WDRs (direct addressing)
     operands: [wrd, wrs]
     decode: |
       s = UInt(wrs)
       d = UInt(wrd)
     operation: WDR[d] = WDR[s]
     encoding:
       scheme: bnmov
       mapping:
         indirect: b0
         # No increment options for the direct form; both bits are encoded as bx.
         spp: bx
         dpp: bx
         src: wrs
         dst: wrd

   # BN.MOVR: copies between WDRs whose indices are read from GPRs, then
   # optionally post-increments one of the two GPRs by one.
   - mnemonic: bn.movr
     group: bignum
     synopsis: Copy content between WDRs (register-indirect addressing)
     operands:
       - name: grd
         doc: Name of the GPR referencing the destination WDR.
       - name: grs
         doc: Name of the GPR referencing the source WDR.
       - name: grd_inc
         type: option(++)
         doc: |
           Increment the value in `<grd>` by one.
           Cannot be specified together with `grs_inc`.
       - name: grs_inc
         type: option(++)
         doc: |
           Increment the value in `<grs>` by one.
           Cannot be specified together with `grd_inc`.
     syntax: |
       <grd>[<grd_inc>], <grs>[<grs_inc>]
     doc: |
       Copy WDR contents between registers with indirect addressing.
       Optionally, either the source or the destination register address can be incremented by 1.
     decode: |
       s = UInt(grs)
       d = UInt(grd)
     # s and d are GPR numbers; the WDR indices are the values GPR[s] / GPR[d].
     operation: |
       WDR[GPR[d]] = WDR[GPR[s]]

       if grs_inc:
         GPR[s] = GPR[s] + 1
       if grd_inc:
         GPR[d] = GPR[d] + 1
     encoding:
       scheme: bnmov
       mapping:
         indirect: b1
         spp: grs_inc
         dpp: grd_inc
         src: grs
         dst: grd

   # BN.WSRRS: only operands and encoding are given; this entry has no syntax,
   # doc, decode or operation keys.
   # Shares the wcsr scheme with bn.wsrrw and differs only in the write bit.
   - mnemonic: bn.wsrrs
     group: bignum
     synopsis: Atomic Read and Set Bits in WSR
     operands: [wrd, wsr, wrs]
     encoding:
       scheme: wcsr
       mapping:
         write: b0
         wcsr: wsr
         wrs: wrs
         wrd: wrd

   # BN.WSRRW: only operands and encoding are given; this entry has no syntax,
   # doc, decode or operation keys.
   # Shares the wcsr scheme with bn.wsrrs and differs only in the write bit.
   - mnemonic: bn.wsrrw
     group: bignum
     synopsis: Atomic Read/Write WSR
     operands: [wrd, wsr, wrs]
     encoding:
       scheme: wcsr
       mapping:
         write: b1
         wcsr: wsr
         wrs: wrs
         wrd: wrd